daemon: capture->stt->match->inject loop and CLI

daemon.py runs the loop with pidfile/state, runtime mode switching, and the privacy invariant: in listen mode any non-wake utterance is dropped the instant grammar.parse() returns None. __main__.py exposes start|stop|status|test-audio|install|switch. Signed-off-by: disqualifier <dev@disqualifier.me>
2026-06-25 17:55:25 -04:00 · 2026-06-25 17:55:25 -04:00 · 7780a8d47c
commit 7780a8d47c
parent 947b30c22e
2 changed files with 405 additions and 0 deletions
--- a/src/claudedo/main.py
+++ b/src/claudedo/main.py
@ -0,0 +1,186 @@
 """claudedo CLI: start | stop | status | test-audio | install."""
 from __future__ import annotations
 import argparse
 import logging
 import subprocess
 import sys
 import wave
 from pathlib import Path
 from . import __version__, daemon, target
 from .config import Config, ConfigError, load_config
 def _setup_logging(verbose: bool) -> None:
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.INFO,
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
        datefmt="%H:%M:%S",
    )
 def _load_or_die(path: str | None) -> Config:
     """load the config at ``path``; on ConfigError report it on stderr and exit with status 2."""
     try:
         config = load_config(path)
     except ConfigError as exc:
         print(f"config error: {exc}", file=sys.stderr)
         raise SystemExit(2)
     else:
         return config
 def cmd_start(args: argparse.Namespace) -> int:
     """run the daemon in the foreground.
     ``args.mode``, when given, overrides the configured input mode. returns 0 after
     run_daemon returns, or 1 (message on stderr) when it raises RuntimeError.
     """
     config = _load_or_die(args.config)
     override = args.mode
     if override:
         config.mode = override
     try:
         daemon.run_daemon(config)
     except RuntimeError as exc:
         print(str(exc), file=sys.stderr)
         return 1
     else:
         return 0
 def cmd_stop(_args: argparse.Namespace) -> int:
     """ask a running daemon to stop; returns 0 if one was signalled, 1 if none was found."""
     signalled = daemon.stop_running()
     if not signalled:
         print("claudedo is not running")
         return 1
     print("sent stop signal to claudedo")
     return 0
 def cmd_status(_args: argparse.Namespace) -> int:
     """print the daemon's pid, mode and target; returns 1 when no daemon is running."""
     pid = daemon.read_pid()
     if pid is None:
         print("claudedo: not running")
         return 1
     # a missing/unreadable state file degrades to placeholders rather than an error
     state = daemon.read_state()
     if not state:
         state = {}
     print(f"claudedo: running (pid {pid})")
     mode = state.get("mode", "?")
     print(f"  mode:   {mode}")
     attached = state.get("target")
     if not attached:
         attached = "(none — run cc to attach)"
     print(f"  target: {attached}")
     return 0
 def _check_audio_tools() -> None:
    for tool in ("pactl", "arecord"):
        path = subprocess.run(["which", tool], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        mark = "ok" if path.returncode == 0 else "MISSING (run install.sh)"
        print(f"  {tool}: {mark}")
 def cmd_test_audio(args: argparse.Namespace) -> int:
     """walk the mic capture path and report each step.
     prints the WSLg PulseServer check, the audio-tool check and pactl's default source,
     lists input-capable devices, records a short clip and writes it to
     /tmp/claudedo_test.wav. returns 0 when audio was captured, 1 when capture raised or
     produced nothing.
     """
     config = _load_or_die(args.config)
     print("== claudedo test-audio ==")
     pulse_state = "present" if Path("/mnt/wslg/PulseServer").exists() else "MISSING"
     print("WSLg PulseServer:", pulse_state)
     _check_audio_tools()
     try:
         info = subprocess.run(["pactl", "info"], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
         if info.returncode == 0:
             report = info.stdout.decode("utf-8", "replace")
             for row in report.splitlines():
                 if not row.startswith("Default Source"):
                     continue
                 print(" ", row.strip())
     except FileNotFoundError:
         # pactl is not installed; the tool check above has already said so
         pass
     try:
         from . import audio as audio_mod
         print("\nsounddevice input devices:")
         for idx, dev in enumerate(audio_mod.list_devices()):
             if not dev.get("max_input_channels", 0) > 0:
                 continue
             print(f"  [{idx}] {dev['name']} ({dev['max_input_channels']}ch)")
         device = audio_mod.resolve_device(config.stt_device)
         label = device if device is not None else 'default'
         print(f"\ncapturing {2}s from device={label} ...")
         chunk = audio_mod.record_while(
             config.samplerate,
             config.channels,
             device,
             held=_timed_hold(2.0),
             max_utterance=3.0,
             min_utterance=0.0,
         )
     except Exception as exc:  # noqa: BLE001 — surface any capture failure to the user
         print(f"\naudio capture FAILED: {exc}", file=sys.stderr)
         print("fix-chain: install.sh apt deps + ~/.asoundrc pulse shim + Windows mic permission",
               file=sys.stderr)
         return 1
     if chunk is None or chunk.size == 0:
         print("captured no audio — check mic permission + RDPSource", file=sys.stderr)
         return 1
     out = Path("/tmp/claudedo_test.wav")
     _write_wav(out, chunk, config.samplerate)
     peak = float(abs(chunk).max())
     duration = chunk.size / config.samplerate
     print(f"captured {duration:.1f}s, peak amplitude {peak:.3f} -> {out}")
     if peak < 0.005:
         print("WARNING: near-silent capture — is the mic muted / permission denied?")
     return 0
 def _timed_hold(seconds: float):
    import time
    end = [None]
    def held() -> bool:
        now = time.monotonic()
        if end[0] is None:
            end[0] = now + seconds
        return now < end[0]
    return held
 def _write_wav(path: Path, chunk, samplerate: int) -> None:
    import numpy as np
    pcm = (np.clip(chunk, -1.0, 1.0) * 32767).astype("<i2")
    with wave.open(str(path), "wb") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(samplerate)
        wf.writeframes(pcm.tobytes())
 def cmd_install(_args: argparse.Namespace) -> int:
     """re-run install.sh, looked up two directories above this file's directory.
     returns bash's exit code, or 1 (message on stderr) when the script is not there.
     """
     script = Path(__file__).resolve().parents[2] / "install.sh"
     if script.is_file():
         return subprocess.call(["bash", str(script)])
     print(f"install.sh not found at {script}", file=sys.stderr)
     return 1
 def cmd_switch(args: argparse.Namespace) -> int:
     """make ``args.name`` the active target and echo what target.set_target returned."""
     resolved = target.set_target(args.name)
     message = f"target -> {resolved}"
     print(message)
     return 0
 def build_parser() -> argparse.ArgumentParser:
     """assemble the claudedo argument parser.
     global flags: --version, -v/--verbose, -c/--config. a subcommand is required; each
     one stores its handler in ``func`` so the caller can dispatch with ``args.func(args)``.
     """
     parser = argparse.ArgumentParser(prog="claudedo", description="voice control for claude code")
     parser.add_argument("--version", action="version", version=f"claudedo {__version__}")
     parser.add_argument("-v", "--verbose", action="store_true", help="debug logging")
     parser.add_argument("-c", "--config", help="path to config.toml")
     commands = parser.add_subparsers(dest="command", required=True)
     start = commands.add_parser("start", help="run the daemon (foreground)")
     start.add_argument("--mode", choices=("listen", "ptt"), help="override input mode")
     start.set_defaults(func=cmd_start)
     # subcommands that take no arguments of their own, in help-listing order
     plain_commands = (
         ("stop", "stop a running daemon", cmd_stop),
         ("status", "show daemon status", cmd_status),
         ("test-audio", "verify the mic capture path", cmd_test_audio),
         ("install", "re-run the bootstrap (install.sh)", cmd_install),
     )
     for name, help_text, handler in plain_commands:
         commands.add_parser(name, help=help_text).set_defaults(func=handler)
     switch = commands.add_parser("switch", help="set the active target session")
     switch.add_argument("name", help="project short-name (claude- prefix optional)")
     switch.set_defaults(func=cmd_switch)
     return parser
 def main(argv: list[str] | None = None) -> int:
     """parse ``argv``, set up logging, run the chosen subcommand and return its exit code."""
     args = build_parser().parse_args(argv)
     verbose = getattr(args, "verbose", False)
     _setup_logging(verbose)
     handler = args.func
     return handler(args)
 if __name__ == "__main__":
     # propagate the subcommand's return value as the process exit status
     sys.exit(main())
--- a/src/claudedo/daemon.py
+++ b/src/claudedo/daemon.py
@ -0,0 +1,219 @@
 """the capture -> stt -> match -> inject loop.
 privacy invariant: in listen mode, any utterance that does not start with a wake
 phrase is discarded the instant grammar.parse() returns None — the transcript text
 is dropped and never stored or transmitted. nothing about non-command speech is
 persisted.
 """
 from __future__ import annotations
 import json
 import logging
 import os
 import signal
 import sys
 import time
 from pathlib import Path
 from . import audio, grammar, inject, target
 from .config import Config
 from .stt import Transcriber
 log = logging.getLogger(__name__)
 # per the XDG Base Directory spec an unset, empty, or relative $XDG_CACHE_HOME is ignored
 # in favour of ~/.cache; otherwise the pid/state files would land relative to whatever
 # directory the command happened to be run from.
 _xdg_cache_home = os.environ.get("XDG_CACHE_HOME", "")
 STATE_DIR = (Path(_xdg_cache_home) if os.path.isabs(_xdg_cache_home) else Path.home() / ".cache") / "claudedo"
 # pid of the running daemon, written by Daemon.run and probed by read_pid
 PIDFILE = STATE_DIR / "claudedo.pid"
 # JSON status snapshot written by write_state and read by read_state
 STATEFILE = STATE_DIR / "state.json"
 def _ensure_state_dir() -> None:
     """create STATE_DIR and any missing parents; does nothing if it already exists."""
     os.makedirs(STATE_DIR, exist_ok=True)
 def write_state(pid: int, mode: str, target_session: str | None) -> None:
     """persist the daemon's pid, mode, target and a wall-clock timestamp as JSON in STATEFILE."""
     _ensure_state_dir()
     snapshot = {
         "pid": pid,
         "mode": mode,
         "target": target_session,
         "since": time.time(),
     }
     payload = json.dumps(snapshot)
     STATEFILE.write_text(payload, encoding="utf-8")
 def read_state() -> dict | None:
     """read the daemon status file, or None if it is absent, unreadable or malformed.
     "malformed" includes undecodable bytes, invalid JSON, and valid JSON that is not an
     object, so callers can rely on getting either None or a dict.
     """
     try:
         state = json.loads(STATEFILE.read_text(encoding="utf-8"))
     except (OSError, ValueError):
         # OSError: missing/unreadable file. ValueError: the base class of both
         # json.JSONDecodeError and the UnicodeDecodeError a corrupt file raises.
         return None
     # a payload such as "[]" or "3" parses fine but would break callers that use .get()
     return state if isinstance(state, dict) else None
 def read_pid() -> int | None:
     """return the pid of a running daemon, or None (also clears stale pidfiles).
     None is returned when the pidfile is missing, unreadable, not an integer, or holds a
     value that is not a positive pid. a pidfile naming a process that no longer exists
     is deleted. liveness is probed with signal 0, which delivers nothing.
     """
     try:
         pid = int(PIDFILE.read_text(encoding="utf-8").strip())
     except (OSError, ValueError):
         return None
     if pid <= 0:
         # os.kill gives 0 and negative values process-group / broadcast meaning, so a
         # corrupt pidfile must never be returned for a caller to send a real signal to
         return None
     try:
         os.kill(pid, 0)
     except ProcessLookupError:
         PIDFILE.unlink(missing_ok=True)
         return None
     except PermissionError:
         # the process exists but belongs to another user; it still counts as running
         return pid
     except OverflowError:
         # the number does not fit the platform pid type, so it cannot name a process
         return None
     return pid
 def stop_running() -> bool:
     """signal a running daemon to stop. returns whether one was found.
     sends SIGTERM to the pid reported by read_pid(). returns False when no daemon is
     running, including when it exits between the liveness probe and the signal.
     PermissionError from os.kill is not caught and propagates to the caller.
     """
     pid = read_pid()
     if pid is None:
         return False
     try:
         os.kill(pid, signal.SIGTERM)
     except ProcessLookupError:
         # lost the race: the process was alive for read_pid() but is gone now
         return False
     return True
 class _PTTKey:
    """desk-only push-to-talk: 'held' while the configured key is down in the
    daemon's own terminal. there is deliberately NO global hotkey — a system-wide
    keyboard hook is the keylogger/cheat silhouette claudedo refuses to install. for
    hands-free-while-gaming use listen mode (voice trigger over the mic bridge).
    implementation reads stdin in raw mode: press the key to start capture, press it
    again (or Enter) to stop. (terminals don't deliver key-up events, so true
    hold-to-talk isn't possible from a tty — this is press-toggle, documented.)
    """
    def __init__(self) -> None:
        self._tty = sys.stdin.isatty()
    def wait_press(self, stop) -> bool:
        import select
        if not self._tty:
            log.warning("ptt mode needs a tty; falling back to a 3s timed capture")
            time.sleep(0.2)
            return not stop()
        while not stop():
            r, _, _ = select.select([sys.stdin], [], [], 0.2)
            if r:
                sys.stdin.read(1)
                return True
        return False
 class Daemon:
     """owns the capture/transcribe/inject loop and runtime mode switching.
     privacy: transcript text is never written to the log. only its length, and the name
     of an action once grammar.parse() has accepted it, are logged, so speech that is not
     a command leaves no trace even with debug logging enabled.
     """
     def __init__(self, config: Config) -> None:
         self.config = config
         self.mode = config.mode  # live input mode; a "mode" action can change it at runtime
         self._stop = False  # set by the signal handler, polled by the loop
         self._transcriber: Transcriber | None = None  # created in _load()
         self._device: int | None = None  # resolved in _load()
         self._ptt = _PTTKey()
     def _install_signals(self) -> None:
         """route SIGTERM and SIGINT to _on_signal so the loop can exit cleanly."""
         signal.signal(signal.SIGTERM, self._on_signal)
         signal.signal(signal.SIGINT, self._on_signal)
     def _on_signal(self, _signum, _frame) -> None:
         """signal handler: only raises the stop flag; the loop notices it on its next check."""
         log.info("stop requested")
         self._stop = True
     def stopped(self) -> bool:
         """whether a stop has been requested; passed to capture helpers as their stop callback."""
         return self._stop
     def _load(self) -> None:
         """resolve the input device and construct the transcriber from the config."""
         cfg = self.config
         self._device = audio.resolve_device(cfg.stt_device)
         self._transcriber = Transcriber(
             model=cfg.stt_model, language=cfg.stt_language,
             # any stt_compute value other than "cpu"/"cuda" is passed on as "auto"
             device=cfg.stt_compute if cfg.stt_compute in ("cpu", "cuda") else "auto",
             compute_type="auto",
         )
     def _capture(self):
         """capture one utterance in the current mode.
         ptt: wait for a key press, then record via audio.record_while with a ``held``
         callback driven by the next key press. listen: record via
         audio.record_until_silence. returns whatever the audio helper returns, or None
         when a stop arrived before the first ptt key press.
         """
         cfg = self.config
         if self.mode == "ptt":
             print("[ptt] press the capture key in this terminal, speak, then press again to stop")
             if not self._ptt.wait_press(self.stopped):
                 return None
             # NOTE(review): held() blocks inside wait_press until the next key press, and
             # reports True when a stop is requested (wait_press returns False then); ending
             # the capture in that case presumably relies on record_while's own limits — confirm
             return audio.record_while(
                 cfg.samplerate, cfg.channels, self._device,
                 held=lambda: not self._ptt.wait_press(self.stopped),
                 max_utterance=cfg.max_utterance, min_utterance=cfg.min_utterance,
             )
         return audio.record_until_silence(
             cfg.samplerate, cfg.channels, self._device,
             silence_threshold=cfg.silence_threshold, silence_duration=cfg.silence_duration,
             min_utterance=cfg.min_utterance, max_utterance=cfg.max_utterance,
             stop=self.stopped,
         )
     def _handle(self, transcript: str) -> None:
         """parse one transcript and carry out the resulting action, if any.
         a wake phrase is required only in listen mode. "mode" and "switch" actions are
         handled here and refresh the state file; every other action goes to the resolved
         target session, and is dropped when there is none.
         """
         cfg = self.config
         require_wake = self.mode == "listen"
         action = grammar.parse(transcript, cfg.wake_phrases, cfg.match_threshold, require_wake)
         if action is None:
             # privacy invariant: the transcript is dropped here and never logged
             log.debug("discarded (no wake/command)")
             return
         # log the action name only; action.arg may carry dictated text
         log.debug("matched action: %s", action.name)
         if action.name == "mode":
             new_mode = str(action.arg)
             if new_mode != self.mode:
                 self.mode = new_mode
                 log.info("mode -> %s", new_mode)
                 self._refresh_state()
             return
         if action.name == "switch":
             session = target.set_target(str(action.arg))
             log.info("switched target -> %s", session)
             self._refresh_state()
             return
         session = target.resolve_target()
         if session is None:
             log.debug("no target session; dropping action %s", action.name)
             return
         if action.name == "type" and not cfg.type_autosend:
             # with autosend off, typed text is sent via send_literal instead of perform
             inject.send_literal(session, str(action.arg))
             return
         inject.perform(session, action)
     def _refresh_state(self) -> None:
         """rewrite the status file with the current pid, mode and active target."""
         write_state(os.getpid(), self.mode, target.read_active())
     def run(self) -> None:
         """run the daemon loop until a stop signal arrives.
         writes the pidfile, installs signal handlers, loads the transcriber, then
         repeatedly captures, transcribes and handles utterances. the pidfile and state
         file are removed on the way out, including when an exception escapes.
         """
         _ensure_state_dir()
         PIDFILE.write_text(str(os.getpid()), encoding="utf-8")
         self._install_signals()
         try:
             self._load()
             self._refresh_state()
             log.info("claudedo running (mode=%s); say a wake phrase + command", self.mode)
             print(f"claudedo listening in {self.mode!r} mode — Ctrl-C to stop")
             while not self._stop:
                 audio_chunk = self._capture()
                 if self._stop:
                     # a stop that arrived mid-capture discards that capture untranscribed
                     break
                 if audio_chunk is None:
                     continue
                 transcript = self._transcriber.transcribe(audio_chunk, self.config.samplerate)
                 if not transcript:
                     continue
                 # privacy invariant: this runs before grammar.parse() has accepted the
                 # utterance, so log its length only, never the words themselves
                 log.debug("heard utterance (%d chars)", len(transcript))
                 self._handle(transcript)
         finally:
             PIDFILE.unlink(missing_ok=True)
             STATEFILE.unlink(missing_ok=True)
             log.info("claudedo stopped")
 def run_daemon(config: Config) -> None:
     """entry point used by the CLI ``start`` command.
     raises RuntimeError when read_pid() reports a daemon already running; otherwise
     builds a Daemon from ``config`` and blocks in its run loop.
     """
     already_running = read_pid() is not None
     if already_running:
         raise RuntimeError("claudedo is already running (see `claudedo status`)")
     worker = Daemon(config)
     worker.run()