diff --git a/src/claudedo/__main__.py b/src/claudedo/__main__.py new file mode 100644 index 0000000..730c04c --- /dev/null +++ b/src/claudedo/__main__.py @@ -0,0 +1,186 @@ +"""claudedo CLI: start | stop | status | test-audio | install.""" + +from __future__ import annotations + +import argparse +import logging +import subprocess +import sys +import wave +from pathlib import Path + +from . import __version__, daemon, target +from .config import Config, ConfigError, load_config + + +def _setup_logging(verbose: bool) -> None: + logging.basicConfig( + level=logging.DEBUG if verbose else logging.INFO, + format="%(asctime)s %(levelname)s %(name)s: %(message)s", + datefmt="%H:%M:%S", + ) + + +def _load_or_die(path: str | None) -> Config: + try: + return load_config(path) + except ConfigError as exc: + print(f"config error: {exc}", file=sys.stderr) + raise SystemExit(2) + + +def cmd_start(args: argparse.Namespace) -> int: + config = _load_or_die(args.config) + if args.mode: + config.mode = args.mode + try: + daemon.run_daemon(config) + except RuntimeError as exc: + print(str(exc), file=sys.stderr) + return 1 + return 0 + + +def cmd_stop(_args: argparse.Namespace) -> int: + if daemon.stop_running(): + print("sent stop signal to claudedo") + return 0 + print("claudedo is not running") + return 1 + + +def cmd_status(_args: argparse.Namespace) -> int: + pid = daemon.read_pid() + if pid is None: + print("claudedo: not running") + return 1 + state = daemon.read_state() or {} + print(f"claudedo: running (pid {pid})") + print(f" mode: {state.get('mode', '?')}") + print(f" target: {state.get('target') or '(none — run cc to attach)'}") + return 0 + + +def _check_audio_tools() -> None: + for tool in ("pactl", "arecord"): + path = subprocess.run(["which", tool], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + mark = "ok" if path.returncode == 0 else "MISSING (run install.sh)" + print(f" {tool}: {mark}") + + +def cmd_test_audio(args: argparse.Namespace) -> int: + config = _load_or_die(args.config) + print("== claudedo test-audio ==") + print("WSLg PulseServer:", "present" if Path("/mnt/wslg/PulseServer").exists() else "MISSING") + _check_audio_tools() + + try: + pactl = subprocess.run(["pactl", "info"], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) + if pactl.returncode == 0: + for line in pactl.stdout.decode("utf-8", "replace").splitlines(): + if line.startswith("Default Source"): + print(" ", line.strip()) + except FileNotFoundError: + pass + + try: + from . import audio as audio_mod + print("\nsounddevice input devices:") + for idx, dev in enumerate(audio_mod.list_devices()): + if dev.get("max_input_channels", 0) > 0: + print(f" [{idx}] {dev['name']} ({dev['max_input_channels']}ch)") + device = audio_mod.resolve_device(config.stt_device) + print(f"\ncapturing {2}s from device={device if device is not None else 'default'} ...") + chunk = audio_mod.record_while( + config.samplerate, config.channels, device, + held=_timed_hold(2.0), max_utterance=3.0, min_utterance=0.0, + ) + except Exception as exc: # noqa: BLE001 — surface any capture failure to the user + print(f"\naudio capture FAILED: {exc}", file=sys.stderr) + print("fix-chain: install.sh apt deps + ~/.asoundrc pulse shim + Windows mic permission", + file=sys.stderr) + return 1 + + if chunk is None or chunk.size == 0: + print("captured no audio — check mic permission + RDPSource", file=sys.stderr) + return 1 + + out = Path("/tmp/claudedo_test.wav") + _write_wav(out, chunk, config.samplerate) + peak = float(abs(chunk).max()) + print(f"captured {chunk.size / config.samplerate:.1f}s, peak amplitude {peak:.3f} -> {out}") + if peak < 0.005: + print("WARNING: near-silent capture — is the mic muted / permission denied?") + return 0 + + +def _timed_hold(seconds: float): + import time + + end = [None] + + def held() -> bool: + now = time.monotonic() + if end[0] is None: + end[0] = now + seconds + return now < end[0] + + return held + + +def _write_wav(path: Path, chunk, samplerate: int) -> None: + import numpy as np + + pcm = (np.clip(chunk, -1.0, 1.0) * 32767).astype(" int: + script = Path(__file__).resolve().parents[2] / "install.sh" + if not script.is_file(): + print(f"install.sh not found at {script}", file=sys.stderr) + return 1 + return subprocess.call(["bash", str(script)]) + + +def cmd_switch(args: argparse.Namespace) -> int: + session = target.set_target(args.name) + print(f"target -> {session}") + return 0 + + +def build_parser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser(prog="claudedo", description="voice control for claude code") + p.add_argument("--version", action="version", version=f"claudedo {__version__}") + p.add_argument("-v", "--verbose", action="store_true", help="debug logging") + p.add_argument("-c", "--config", help="path to config.toml") + sub = p.add_subparsers(dest="command", required=True) + + sp = sub.add_parser("start", help="run the daemon (foreground)") + sp.add_argument("--mode", choices=("listen", "ptt"), help="override input mode") + sp.set_defaults(func=cmd_start) + + sub.add_parser("stop", help="stop a running daemon").set_defaults(func=cmd_stop) + sub.add_parser("status", help="show daemon status").set_defaults(func=cmd_status) + sub.add_parser("test-audio", help="verify the mic capture path").set_defaults(func=cmd_test_audio) + sub.add_parser("install", help="re-run the bootstrap (install.sh)").set_defaults(func=cmd_install) + + sw = sub.add_parser("switch", help="set the active target session") + sw.add_argument("name", help="project short-name (claude- prefix optional)") + sw.set_defaults(func=cmd_switch) + return p + + +def main(argv: list[str] | None = None) -> int: + parser = build_parser() + args = parser.parse_args(argv) + _setup_logging(getattr(args, "verbose", False)) + return args.func(args) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/claudedo/daemon.py b/src/claudedo/daemon.py new file mode 100644 index 0000000..c1fa749 --- /dev/null +++ b/src/claudedo/daemon.py @@ -0,0 +1,219 @@ +"""the capture -> stt -> match -> inject loop. + +privacy invariant: in listen mode, any utterance that does not start with a wake +phrase is discarded the instant grammar.parse() returns None — the transcript text +is dropped and never stored or transmitted. nothing about non-command speech is +persisted. +""" + +from __future__ import annotations + +import json +import logging +import os +import signal +import sys +import time +from pathlib import Path + +from . import audio, grammar, inject, target +from .config import Config +from .stt import Transcriber + +log = logging.getLogger(__name__) + +STATE_DIR = Path(os.environ.get("XDG_CACHE_HOME", str(Path.home() / ".cache"))) / "claudedo" +PIDFILE = STATE_DIR / "claudedo.pid" +STATEFILE = STATE_DIR / "state.json" + + +def _ensure_state_dir() -> None: + STATE_DIR.mkdir(parents=True, exist_ok=True) + + +def write_state(pid: int, mode: str, target_session: str | None) -> None: + """write the running daemon's status for `claudedo status` to read.""" + _ensure_state_dir() + STATEFILE.write_text(json.dumps({ + "pid": pid, + "mode": mode, + "target": target_session, + "since": time.time(), + }), encoding="utf-8") + + +def read_state() -> dict | None: + """read the daemon status file, or None if absent/unreadable.""" + try: + return json.loads(STATEFILE.read_text(encoding="utf-8")) + except (FileNotFoundError, json.JSONDecodeError, OSError): + return None + + +def read_pid() -> int | None: + """return the pid of a running daemon, or None (also clears stale pidfiles).""" + try: + pid = int(PIDFILE.read_text(encoding="utf-8").strip()) + except (FileNotFoundError, ValueError, OSError): + return None + try: + os.kill(pid, 0) + except ProcessLookupError: + PIDFILE.unlink(missing_ok=True) + return None + except PermissionError: + return pid + return pid + + +def stop_running() -> bool: + """signal a running daemon to stop. returns whether one was found.""" + pid = read_pid() + if pid is None: + return False + os.kill(pid, signal.SIGTERM) + return True + + +class _PTTKey: + """desk-only push-to-talk: 'held' while the configured key is down in the + daemon's own terminal. there is deliberately NO global hotkey — a system-wide + keyboard hook is the keylogger/cheat silhouette claudedo refuses to install. for + hands-free-while-gaming use listen mode (voice trigger over the mic bridge). + + implementation reads stdin in raw mode: press the key to start capture, press it + again (or Enter) to stop. (terminals don't deliver key-up events, so true + hold-to-talk isn't possible from a tty — this is press-toggle, documented.) + """ + + def __init__(self) -> None: + self._tty = sys.stdin.isatty() + + def wait_press(self, stop) -> bool: + import select + + if not self._tty: + log.warning("ptt mode needs a tty; falling back to a 3s timed capture") + time.sleep(0.2) + return not stop() + while not stop(): + r, _, _ = select.select([sys.stdin], [], [], 0.2) + if r: + sys.stdin.read(1) + return True + return False + + +class Daemon: + """owns the capture/transcribe/inject loop and runtime mode switching.""" + + def __init__(self, config: Config) -> None: + self.config = config + self.mode = config.mode + self._stop = False + self._transcriber: Transcriber | None = None + self._device: int | None = None + self._ptt = _PTTKey() + + def _install_signals(self) -> None: + signal.signal(signal.SIGTERM, self._on_signal) + signal.signal(signal.SIGINT, self._on_signal) + + def _on_signal(self, _signum, _frame) -> None: + log.info("stop requested") + self._stop = True + + def stopped(self) -> bool: + return self._stop + + def _load(self) -> None: + cfg = self.config + self._device = audio.resolve_device(cfg.stt_device) + self._transcriber = Transcriber( + model=cfg.stt_model, language=cfg.stt_language, + device=cfg.stt_compute if cfg.stt_compute in ("cpu", "cuda") else "auto", + compute_type="auto", + ) + + def _capture(self): + cfg = self.config + if self.mode == "ptt": + print("[ptt] press the capture key in this terminal, speak, then press again to stop") + if not self._ptt.wait_press(self.stopped): + return None + return audio.record_while( + cfg.samplerate, cfg.channels, self._device, + held=lambda: not self._ptt.wait_press(self.stopped), + max_utterance=cfg.max_utterance, min_utterance=cfg.min_utterance, + ) + return audio.record_until_silence( + cfg.samplerate, cfg.channels, self._device, + silence_threshold=cfg.silence_threshold, silence_duration=cfg.silence_duration, + min_utterance=cfg.min_utterance, max_utterance=cfg.max_utterance, + stop=self.stopped, + ) + + def _handle(self, transcript: str) -> None: + cfg = self.config + require_wake = self.mode == "listen" + action = grammar.parse(transcript, cfg.wake_phrases, cfg.match_threshold, require_wake) + if action is None: + log.debug("discarded (no wake/command)") + return + + if action.name == "mode": + new_mode = str(action.arg) + if new_mode != self.mode: + self.mode = new_mode + log.info("mode -> %s", new_mode) + self._refresh_state() + return + if action.name == "switch": + session = target.set_target(str(action.arg)) + log.info("switched target -> %s", session) + self._refresh_state() + return + + session = target.resolve_target() + if session is None: + return + if action.name == "type" and not cfg.type_autosend: + inject.send_literal(session, str(action.arg)) + return + inject.perform(session, action) + + def _refresh_state(self) -> None: + write_state(os.getpid(), self.mode, target.read_active()) + + def run(self) -> None: + """run the daemon loop until a stop signal arrives.""" + _ensure_state_dir() + PIDFILE.write_text(str(os.getpid()), encoding="utf-8") + self._install_signals() + try: + self._load() + self._refresh_state() + log.info("claudedo running (mode=%s); say a wake phrase + command", self.mode) + print(f"claudedo listening in {self.mode!r} mode — Ctrl-C to stop") + while not self._stop: + audio_chunk = self._capture() + if self._stop: + break + if audio_chunk is None: + continue + transcript = self._transcriber.transcribe(audio_chunk, self.config.samplerate) + if not transcript: + continue + log.debug("heard: %s", transcript) + self._handle(transcript) + finally: + PIDFILE.unlink(missing_ok=True) + STATEFILE.unlink(missing_ok=True) + log.info("claudedo stopped") + + +def run_daemon(config: Config) -> None: + """entry point used by the CLI ``start`` command.""" + if read_pid() is not None: + raise RuntimeError("claudedo is already running (see `claudedo status`)") + Daemon(config).run()