daemon: capture->stt->match->inject loop and CLI

daemon.py runs the loop with pidfile/state, runtime mode switching, and the
privacy invariant: in listen mode any non-wake utterance is dropped the instant
grammar.parse() returns None. __main__.py exposes start|stop|status|test-audio|
install|switch.

Signed-off-by: disqualifier <dev@disqualifier.me>
This commit is contained in:
disqualifier 2026-06-25 17:55:25 -04:00
parent 947b30c22e
commit 7780a8d47c
2 changed files with 405 additions and 0 deletions

186
src/claudedo/__main__.py Normal file
View File

@ -0,0 +1,186 @@
"""claudedo CLI: start | stop | status | test-audio | install."""
from __future__ import annotations
import argparse
import logging
import subprocess
import sys
import wave
from pathlib import Path
from . import __version__, daemon, target
from .config import Config, ConfigError, load_config
def _setup_logging(verbose: bool) -> None:
logging.basicConfig(
level=logging.DEBUG if verbose else logging.INFO,
format="%(asctime)s %(levelname)s %(name)s: %(message)s",
datefmt="%H:%M:%S",
)
def _load_or_die(path: str | None) -> Config:
    """load the config, or terminate the CLI when it is invalid.

    *path* is handed to load_config() unchanged. a ConfigError is reported on
    stderr as ``config error: ...`` and turned into SystemExit(2).
    """
    try:
        loaded = load_config(path)
    except ConfigError as exc:
        print(f"config error: {exc}", file=sys.stderr)
        raise SystemExit(2)
    else:
        return loaded
def cmd_start(args: argparse.Namespace) -> int:
    """`start`: run the daemon in the foreground.

    a --mode given on the command line replaces the mode from the config.
    returns 0 once daemon.run_daemon() returns, or 1 (with the message on
    stderr) when it raises RuntimeError.
    """
    cfg = _load_or_die(args.config)
    override = args.mode
    if override:
        cfg.mode = override
    try:
        daemon.run_daemon(cfg)
    except RuntimeError as err:
        print(str(err), file=sys.stderr)
        return 1
    else:
        return 0
def cmd_stop(_args: argparse.Namespace) -> int:
    """`stop`: ask a running daemon to shut down.

    returns 0 when daemon.stop_running() reports that a daemon was signalled,
    1 when none was found.
    """
    signalled = daemon.stop_running()
    if not signalled:
        print("claudedo is not running")
        return 1
    print("sent stop signal to claudedo")
    return 0
def cmd_status(_args: argparse.Namespace) -> int:
    """`status`: print whether the daemon runs, plus its mode and target.

    returns 1 when daemon.read_pid() finds no daemon, else 0. mode and target
    come from the daemon's state file; a missing state file shows '?' for the
    mode and a placeholder for the target.
    """
    running_pid = daemon.read_pid()
    if running_pid is None:
        print("claudedo: not running")
        return 1
    snapshot = daemon.read_state()
    if not snapshot:
        snapshot = {}
    print(f"claudedo: running (pid {running_pid})")
    mode = snapshot.get("mode", "?")
    print(f" mode: {mode}")
    attached = snapshot.get("target") or "(none — run cc to attach)"
    print(f" target: {attached}")
    return 0
def _check_audio_tools() -> None:
for tool in ("pactl", "arecord"):
path = subprocess.run(["which", tool], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
mark = "ok" if path.returncode == 0 else "MISSING (run install.sh)"
print(f" {tool}: {mark}")
def cmd_test_audio(args: argparse.Namespace) -> int:
    """`test-audio`: exercise the mic capture path and report each stage.

    prints whether the WSLg PulseServer path exists, the pactl/arecord check,
    the "Default Source" line from `pactl info` (when pactl runs), and the
    input devices reported by the audio module. it then records with a 2s
    timed hold, writes the result to /tmp/claudedo_test.wav and prints the
    duration and peak amplitude.

    returns 1 when capture raises or yields no audio, otherwise 0.
    """
    cfg = _load_or_die(args.config)
    print("== claudedo test-audio ==")
    pulse_socket = Path("/mnt/wslg/PulseServer")
    print("WSLg PulseServer:", "present" if pulse_socket.exists() else "MISSING")
    _check_audio_tools()

    # best effort: when pactl is absent the "Default Source" line is skipped
    try:
        info = subprocess.run(["pactl", "info"], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
    except FileNotFoundError:
        info = None
    if info is not None and info.returncode == 0:
        for row in info.stdout.decode("utf-8", "replace").splitlines():
            if row.startswith("Default Source"):
                print(" ", row.strip())

    try:
        from . import audio as audio_mod

        print("\nsounddevice input devices:")
        for idx, dev in enumerate(audio_mod.list_devices()):
            if dev.get("max_input_channels", 0) > 0:
                print(f" [{idx}] {dev['name']} ({dev['max_input_channels']}ch)")
        device = audio_mod.resolve_device(cfg.stt_device)
        label = device if device is not None else "default"
        print(f"\ncapturing 2s from device={label} ...")
        chunk = audio_mod.record_while(
            cfg.samplerate,
            cfg.channels,
            device,
            held=_timed_hold(2.0),
            max_utterance=3.0,
            min_utterance=0.0,
        )
    except Exception as exc:  # noqa: BLE001 — surface any capture failure to the user
        print(f"\naudio capture FAILED: {exc}", file=sys.stderr)
        print(
            "fix-chain: install.sh apt deps + ~/.asoundrc pulse shim + Windows mic permission",
            file=sys.stderr,
        )
        return 1

    if chunk is None or chunk.size == 0:
        print("captured no audio — check mic permission + RDPSource", file=sys.stderr)
        return 1

    out = Path("/tmp/claudedo_test.wav")
    _write_wav(out, chunk, cfg.samplerate)
    peak = float(abs(chunk).max())
    print(f"captured {chunk.size / cfg.samplerate:.1f}s, peak amplitude {peak:.3f} -> {out}")
    if peak < 0.005:
        print("WARNING: near-silent capture — is the mic muted / permission denied?")
    return 0
def _timed_hold(seconds: float):
import time
end = [None]
def held() -> bool:
now = time.monotonic()
if end[0] is None:
end[0] = now + seconds
return now < end[0]
return held
def _write_wav(path: Path, chunk, samplerate: int) -> None:
import numpy as np
pcm = (np.clip(chunk, -1.0, 1.0) * 32767).astype("<i2")
with wave.open(str(path), "wb") as wf:
wf.setnchannels(1)
wf.setsampwidth(2)
wf.setframerate(samplerate)
wf.writeframes(pcm.tobytes())
def cmd_install(_args: argparse.Namespace) -> int:
    """`install`: re-run the bootstrap script with bash.

    install.sh is looked up at ``parents[2]`` of this file's resolved path.
    returns the script's exit status, or 1 when the script is not a file.
    """
    here = Path(__file__).resolve()
    bootstrap = here.parents[2] / "install.sh"
    if bootstrap.is_file():
        return subprocess.call(["bash", str(bootstrap)])
    print(f"install.sh not found at {bootstrap}", file=sys.stderr)
    return 1
def cmd_switch(args: argparse.Namespace) -> int:
    """`switch`: make *args.name* the active target session.

    prints the session name returned by target.set_target() and returns 0.
    """
    resolved = target.set_target(args.name)
    message = f"target -> {resolved}"
    print(message)
    return 0
def build_parser() -> argparse.ArgumentParser:
    """assemble the claudedo argument parser.

    global options (--version, -v/--verbose, -c/--config) live on the top-level
    parser. every subcommand stores its handler in ``func`` so main() can
    dispatch without a lookup table.
    """
    parser = argparse.ArgumentParser(prog="claudedo", description="voice control for claude code")
    parser.add_argument("--version", action="version", version=f"claudedo {__version__}")
    parser.add_argument("-v", "--verbose", action="store_true", help="debug logging")
    parser.add_argument("-c", "--config", help="path to config.toml")
    commands = parser.add_subparsers(dest="command", required=True)

    start = commands.add_parser("start", help="run the daemon (foreground)")
    start.add_argument("--mode", choices=("listen", "ptt"), help="override input mode")
    start.set_defaults(func=cmd_start)

    # subcommands that take no arguments of their own
    plain_commands = (
        ("stop", "stop a running daemon", cmd_stop),
        ("status", "show daemon status", cmd_status),
        ("test-audio", "verify the mic capture path", cmd_test_audio),
        ("install", "re-run the bootstrap (install.sh)", cmd_install),
    )
    for name, summary, handler in plain_commands:
        commands.add_parser(name, help=summary).set_defaults(func=handler)

    switch = commands.add_parser("switch", help="set the active target session")
    switch.add_argument("name", help="project short-name (claude- prefix optional)")
    switch.set_defaults(func=cmd_switch)
    return parser
def main(argv: list[str] | None = None) -> int:
    """parse *argv*, set up logging, and run the selected subcommand.

    *argv* is passed to ArgumentParser.parse_args() as given. returns the
    subcommand handler's exit status.
    """
    namespace = build_parser().parse_args(argv)
    verbose = getattr(namespace, "verbose", False)
    _setup_logging(verbose)
    handler = namespace.func
    return handler(namespace)
# script entry point: exit with whatever status main() returns
if __name__ == "__main__":
    raise SystemExit(main())

219
src/claudedo/daemon.py Normal file
View File

@ -0,0 +1,219 @@
"""the capture -> stt -> match -> inject loop.
privacy invariant: in listen mode, any utterance that does not start with a wake
phrase is discarded the instant grammar.parse() returns None — the transcript text
is dropped and never stored or transmitted. nothing about non-command speech is
persisted.
"""
from __future__ import annotations
import json
import logging
import os
import signal
import sys
import time
from pathlib import Path
from . import audio, grammar, inject, target
from .config import Config
from .stt import Transcriber
log = logging.getLogger(__name__)
# runtime files live under $XDG_CACHE_HOME/claudedo. per the XDG base directory
# spec an *empty* XDG_CACHE_HOME means the same as unset, so both fall back to
# ~/.cache (a plain .get() default would turn "" into a cwd-relative path).
STATE_DIR = Path(os.environ.get("XDG_CACHE_HOME") or Path.home() / ".cache") / "claudedo"
# pid of the running daemon
PIDFILE = STATE_DIR / "claudedo.pid"
# JSON status snapshot read by `claudedo status`
STATEFILE = STATE_DIR / "state.json"
def _ensure_state_dir() -> None:
STATE_DIR.mkdir(parents=True, exist_ok=True)
def write_state(pid: int, mode: str, target_session: str | None) -> None:
    """persist the daemon's status as JSON for `claudedo status` to read.

    the snapshot holds the pid, the current mode, the target session (may be
    None) and ``since``, the time.time() at which it was written.
    """
    _ensure_state_dir()
    snapshot = {
        "pid": pid,
        "mode": mode,
        "target": target_session,
        "since": time.time(),
    }
    payload = json.dumps(snapshot)
    STATEFILE.write_text(payload, encoding="utf-8")
def read_state() -> dict | None:
    """read the daemon status file, or None if absent, unreadable or malformed.

    only a JSON *object* counts as valid state: callers use ``.get()`` on the
    result, so any other JSON value (list, string, number) is treated like a
    corrupt file and yields None.
    """
    try:
        state = json.loads(STATEFILE.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file; ValueError covers both
        # malformed JSON (JSONDecodeError) and invalid UTF-8 (UnicodeDecodeError)
        return None
    if not isinstance(state, dict):
        return None
    return state
def read_pid() -> int | None:
    """return the pid of a running daemon, or None (also clears stale pidfiles).

    the pidfile must hold a positive integer. liveness is probed with signal 0:
    a pid that no longer exists removes the pidfile and yields None; a pid that
    exists but may not be signalled by this user (PermissionError) still counts
    as running.
    """
    try:
        pid = int(PIDFILE.read_text(encoding="utf-8").strip())
    except (OSError, ValueError):
        return None
    if pid <= 0:
        # kill() treats 0 and negative pids as process groups / "every process",
        # so such a value can never name the daemon — and reporting it as alive
        # would let `stop` signal a whole group.
        return None
    try:
        os.kill(pid, 0)
    except ProcessLookupError:
        PIDFILE.unlink(missing_ok=True)
        return None
    except PermissionError:
        return pid
    except OverflowError:
        # value does not fit a pid at all: corrupt pidfile, nothing is running
        return None
    return pid
def stop_running() -> bool:
    """signal a running daemon to stop. returns whether one was found.

    sends SIGTERM to the pid reported by read_pid(). if the process has already
    exited by the time the signal is sent, that is reported as "not found"
    (False) instead of escaping as ProcessLookupError.
    """
    pid = read_pid()
    if pid is None:
        return False
    try:
        os.kill(pid, signal.SIGTERM)
    except ProcessLookupError:
        # the daemon exited between the liveness probe and the signal
        return False
    return True
class _PTTKey:
"""desk-only push-to-talk: 'held' while the configured key is down in the
daemon's own terminal. there is deliberately NO global hotkey — a system-wide
keyboard hook is the keylogger/cheat silhouette claudedo refuses to install. for
hands-free-while-gaming use listen mode (voice trigger over the mic bridge).
implementation reads stdin in raw mode: press the key to start capture, press it
again (or Enter) to stop. (terminals don't deliver key-up events, so true
hold-to-talk isn't possible from a tty — this is press-toggle, documented.)
"""
def __init__(self) -> None:
self._tty = sys.stdin.isatty()
def wait_press(self, stop) -> bool:
import select
if not self._tty:
log.warning("ptt mode needs a tty; falling back to a 3s timed capture")
time.sleep(0.2)
return not stop()
while not stop():
r, _, _ = select.select([sys.stdin], [], [], 0.2)
if r:
sys.stdin.read(1)
return True
return False
class Daemon:
    """owns the capture/transcribe/inject loop and runtime mode switching."""

    def __init__(self, config: Config) -> None:
        self.config = config
        # current input mode; starts from the config and may change at runtime
        self.mode = config.mode
        # set by the signal handler; polled by every blocking wait
        self._stop = False
        self._transcriber: Transcriber | None = None
        self._device: int | None = None
        self._ptt = _PTTKey()

    def _install_signals(self) -> None:
        """route SIGTERM and SIGINT to the cooperative stop flag."""
        signal.signal(signal.SIGTERM, self._on_signal)
        signal.signal(signal.SIGINT, self._on_signal)

    def _on_signal(self, _signum, _frame) -> None:
        """signal handler: request a stop; the loop exits at its next check."""
        log.info("stop requested")
        self._stop = True

    def stopped(self) -> bool:
        """return whether a stop has been requested."""
        return self._stop

    def _load(self) -> None:
        """resolve the capture device and construct the transcriber."""
        cfg = self.config
        self._device = audio.resolve_device(cfg.stt_device)
        self._transcriber = Transcriber(
            model=cfg.stt_model, language=cfg.stt_language,
            # anything other than an explicit cpu/cuda choice becomes "auto"
            device=cfg.stt_compute if cfg.stt_compute in ("cpu", "cuda") else "auto",
            compute_type="auto",
        )

    def _ptt_held(self) -> bool:
        """`held` predicate for a ptt recording.

        blocks in wait_press() until the toggle key is pressed or a stop is
        requested, then reports False in both cases. wait_press() returns False
        when it was aborted by a stop request; negating that alone would report
        "still held" and keep the capture alive after SIGTERM.
        """
        pressed = self._ptt.wait_press(self.stopped)
        return not pressed and not self._stop

    def _capture(self):
        """record one utterance according to the current mode.

        ptt: wait for a press, then record until the next press. returns None
        when the wait is aborted by a stop request. otherwise: record until
        silence. the return value is whatever the audio module returns.
        """
        cfg = self.config
        if self.mode == "ptt":
            print("[ptt] press the capture key in this terminal, speak, then press again to stop")
            if not self._ptt.wait_press(self.stopped):
                return None
            return audio.record_while(
                cfg.samplerate, cfg.channels, self._device,
                held=self._ptt_held,
                max_utterance=cfg.max_utterance, min_utterance=cfg.min_utterance,
            )
        return audio.record_until_silence(
            cfg.samplerate, cfg.channels, self._device,
            silence_threshold=cfg.silence_threshold, silence_duration=cfg.silence_duration,
            min_utterance=cfg.min_utterance, max_utterance=cfg.max_utterance,
            stop=self.stopped,
        )

    def _handle(self, transcript: str) -> None:
        """parse one transcript and carry out the resulting action, if any.

        a wake phrase is required only in listen mode. when grammar.parse()
        returns None the transcript is dropped without being logged.
        """
        cfg = self.config
        require_wake = self.mode == "listen"
        action = grammar.parse(transcript, cfg.wake_phrases, cfg.match_threshold, require_wake)
        if action is None:
            log.debug("discarded (no wake/command)")
            return
        # privacy: the text is logged only now that it is known to be a command;
        # non-command speech must never reach the log
        log.debug("heard command: %s", transcript)
        if action.name == "mode":
            new_mode = str(action.arg)
            if new_mode != self.mode:
                self.mode = new_mode
                log.info("mode -> %s", new_mode)
                self._refresh_state()
            return
        if action.name == "switch":
            session = target.set_target(str(action.arg))
            log.info("switched target -> %s", session)
            self._refresh_state()
            return
        session = target.resolve_target()
        if session is None:
            # say so, otherwise a recognised command vanishes without a trace
            log.warning("no target session; dropped %r command", action.name)
            return
        if action.name == "type" and not cfg.type_autosend:
            inject.send_literal(session, str(action.arg))
            return
        inject.perform(session, action)

    def _refresh_state(self) -> None:
        """rewrite the status file with the current pid, mode and target."""
        write_state(os.getpid(), self.mode, target.read_active())

    def run(self) -> None:
        """run the daemon loop until a stop signal arrives.

        writes the pidfile, installs the signal handlers, then repeats
        capture -> transcribe -> handle. the pidfile and status file are removed
        on the way out, including when an exception escapes.
        """
        _ensure_state_dir()
        PIDFILE.write_text(str(os.getpid()), encoding="utf-8")
        self._install_signals()
        try:
            self._load()
            self._refresh_state()
            log.info("claudedo running (mode=%s); say a wake phrase + command", self.mode)
            print(f"claudedo listening in {self.mode!r} mode — Ctrl-C to stop")
            while not self._stop:
                audio_chunk = self._capture()
                if self._stop:
                    break
                if audio_chunk is None:
                    continue
                transcript = self._transcriber.transcribe(audio_chunk, self.config.samplerate)
                if not transcript:
                    continue
                # the transcript is deliberately NOT logged here: whether it may
                # be kept at all is only known after grammar.parse() in _handle
                self._handle(transcript)
        finally:
            PIDFILE.unlink(missing_ok=True)
            STATEFILE.unlink(missing_ok=True)
            log.info("claudedo stopped")
def run_daemon(config: Config) -> None:
    """entry point used by the CLI ``start`` command.

    raises RuntimeError when read_pid() already reports a daemon; otherwise
    builds a Daemon from *config* and runs it until it stops.
    """
    existing = read_pid()
    if existing is not None:
        raise RuntimeError("claudedo is already running (see `claudedo status`)")
    instance = Daemon(config)
    instance.run()