From 17db65858e2e279e67b6a92281192e004f12706f Mon Sep 17 00:00:00 2001 From: disqualifier Date: Thu, 25 Jun 2026 19:30:36 -0400 Subject: [PATCH] =?UTF-8?q?feat:=20terminal-run=20only=20=E2=80=94=20drop?= =?UTF-8?q?=20systemd/autostart,=20start=20does=20mic-check=20+=20visible?= =?UTF-8?q?=20loop?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit terminal-run is the product, so remove all backgrounding: delete the claudedo.service unit and autostart.sh, strip the systemd step and the autostart source-line from install.sh (rc block now sources cc.sh only). claudedo start now runs a mic check first (warm-up + brief capture, aborts with guidance if silent; --skip-audio-check to bypass) then drops into a visible listen loop printing the recognition/action log: a startup banner, then heard -> matched -> target / injected per utterance, target/mode state changes, and (listen mode) non-wake speech dropped WITHOUT the transcript per the privacy invariant. Signed-off-by: disqualifier --- README.md | 37 ++++-------------- install.sh | 20 +--------- shell/autostart.sh | 18 --------- shell/claudedo.service | 14 ------- src/claudedo/__main__.py | 82 +++++++++++++++++++++++++++++----------- src/claudedo/daemon.py | 51 ++++++++++++++++++++++--- 6 files changed, 115 insertions(+), 107 deletions(-) delete mode 100644 shell/autostart.sh delete mode 100644 shell/claudedo.service diff --git a/README.md b/README.md index 1840d2f..80cc7b6 100644 --- a/README.md +++ b/README.md @@ -61,44 +61,23 @@ claudedo test-audio ## Usage -**Run it in a terminal you watch** — that's the product. The `claudedo start` -terminal is your recognition/action console (it logs what it heard, what it matched, -and what it injected); you attach to the `claude-` session in another pane to -watch the keystrokes land. Backgrounding (tmux/autostart/systemd, below) is an -optional extra, not the default — it hides the console you'd otherwise read. 
+**Run it in a terminal you watch — that's the product.** You launch `claudedo +start`, it does a quick mic check, then drops into a visible listen loop that prints +`heard → matched → injected` for every command it recognises (in listen mode, non-wake speech is reported only as dropped, never with its transcript). That terminal is your +recognition/action console; you attach to the `claude-` session in another pane +to watch the keystrokes land. There is no backgrounding/daemon mode — the whole point +is the console you read. ```bash -claudedo start # run the daemon (foreground; listen mode by default) +claudedo start # mic-check, then the visible listen loop (listen mode default) claudedo start --mode ptt # push-to-talk instead (desk-only — see Modes) +claudedo start --skip-audio-check # skip the pre-listen mic check claudedo status # running? mode? target session? claudedo stop # stop a running daemon claudedo switch # retarget to claude- claudedo test-audio # verify the mic capture path ``` -If you do want it backgrounded (optional — you lose the live console), run it in its -own tmux session: - -```bash -tmux new-session -d -s claudedo 'claudedo start' -``` - -### Autostart - -WSL has no real boot, so autostart is rc-based and **opt-in**. `install.sh` ships -`~/.config/claudedo/autostart.sh`, which starts the daemon in a `claudedo-daemon` -tmux session once per WSL session — but only when `CLAUDEDO_AUTOSTART=1` is set. -Enable it by uncommenting the `export CLAUDEDO_AUTOSTART=1` line in the cc-kit marker -block of your rc; disable it by re-commenting (or deleting the file). Watch its logs -with `tmux attach -t claudedo-daemon`. 
- -If your WSL runs systemd (`systemd=true` in `/etc/wsl.conf`), `install.sh` also -installs an optional user unit — enable it instead with: - -```bash -systemctl --user enable --now claudedo -``` - ### Modes - **listen (default)** — continuous capture; only acts on utterances that **start diff --git a/install.sh b/install.sh index 1abc70b..3af2b6f 100755 --- a/install.sh +++ b/install.sh @@ -92,8 +92,7 @@ say "installing the cc kit (~/.config/claudedo/cc.sh)" CONF_DIR="$HOME/.config/claudedo" mkdir -p "$CONF_DIR" install -m 0644 "$REPO_DIR/shell/cc.sh" "$CONF_DIR/cc.sh" -install -m 0644 "$REPO_DIR/shell/autostart.sh" "$CONF_DIR/autostart.sh" -echo " wrote $CONF_DIR/cc.sh and autostart.sh" +echo " wrote $CONF_DIR/cc.sh" # wire EVERY rc that exists (the user may have both zsh and bash). wired_any=0 @@ -109,9 +108,6 @@ for RC in "$HOME/.zshrc" "$HOME/.bashrc"; do cat >> "$RC" <<'CCKIT' # >>> claudedo cc kit >>> -# voice-daemon autostart is OPT-IN: uncomment the next line to enable it. -# export CLAUDEDO_AUTOSTART=1 -[ -f ~/.config/claudedo/autostart.sh ] && source ~/.config/claudedo/autostart.sh [ -f ~/.config/claudedo/cc.sh ] && source ~/.config/claudedo/cc.sh # <<< claudedo cc kit <<< CCKIT @@ -132,19 +128,7 @@ for RC in "$HOME/.zshrc" "$HOME/.bashrc"; do fi done -# 7. optional systemd user service (only if systemd-in-WSL is available) --------- -if [ -d /run/systemd/system ] && systemctl --user show-environment >/dev/null 2>&1; then - say "systemd user instance detected — installing optional claudedo.service (NOT enabled)" - mkdir -p "$HOME/.config/systemd/user" - install -m 0644 "$REPO_DIR/shell/claudedo.service" "$HOME/.config/systemd/user/claudedo.service" - systemctl --user daemon-reload 2>/dev/null || true - echo " enable it with: systemctl --user enable --now claudedo" - echo " (or use the rc-based autostart instead — CLAUDEDO_AUTOSTART=1)" -else - echo " (no systemd user instance — using rc-based autostart; that's normal on WSL)" -fi - -# 8. 
tmux settings for reliable send-keys (idempotent ~/.tmux.conf append) ------- +# 7. tmux settings for reliable send-keys (idempotent ~/.tmux.conf append) ------- say "configuring tmux for reliable send-keys (~/.tmux.conf)" TMUX_CONF="$HOME/.tmux.conf" TMUX_MARKER="# >>> claudedo tmux >>>" diff --git a/shell/autostart.sh b/shell/autostart.sh deleted file mode 100644 index c416fc6..0000000 --- a/shell/autostart.sh +++ /dev/null @@ -1,18 +0,0 @@ -# claudedo autostart (OPT-IN). starts the voice daemon once per WSL session in its -# own tmux session, if not already running. WSL has no real boot and usually no -# systemd, so this rc-based guard matches WSL's "starts when you open a terminal" -# model. POSIX; safe to source under bash and zsh. -# -# this only acts when CLAUDEDO_AUTOSTART=1 is set (the rc marker block gates on it), -# so sourcing it alone does nothing. to enable: export CLAUDEDO_AUTOSTART=1 before -# the cc-kit marker block in your rc. to disable: unset it (or remove this file). -# -# the daemon runs detached; watch its logs with: tmux attach -t claudedo-daemon - -if [ "${CLAUDEDO_AUTOSTART:-0}" = "1" ]; then - if command -v claudedo >/dev/null 2>&1; then - if ! 
tmux has-session -t claudedo-daemon 2>/dev/null; then - tmux new-session -d -s claudedo-daemon "claudedo start" - fi - fi -fi diff --git a/shell/claudedo.service b/shell/claudedo.service deleted file mode 100644 index f02affe..0000000 --- a/shell/claudedo.service +++ /dev/null @@ -1,14 +0,0 @@ -[Unit] -Description=claudedo voice-control daemon for claude code -Documentation=https://github.com/dsql/claudedo -After=default.target - -[Service] -Type=simple -ExecStart=%h/.local/bin/claudedo start -Restart=on-failure -RestartSec=3 -Environment=PULSE_SERVER=unix:/mnt/wslg/PulseServer - -[Install] -WantedBy=default.target diff --git a/src/claudedo/__main__.py b/src/claudedo/__main__.py index 5603304..d4f9beb 100644 --- a/src/claudedo/__main__.py +++ b/src/claudedo/__main__.py @@ -33,6 +33,15 @@ def cmd_start(args: argparse.Namespace) -> int: config = _load_or_die(args.config) if args.mode: config.mode = args.mode + if not args.skip_audio_check: + print("checking mic before listening (speak briefly) ...") + peak = _probe_mic(config, seconds=2.0, verbose=False) + if peak is None or peak < 0.02: + print("mic check failed — no usable input.", file=sys.stderr) + print("run `claudedo test-audio` to debug; or `claudedo start --skip-audio-check`", + file=sys.stderr) + return 1 + print(f"mic OK (peak {peak:.3f}).") try: daemon.run_daemon(config) except RuntimeError as exc: @@ -41,6 +50,45 @@ def cmd_start(args: argparse.Namespace) -> int: return 0 +def _probe_mic(config: Config, seconds: float, verbose: bool): + """warm up the mic then capture for `seconds`; return peak amplitude or None. + + None signals a hard capture failure (no PortAudio / device error) with guidance + already printed; a float (possibly ~0) is a successful capture whose level the + caller judges. shared by `start`'s precheck and `test-audio`. + """ + from . 
import audio as audio_mod + + try: + device = audio_mod.resolve_device(config.stt_device) + if verbose: + print("priming mic (RDPSource resumes from suspend) ...") + audio_mod.warm_up(config.samplerate, config.channels, device) + if verbose: + print(f"capturing {seconds:.0f}s from " + f"device={device if device is not None else 'default'} — speak now ...") + chunk = audio_mod.record_while( + config.samplerate, config.channels, device, + held=_timed_hold(seconds), max_utterance=seconds + 1.0, min_utterance=0.0, + ) + except Exception as exc: + print(f"audio capture FAILED: {exc}", file=sys.stderr) + print("fix-chain: install.sh apt deps + ~/.asoundrc pulse shim + Windows mic permission", + file=sys.stderr) + return None + + if chunk is None or chunk.size == 0: + print("captured no audio — check mic permission + RDPSource", file=sys.stderr) + return None + + peak = float(abs(chunk).max()) + if verbose: + out = Path("/tmp/claudedo_test.wav") + _write_wav(out, chunk, config.samplerate) + print(f"captured {chunk.size / config.samplerate:.1f}s, peak amplitude {peak:.3f} -> {out}") + return peak + + def cmd_stop(_args: argparse.Namespace) -> int: if daemon.stop_running(): print("sent stop signal to claudedo") @@ -83,36 +131,24 @@ def cmd_test_audio(args: argparse.Namespace) -> int: except FileNotFoundError: pass + from . import audio as audio_mod + print("\nsounddevice input devices:") try: - from . 
import audio as audio_mod - print("\nsounddevice input devices:") for idx, dev in enumerate(audio_mod.list_devices()): if dev.get("max_input_channels", 0) > 0: print(f" [{idx}] {dev['name']} ({dev['max_input_channels']}ch)") - device = audio_mod.resolve_device(config.stt_device) - print("\npriming mic (RDPSource resumes from suspend) ...") - audio_mod.warm_up(config.samplerate, config.channels, device) - print(f"capturing 3s from device={device if device is not None else 'default'} — speak now ...") - chunk = audio_mod.record_while( - config.samplerate, config.channels, device, - held=_timed_hold(3.0), max_utterance=4.0, min_utterance=0.0, - ) except Exception as exc: - print(f"\naudio capture FAILED: {exc}", file=sys.stderr) - print("fix-chain: install.sh apt deps + ~/.asoundrc pulse shim + Windows mic permission", - file=sys.stderr) - return 1 + print(f" could not list devices: {exc}", file=sys.stderr) - if chunk is None or chunk.size == 0: - print("captured no audio — check mic permission + RDPSource", file=sys.stderr) + peak = _probe_mic(config, seconds=3.0, verbose=True) + if peak is None: return 1 - - out = Path("/tmp/claudedo_test.wav") - _write_wav(out, chunk, config.samplerate) - peak = float(abs(chunk).max()) - print(f"captured {chunk.size / config.samplerate:.1f}s, peak amplitude {peak:.3f} -> {out}") - if peak < 0.005: + if peak < 0.02: print("WARNING: near-silent capture — is the mic muted / permission denied?") + print("fix-chain: Windows mic permission for desktop apps + a non-Krisp default input;") + print(" if still silent, `wsl --shutdown` then reopen to re-attach RDPSource.") + return 1 + print("mic OK.") return 0 @@ -164,6 +200,8 @@ def build_parser() -> argparse.ArgumentParser: sp = sub.add_parser("start", help="run the daemon (foreground)") sp.add_argument("--mode", choices=("listen", "ptt"), help="override input mode") + sp.add_argument("--skip-audio-check", action="store_true", + help="skip the pre-listen mic check") 
sp.set_defaults(func=cmd_start) sub.add_parser("stop", help="stop a running daemon").set_defaults(func=cmd_stop) diff --git a/src/claudedo/daemon.py b/src/claudedo/daemon.py index 5b80b63..47190ce 100644 --- a/src/claudedo/daemon.py +++ b/src/claudedo/daemon.py @@ -162,29 +162,67 @@ class Daemon: require_wake = self.mode == "listen" action = grammar.parse(transcript, cfg.wake_phrases, cfg.match_threshold, require_wake) if action is None: - log.debug("discarded (no wake/command)") + self._emit(f'heard: "{transcript}" -> no command matched') return if action.name == "mode": new_mode = str(action.arg) if new_mode != self.mode: self.mode = new_mode - log.info("mode -> %s", new_mode) + self._emit(f"mode -> {new_mode}") self._refresh_state() return if action.name == "switch": session = target.set_target(str(action.arg)) - log.info("switched target -> %s", session) + self._emit(f"target -> {session}") self._refresh_state() return session = target.resolve_target() if session is None: + self._emit(f'heard: "{transcript}" -> matched: {self._describe(action)} ' + f'-> ERROR no target session (did nothing)') return + self._emit(f'heard: "{transcript}" -> matched: {self._describe(action)} -> target {session}') if action.name == "type" and not cfg.type_autosend: inject.send_literal(session, str(action.arg)) + self._emit(f"injected: literal {str(action.arg)!r} -> {session}") return inject.perform(session, action) + self._emit(f"injected: {self._describe(action)} -> {session}") + + @staticmethod + def _describe(action) -> str: + if action.arg is None: + return action.name.upper() + return f"{action.name.upper()}({action.arg})" + + @staticmethod + def _emit(line: str) -> None: + """print a recognition/action line to the watched terminal""" + print(line, flush=True) + + def _has_wake(self, transcript: str) -> bool: + """true if the utterance starts with a wake phrase (listen-mode gate). 
+ + non-wake speech is dropped without ever printing the transcript — the privacy + invariant: non-command speech is discarded, never recorded. + """ + cfg = self.config + return grammar.strip_wake(transcript, cfg.wake_phrases, cfg.match_threshold, True) is not None + + def _print_startup(self) -> None: + cfg = self.config + dev = cfg.stt_device if cfg.stt_device != "auto" else "default" + target_now = target.read_active() or "(none — run cc to attach)" + self._emit("── claudedo ─────────────────────────────────") + self._emit(f" model: {cfg.stt_model} ({cfg.stt_language})") + self._emit(f" mic: {dev}") + self._emit(f" mode: {self.mode}") + self._emit(f" target: {target_now}") + self._emit(f" wake: {', '.join(cfg.wake_phrases)}") + self._emit(" Ctrl-C to stop") + self._emit("─────────────────────────────────────────────") def _refresh_state(self) -> None: write_state(os.getpid(), self.mode, target.read_active()) @@ -197,8 +235,7 @@ class Daemon: try: self._load() self._refresh_state() - log.info("claudedo running (mode=%s); say a wake phrase + command", self.mode) - print(f"claudedo listening in {self.mode!r} mode — Ctrl-C to stop") + self._print_startup() while not self._stop: audio_chunk = self._capture() if self._stop: @@ -208,7 +245,9 @@ class Daemon: transcript = self._transcriber.transcribe(audio_chunk, self.config.samplerate) if not transcript: continue - log.debug("heard: %s", transcript) + if self.mode == "listen" and not self._has_wake(transcript): + self._emit("dropped: non-wake speech (not recorded)") + continue self._handle(transcript) finally: PIDFILE.unlink(missing_ok=True)