feat: backspace/space/erase editing commands + colored prefixed console

voice editing: 'space [<n>]' inserts spaces, 'backspace [<n>]' (alias delete)
deletes chars, 'erase' (alias clear/wipe) wipes the current input. the daemon
tracks a per-session uncommitted-input char count so backspace is capped at the
last submit boundary and erase clears exactly back to it; submit/set reset it.
keys.py gains BSpace/space; grammar gains a count parser (digits + number words).

new console.py renders every daemon line as 'HH:MM:SS [prefix] message' with
color: [<session>] for injected lines (green), [SYSTEM] for state, [VOICE] for
recognition/drops (red/dim). bump to 0.1.2.

Signed-off-by: disqualifier <dev@disqualifier.me>
This commit is contained in:
disqualifier 2026-06-26 01:17:22 -04:00
parent d734161c97
commit 3dbeea266b
8 changed files with 185 additions and 40 deletions

View File

@ -108,6 +108,9 @@ Wake phrases (listen mode), fuzzy-matched: **"claudedo"**, **"hey claude"**.
| `approve` / `deny` | allow / deny a permission prompt | | `approve` / `deny` | allow / deny a permission prompt |
| `send` / `enter` | submit (Enter) | | `send` / `enter` | submit (Enter) |
| `type <phrase>` | insert literal text, **no** submit (read-before-send; say "send") | | `type <phrase>` | insert literal text, **no** submit (read-before-send; say "send") |
| `space [<n>]` | insert n spaces (default 1) |
| `backspace [<n>]` (alias `delete`) | delete n chars (default 1), capped at the last submit boundary |
| `erase` (alias `clear`/`wipe`) | delete everything typed since the last submit/boundary |
| `mode ptt` / `mode listen` | switch input mode | | `mode ptt` / `mode listen` | switch input mode |
| `set <name>` (alias `sticky`/`switch`) | set the **sticky** target → `claude-<name>` (persists) | | `set <name>` (alias `sticky`/`switch`) | set the **sticky** target → `claude-<name>` (persists) |
| `target <name> <command>` | **one-shot** override: run that command on `claude-<name>` for this utterance only; sticky default unchanged | | `target <name> <command>` | **one-shot** override: run that command on `claude-<name>` for this utterance only; sticky default unchanged |
@ -171,11 +174,19 @@ If Claude Code changes its prompt UI, re-confirm against a live session and upda
## Config ## Config
Everything tunable lives in [`config.toml`](config.toml): wake phrases, mode + PTT Everything tunable lives in [`config.toml`](config.toml): wake phrases, mode + PTT
key, Whisper model/language/device, audio segmentation thresholds, and key, Whisper model/language/device, audio segmentation thresholds, and `[behavior]`
`type_autosend = false`. The default model is `small`; bump to `medium` if the coined (`type_autosend`, `filler_words`, `auto_target`, `print_heard`). The default model is
wake word is recognized poorly. `claudedo -c <path> ...` points at a specific config; `small`; bump to `medium` if the coined wake word is recognized poorly. `claudedo -c
otherwise it searches `$CLAUDEDO_CONFIG`, `~/.config/claudedo/config.toml`, then <path> ...` points at a specific config; otherwise it searches `$CLAUDEDO_CONFIG`,
`./config.toml`. `~/.config/claudedo/config.toml`, then `./config.toml`.
- **`auto_target`** (default `false`): with no sticky target set and exactly one
`claude-*` session running, `false` makes a bare command do nothing and ask you to
`set` one; `true` auto-targets that single session.
- **`print_heard`** (default `false`, debug): prints non-wake transcripts to the
console so you can see how Whisper renders your wake word. Turn it on to debug
detection, then off. Whisper has no token for "claudedo" — it commonly emits
"claude do" or "claude due", both of which are in the default wake list.
## Requirements ## Requirements

View File

@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "claudedo" name = "claudedo"
version = "0.1.1" version = "0.1.2"
description = "voice-control daemon for claude code (local STT -> tmux send-keys)" description = "voice-control daemon for claude code (local STT -> tmux send-keys)"
readme = "README.md" readme = "README.md"
requires-python = ">=3.10" requires-python = ">=3.10"

View File

@ -1,3 +1,3 @@
"""claudedo — voice-control daemon for claude code (local STT -> tmux send-keys)""" """claudedo — voice-control daemon for claude code (local STT -> tmux send-keys)"""
__version__ = "0.1.1" __version__ = "0.1.2"

51
src/claudedo/console.py Normal file
View File

@ -0,0 +1,51 @@
"""colored, prefixed console output for the daemon's recognition/action feed.
every line is ``HH:MM:SS [PREFIX] message``. prefixes group the source: a session
name (e.g. ``[claude-libs]``) for anything injected into a tmux session, ``[SYSTEM]``
for daemon-control/state lines, and ``[VOICE]`` for STT/recognition lines. color is
opt-in via tty detection (or forced): green for successful injections, red for
drops/errors, dim for routine. falls back to plain text when stdout is not a tty.
"""
from __future__ import annotations
import sys
import time
# ANSI SGR escape sequences. RESET switches off whatever attribute a color turned on.
RESET = "\033[0m"
_COLORS = dict(
    green="\033[32m",
    red="\033[31m",
    yellow="\033[33m",
    cyan="\033[36m",
    dim="\033[2m",
    bold="\033[1m",
)
# fixed prefixes for lines that are not attributed to a tmux session name
SYSTEM, VOICE = "SYSTEM", "VOICE"
class Console:
"""formats and prints daemon log lines with timestamp, prefix, and color"""
def __init__(self, color: bool | None = None, stream=None, clock=None) -> None:
self.stream = stream if stream is not None else sys.stdout
self._clock = clock or time.localtime
if color is None:
color = hasattr(self.stream, "isatty") and self.stream.isatty()
self.color = bool(color)
def _stamp(self) -> str:
t = self._clock()
return f"{t.tm_hour:02d}:{t.tm_min:02d}:{t.tm_sec:02d}"
def _paint(self, text: str, color: str | None) -> str:
if not self.color or not color or color not in _COLORS:
return text
return f"{_COLORS[color]}{text}{RESET}"
def emit(self, prefix: str, message: str, color: str | None = None) -> None:
"""print one line: ``HH:MM:SS [prefix] message`` (message optionally colored)"""
line = f"{self._stamp()} {self._paint(f'[{prefix}]', 'dim')} {self._paint(message, color)}"
print(line, file=self.stream, flush=True)

View File

@ -18,6 +18,7 @@ from pathlib import Path
from . import audio, grammar, inject, target from . import audio, grammar, inject, target
from .config import Config from .config import Config
from .console import SYSTEM, VOICE, Console
from .stt import Transcriber from .stt import Transcriber
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -114,6 +115,8 @@ class Daemon:
self._transcriber: Transcriber | None = None self._transcriber: Transcriber | None = None
self._device: int | None = None self._device: int | None = None
self._ptt = _PTTKey() self._ptt = _PTTKey()
self._pending: dict[str, int] = {}
self._console = Console()
def _install_signals(self) -> None: def _install_signals(self) -> None:
signal.signal(signal.SIGTERM, self._on_signal) signal.signal(signal.SIGTERM, self._on_signal)
@ -163,7 +166,7 @@ class Daemon:
parsed = grammar.parse(transcript, cfg.wake_phrases, cfg.match_threshold, require_wake, parsed = grammar.parse(transcript, cfg.wake_phrases, cfg.match_threshold, require_wake,
filler=cfg.filler_words) filler=cfg.filler_words)
if parsed is None or parsed.action is None: if parsed is None or parsed.action is None:
self._emit(f'heard: "{transcript}" -> no command matched') self._console.emit(VOICE, f'heard "{transcript}" -> no command matched', "yellow")
return return
action = parsed.action action = parsed.action
@ -171,36 +174,75 @@ class Daemon:
new_mode = str(action.arg) new_mode = str(action.arg)
if new_mode != self.mode: if new_mode != self.mode:
self.mode = new_mode self.mode = new_mode
self._emit(f"mode -> {new_mode}") self._console.emit(SYSTEM, f"mode -> {new_mode}", "cyan")
self._refresh_state() self._refresh_state()
return return
if action.name == "set": if action.name == "set":
session = target.set_target(str(action.arg)) session = target.set_target(str(action.arg))
self._emit(f"set sticky -> {session}") self._pending.pop(session, None)
self._console.emit(SYSTEM, f"set sticky -> {session}", "cyan")
self._refresh_state() self._refresh_state()
return return
if action.name == "unset": if action.name == "unset":
target.unset_target() target.unset_target()
self._emit("unset (cleared)") self._console.emit(SYSTEM, "unset (cleared)", "cyan")
self._refresh_state() self._refresh_state()
return return
if action.name == "list": if action.name == "list":
sessions = target.list_sessions() sessions = target.list_sessions()
self._emit("list -> " + (", ".join(sessions) if sessions else "(none running)")) self._console.emit(SYSTEM, "list -> " + (", ".join(sessions) if sessions else "(none running)"))
return return
session, reason = target.resolve(parsed.one_shot) session, reason = target.resolve(parsed.one_shot, auto_target=cfg.auto_target)
if session is None: if session is None:
self._emit(f'heard: "{transcript}" -> {reason} -> matched {self._describe(action)} ' self._console.emit(VOICE, f'heard "{transcript}" -> {reason} -> '
f'-> did nothing') f'{self._describe(action)} did nothing', "red")
return return
prefix = f'heard: "{transcript}" -> {reason} -> matched {self._describe(action)}' self._inject(session, transcript, reason, action)
if action.name == "type" and not cfg.type_autosend:
inject.send_literal(session, str(action.arg)) def _inject(self, session: str, transcript: str, reason: str, action) -> None:
self._emit(f"{prefix} -> injected literal {str(action.arg)!r} -> {session}") """run a resolved command against `session`, tracking the uncommitted-input
buffer so backspace/erase delete only back to the last submit boundary"""
heard = f'heard "{transcript}" ({reason})'
name = action.name
if name == "type":
text = str(action.arg)
inject.send_literal(session, text)
self._pending[session] = self._pending.get(session, 0) + len(text)
if self.config.type_autosend:
inject.send_named(session, inject.keys.SUBMIT)
self._pending[session] = 0
self._console.emit(session, f"{heard} -> typed {text!r}"
+ (" + send" if self.config.type_autosend else ""), "green")
return return
if name == "space":
n = int(action.arg)
inject.perform(session, action) inject.perform(session, action)
self._emit(f"{prefix} -> injected {self._describe(action)} -> {session}") self._pending[session] = self._pending.get(session, 0) + n
self._console.emit(session, f"{heard} -> space x{n}", "green")
return
if name == "backspace":
have = self._pending.get(session, 0)
n = min(int(action.arg), have)
if n:
inject.perform(session, grammar.Action("backspace", n))
self._pending[session] = have - n
self._console.emit(session, f"{heard} -> backspace x{n}"
+ ("" if n == int(action.arg) else " (capped at boundary)"), "green")
return
if name == "erase":
n = self._pending.get(session, 0)
if n:
inject.perform(session, grammar.Action("erase", n))
self._pending[session] = 0
self._console.emit(session, f"{heard} -> erase x{n} (to last boundary)", "green")
return
inject.perform(session, action)
if name == "submit":
self._pending[session] = 0
self._console.emit(session, f"{heard} -> {self._describe(action)}", "green")
@staticmethod @staticmethod
def _describe(action) -> str: def _describe(action) -> str:
@ -208,11 +250,6 @@ class Daemon:
return action.name.upper() return action.name.upper()
return f"{action.name.upper()}({action.arg})" return f"{action.name.upper()}({action.arg})"
@staticmethod
def _emit(line: str) -> None:
"""print a recognition/action line to the watched terminal"""
print(line, flush=True)
def _has_wake(self, transcript: str) -> bool: def _has_wake(self, transcript: str) -> bool:
"""true if the utterance starts with a wake phrase (listen-mode gate). """true if the utterance starts with a wake phrase (listen-mode gate).
@ -225,15 +262,11 @@ class Daemon:
def _print_startup(self) -> None: def _print_startup(self) -> None:
cfg = self.config cfg = self.config
dev = cfg.stt_device if cfg.stt_device != "auto" else "default" dev = cfg.stt_device if cfg.stt_device != "auto" else "default"
target_now = target.read_active() or "(none — run cc to attach)" target_now = target.read_active() or "(none — run cc / set <name>)"
self._emit("── claudedo ─────────────────────────────────") self._console.emit(SYSTEM, f"claudedo {self.mode} mode — Ctrl-C to stop", "bold")
self._emit(f" model: {cfg.stt_model} ({cfg.stt_language})") self._console.emit(SYSTEM, f"model {cfg.stt_model} ({cfg.stt_language}) · mic {dev} · "
self._emit(f" mic: {dev}") f"target {target_now}")
self._emit(f" mode: {self.mode}") self._console.emit(SYSTEM, "wake: " + ", ".join(cfg.wake_phrases))
self._emit(f" target: {target_now}")
self._emit(f" wake: {', '.join(cfg.wake_phrases)}")
self._emit(" Ctrl-C to stop")
self._emit("─────────────────────────────────────────────")
def _refresh_state(self) -> None: def _refresh_state(self) -> None:
write_state(os.getpid(), self.mode, target.read_active()) write_state(os.getpid(), self.mode, target.read_active())
@ -257,7 +290,10 @@ class Daemon:
if not transcript: if not transcript:
continue continue
if self.mode == "listen" and not self._has_wake(transcript): if self.mode == "listen" and not self._has_wake(transcript):
self._emit("dropped: non-wake speech (not recorded)") if self.config.print_heard:
self._console.emit(VOICE, f'heard (dropped) "{transcript}"', "red")
else:
self._console.emit(VOICE, "dropped: non-wake speech (not recorded)", "dim")
continue continue
self._handle(transcript) self._handle(transcript)
finally: finally:

View File

@ -27,6 +27,12 @@ _NUMBER_WORDS = {
_INDEX_WORDS = {"1": 1, "2": 2, "3": 3, "4": 4} _INDEX_WORDS = {"1": 1, "2": 2, "3": 3, "4": 4}
_COUNT_WORDS = {
"five": 5, "six": 6, "seven": 7, "eight": 8, "nine": 9, "ten": 10,
"eleven": 11, "twelve": 12, "thirteen": 13, "fourteen": 14, "fifteen": 15,
"sixteen": 16, "seventeen": 17, "eighteen": 18, "nineteen": 19, "twenty": 20,
}
_STICKY_VERBS = ("set", "sticky", "switch") _STICKY_VERBS = ("set", "sticky", "switch")
_ONESHOT_VERBS = ("target",) _ONESHOT_VERBS = ("target",)
_UNSET_VERBS = ("unset", "unsticky") _UNSET_VERBS = ("unset", "unsticky")
@ -39,9 +45,10 @@ DEFAULT_FILLER = ("select", "use", "choose")
class Action: class Action:
"""a matched command: a name plus an optional argument. """a matched command: a name plus an optional argument.
names: yes, no, select, approve, deny, submit, type, cancel, mode, set, unset, names: yes, no, select, approve, deny, submit, type, space, backspace, erase,
list. arg carries the select index (int), the literal text for ``type``, the mode cancel, mode, set, unset, list. arg carries the select index (int), the literal
for ``mode``, or the session short-name for ``set``. text for ``type``, the count for ``space``/``backspace`` (int), the mode for
``mode``, or the session short-name for ``set``.
""" """
name: str name: str
@ -123,6 +130,22 @@ def _fuzzy_in(token: str, options: tuple[str, ...], threshold: float) -> bool:
return any(_ratio(token, opt) >= threshold for opt in options) return any(_ratio(token, opt) >= threshold for opt in options)
def _leading_count(rest: list[str], default: int = 1) -> int:
"""read a count from the first token (digit or number word), else the default.
'backspace 3' -> 3, 'backspace ten' -> 10 (normalize maps small words to digits;
larger words come from _COUNT_WORDS), 'backspace' -> default.
"""
if not rest:
return default
tok = rest[0]
if tok.isdigit():
return max(0, int(tok))
if tok in _COUNT_WORDS:
return _COUNT_WORDS[tok]
return default
def match_command(remainder: str, threshold: float) -> Action | None: def match_command(remainder: str, threshold: float) -> Action | None:
"""map a normalized command remainder to an Action, or None if unrecognized. """map a normalized command remainder to an Action, or None if unrecognized.
@ -160,6 +183,13 @@ def match_command(remainder: str, threshold: float) -> Action | None:
text = " ".join(rest).strip() text = " ".join(rest).strip()
return Action("type", text) if text else None return Action("type", text) if text else None
if _fuzzy_in(head, ("backspace", "delete"), threshold):
return Action("backspace", _leading_count(rest, default=1))
if _fuzzy_in(head, ("space",), threshold):
return Action("space", _leading_count(rest, default=1))
if _fuzzy_in(head, ("erase", "clear", "wipe"), threshold):
return Action("erase")
if _fuzzy_in(head, ("mode",), threshold) and rest: if _fuzzy_in(head, ("mode",), threshold) and rest:
if _fuzzy_in(rest[0], ("ptt",), threshold) or "push" in rest[0]: if _fuzzy_in(rest[0], ("ptt",), threshold) or "push" in rest[0]:
return Action("mode", "ptt") return Action("mode", "ptt")

View File

@ -45,11 +45,18 @@ class OutputHandler(ABC):
def send_literal(self, session: str, text: str) -> None: def send_literal(self, session: str, text: str) -> None:
"""emit literal text into the input box without submitting (``type``)""" """emit literal text into the input box without submitting (``type``)"""
def send_repeat(self, session: str, token: str, count: int) -> None:
    """send the named key `token` to `session` `count` times (e.g. BSpace x n).

    the repeats are handed to ``send_named`` as one sequence in a single call;
    a count of zero or less sends nothing.
    """
    if count > 0:
        self.send_named(session, [token] * count)
def perform(self, session: str, action) -> bool: def perform(self, session: str, action) -> bool:
"""resolve a grammar.Action to keystrokes and emit them. returns acted?. """resolve a grammar.Action to keystrokes and emit them. returns acted?.
``switch`` and ``mode`` are handled by the daemon (they change daemon state, ``switch``/``set``/``mode`` etc. are handled by the daemon (they change daemon
not the claude session), so they are ignored here. state, not the claude session), so they are ignored here. ``erase`` arrives
with action.arg already set to the count the daemon wants backspaced.
""" """
name = action.name name = action.name
if name == "yes": if name == "yes":
@ -72,6 +79,10 @@ class OutputHandler(ABC):
self.send_named(session, seq) self.send_named(session, seq)
elif name == "type": elif name == "type":
self.send_literal(session, str(action.arg)) self.send_literal(session, str(action.arg))
elif name == "space":
self.send_literal(session, " " * int(action.arg))
elif name in ("backspace", "erase"):
self.send_repeat(session, keys.BACKSPACE[0], int(action.arg))
else: else:
return False return False
return True return True

View File

@ -37,6 +37,12 @@ DENY = ["3"]
SUBMIT = ["Enter"] SUBMIT = ["Enter"]
CANCEL = ["Escape"] CANCEL = ["Escape"]
# BACKSPACE deletes one char to the left; SPACE is one literal space. BSpace is
# tmux's name for the backspace key. BACKSPACE[0] is the token repeated for
# `backspace <n>` and for `erase` (n = the daemon's tracked uncommitted-input count).
# NOTE(review): the `space <n>` branch visible in perform() sends a literal
# " " * n rather than SPACE — confirm whether SPACE is referenced anywhere else.
BACKSPACE = ["BSpace"]
SPACE = [" "]
SELECT_BY_INDEX = { SELECT_BY_INDEX = {
1: SELECT_1, 1: SELECT_1,
2: SELECT_2, 2: SELECT_2,