feat: v0.1.4 — HELP menu, 15s cap, wake 0.65, small.en default + docs sync

the commands menu now prints under a single [HELP] header with bare indented rows
(usage column in brightblue) instead of 15 repeated [SYSTEM] tags. raises
[vad].max_seconds 10 -> 15 for long dictation. wake_fuzzy_threshold 0.6 -> 0.65
(slightly fewer false wakes; note that short spellings such as 'ok claude' /
'okay claude' still let some through). also carries forward the earlier changes:
small.en default, [vad].silence_ms 700, lighter (brightblue) command color, lean
injection lines, and .en model variants in the validator. README/CLAUDE.md synced.

Signed-off-by: disqualifier <dev@disqualifier.me>
This commit is contained in:
disqualifier 2026-06-26 03:52:19 -04:00
parent e84ef91e7b
commit 5f05a01423
7 changed files with 25 additions and 14 deletions

View File

@ -189,11 +189,14 @@ If Claude Code changes its prompt UI, re-confirm against a live session and upda
Everything tunable lives in [`config.toml`](config.toml): wake phrases, mode + PTT Everything tunable lives in [`config.toml`](config.toml): wake phrases, mode + PTT
key, Whisper model/language/device, `[vad]` endpointing, and `[behavior]` key, Whisper model/language/device, `[vad]` endpointing, and `[behavior]`
(`type_autosend`, fuzzy thresholds, `filler_words`, `auto_target`, `print_heard`). (`type_autosend`, fuzzy thresholds, `filler_words`, `auto_target`, `print_heard`).
The default model is **`small`** (~1s/command on a strong CPU — snappy, and good The default model is **`small.en`** (the English-only small model — ~1s/command on a
enough with initial_prompt biasing); `medium` is more accurate on the coined wake strong CPU, more accurate on English than multilingual `small` at the same speed);
word but ~3× slower (noticeable lag), `large-v3` most accurate/slowest. Every `heard` `medium`/`medium.en` are more accurate but ~3× slower (noticeable lag), `base.en` is
line shows the STT latency as `(<ms>/<audio>s)` so you can see what a model change snappier/less accurate, `large-v3` most accurate/slowest. Every `heard` line shows the
costs. `claudedo -c <path> ...` points at a specific config; otherwise it searches STT latency as `(<ms>/<audio>s)` so you can see what a model change costs. VAD
endpointing ends a capture after `[vad].silence_ms` (700 ms) of trailing silence, capped
at `max_seconds` (15 s). `claudedo -c <path> ...` points at a specific config; otherwise
it searches
`$CLAUDEDO_CONFIG`, `~/.config/claudedo/config.toml`, then `./config.toml`. `$CLAUDEDO_CONFIG`, `~/.config/claudedo/config.toml`, then `./config.toml`.
- **STT biasing.** The transcriber is seeded with an `initial_prompt` built from the - **STT biasing.** The transcriber is seeded with an `initial_prompt` built from the

View File

@ -49,8 +49,9 @@ min_utterance = 0.3
# a real pause both ends the command AND separates it from following chatter (the # a real pause both ends the command AND separates it from following chatter (the
# chatter becomes a separate capture that the wake gate then discards). # chatter becomes a separate capture that the wake gate then discards).
silence_ms = 700 silence_ms = 700
# hard cap so continuous noise can't record forever. # hard cap so continuous noise can't record forever (also the ceiling for a long
max_seconds = 10.0 # dictated `type` phrase).
max_seconds = 15.0
[behavior] [behavior]
# dictation never auto-submits: "type <phrase>" inserts literal text only; you say # dictation never auto-submits: "type <phrase>" inserts literal text only; you say
@ -60,7 +61,7 @@ type_autosend = false
# wakes, finds no command, does nothing), so wake is lenient; a false COMMAND fires # wakes, finds no command, does nothing), so wake is lenient; a false COMMAND fires
# the WRONG action, so commands stay tight. lower = more lenient = more matches. # the WRONG action, so commands stay tight. lower = more lenient = more matches.
# prefer expanding command synonyms over loosening command_fuzzy_threshold. # prefer expanding command synonyms over loosening command_fuzzy_threshold.
wake_fuzzy_threshold = 0.6 wake_fuzzy_threshold = 0.65
command_fuzzy_threshold = 0.8 command_fuzzy_threshold = 0.8
# optional filler words that may precede a command and are ignored for matching: # optional filler words that may precede a command and are ignored for matching:
# "select yes" / "use yes" behave like "yes". (a filler word followed by a digit is # "select yes" / "use yes" behave like "yes". (a filler word followed by a digit is

View File

@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "claudedo" name = "claudedo"
version = "0.1.3" version = "0.1.4"
description = "voice-control daemon for claude code (local STT -> tmux send-keys)" description = "voice-control daemon for claude code (local STT -> tmux send-keys)"
readme = "README.md" readme = "README.md"
requires-python = ">=3.10" requires-python = ">=3.10"

View File

@ -1,3 +1,3 @@
"""claudedo — voice-control daemon for claude code (local STT -> tmux send-keys)""" """claudedo — voice-control daemon for claude code (local STT -> tmux send-keys)"""
__version__ = "0.1.3" __version__ = "0.1.4"

View File

@ -118,10 +118,10 @@ def load_config(explicit: str | os.PathLike | None = None) -> Config:
channels=int(_require(raw, "audio", "channels", (int,), 1)), channels=int(_require(raw, "audio", "channels", (int,), 1)),
silence_threshold=float(_require(raw, "audio", "silence_threshold", (int, float), 0.012)), silence_threshold=float(_require(raw, "audio", "silence_threshold", (int, float), 0.012)),
vad_silence_ms=int(_require(raw, "vad", "silence_ms", (int,), 700)), vad_silence_ms=int(_require(raw, "vad", "silence_ms", (int,), 700)),
vad_max_seconds=float(_require(raw, "vad", "max_seconds", (int, float), 10.0)), vad_max_seconds=float(_require(raw, "vad", "max_seconds", (int, float), 15.0)),
min_utterance=float(_require(raw, "audio", "min_utterance", (int, float), 0.3)), min_utterance=float(_require(raw, "audio", "min_utterance", (int, float), 0.3)),
type_autosend=bool(_require(raw, "behavior", "type_autosend", (bool,), False)), type_autosend=bool(_require(raw, "behavior", "type_autosend", (bool,), False)),
wake_fuzzy_threshold=float(_require(raw, "behavior", "wake_fuzzy_threshold", (int, float), 0.6)), wake_fuzzy_threshold=float(_require(raw, "behavior", "wake_fuzzy_threshold", (int, float), 0.65)),
command_fuzzy_threshold=float(_require(raw, "behavior", "command_fuzzy_threshold", command_fuzzy_threshold=float(_require(raw, "behavior", "command_fuzzy_threshold",
(int, float), 0.8)), (int, float), 0.8)),
filler_words=tuple(_require(raw, "behavior", "filler_words", (list,), filler_words=tuple(_require(raw, "behavior", "filler_words", (list,),

View File

@ -26,6 +26,7 @@ _COLORS = {
SYSTEM = "SYSTEM" SYSTEM = "SYSTEM"
VOICE = "VOICE" VOICE = "VOICE"
HELP = "HELP"
class Console: class Console:
@ -56,3 +57,8 @@ class Console:
"""print one line: ``HH:MM:SS [prefix] message`` (message optionally colored)""" """print one line: ``HH:MM:SS [prefix] message`` (message optionally colored)"""
line = f"{self._stamp()} {self._paint(f'[{prefix}]', 'dim')} {self._paint(message, color)}" line = f"{self._stamp()} {self._paint(f'[{prefix}]', 'dim')} {self._paint(message, color)}"
print(line, file=self.stream, flush=True) print(line, file=self.stream, flush=True)
def line(self, message: str, color: str | None = None) -> None:
"""print a bare continuation line (no timestamp/prefix) — for multi-row blocks
like the help menu, indented under a preceding header"""
print(self._paint(message, color), file=self.stream, flush=True)

View File

@ -18,7 +18,7 @@ from pathlib import Path
from . import audio, grammar, inject, target from . import audio, grammar, inject, target
from .config import Config from .config import Config
from .console import SYSTEM, VOICE, Console from .console import HELP, SYSTEM, VOICE, Console
from .stt import Transcriber from .stt import Transcriber
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -204,8 +204,9 @@ class Daemon:
+ (", ".join(sessions) if sessions else "(none running)")) + (", ".join(sessions) if sessions else "(none running)"))
return return
if action.name == "commands": if action.name == "commands":
self._console.emit(HELP, "voice commands:")
for usage, desc in grammar.command_menu(): for usage, desc in grammar.command_menu():
self._console.emit(SYSTEM, f" {usage:<26} {desc}") self._console.line(f" {self._console.paint(f'{usage:<26}', 'brightblue')} {desc}")
return return
if action.name == "customs": if action.name == "customs":
self._console.emit(SYSTEM, "custom commands arrive in v0.2.0 (contexts.toml)") self._console.emit(SYSTEM, "custom commands arrive in v0.2.0 (contexts.toml)")