feat: v0.1.4 — HELP menu, 15s cap, wake 0.65, small.en default + docs sync

commands menu now prints under a single [HELP] header with bare indented rows
(brightblue usage) instead of 15 repeated [SYSTEM] tags. raise [vad].max_seconds
10 -> 15 for long dictation. wake_fuzzy_threshold 0.6 -> 0.65 (slightly fewer false
wakes; note that short spellings like 'ok/okay claude' still let some through). carries the prior
small.en default, [vad].silence_ms 700, lighter (brightblue) command color, lean
injection lines, .en model variants in the validator. README/CLAUDE.md synced.

Signed-off-by: disqualifier <dev@disqualifier.me>
This commit is contained in:
disqualifier 2026-06-26 03:52:19 -04:00
parent e84ef91e7b
commit 5f05a01423
7 changed files with 25 additions and 14 deletions

View File

@ -189,11 +189,14 @@ If Claude Code changes its prompt UI, re-confirm against a live session and upda
Everything tunable lives in [`config.toml`](config.toml): wake phrases, mode + PTT
key, Whisper model/language/device, `[vad]` endpointing, and `[behavior]`
(`type_autosend`, fuzzy thresholds, `filler_words`, `auto_target`, `print_heard`).
The default model is **`small`** (~1s/command on a strong CPU — snappy, and good
enough with initial_prompt biasing); `medium` is more accurate on the coined wake
word but ~3× slower (noticeable lag), `large-v3` most accurate/slowest. Every `heard`
line shows the STT latency as `(<ms>/<audio>s)` so you can see what a model change
costs. `claudedo -c <path> ...` points at a specific config; otherwise it searches
The default model is **`small.en`** (the English-only small model — ~1s/command on a
strong CPU, more accurate on English than multilingual `small` at the same speed);
`medium`/`medium.en` are more accurate but ~3× slower (noticeable lag), `base.en` is
snappier/less accurate, `large-v3` most accurate/slowest. Every `heard` line shows the
STT latency as `(<ms>/<audio>s)` so you can see what a model change costs. VAD
endpointing ends a capture after `[vad].silence_ms` (700 ms) of trailing silence, capped
at `[vad].max_seconds` (15 s). `claudedo -c <path> ...` points at a specific config; otherwise
it searches
`$CLAUDEDO_CONFIG`, `~/.config/claudedo/config.toml`, then `./config.toml`.
- **STT biasing.** The transcriber is seeded with an `initial_prompt` built from the

View File

@ -49,8 +49,9 @@ min_utterance = 0.3
# a real pause both ends the command AND separates it from following chatter (the
# chatter becomes a separate capture that the wake gate then discards).
silence_ms = 700
# hard cap so continuous noise can't record forever.
max_seconds = 10.0
# hard cap so continuous noise can't record forever (also the ceiling for a long
# dictated `type` phrase).
max_seconds = 15.0
[behavior]
# dictation never auto-submits: "type <phrase>" inserts literal text only; you say
@ -60,7 +61,7 @@ type_autosend = false
# wakes, finds no command, does nothing), so wake is lenient; a false COMMAND fires
# the WRONG action, so commands stay tight. lower = more lenient = more matches.
# prefer expanding command synonyms over loosening command_fuzzy_threshold.
wake_fuzzy_threshold = 0.6
wake_fuzzy_threshold = 0.65
command_fuzzy_threshold = 0.8
# optional filler words that may precede a command and are ignored for matching:
# "select yes" / "use yes" behave like "yes". (a filler word followed by a digit is

View File

@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "claudedo"
version = "0.1.3"
version = "0.1.4"
description = "voice-control daemon for claude code (local STT -> tmux send-keys)"
readme = "README.md"
requires-python = ">=3.10"

View File

@ -1,3 +1,3 @@
"""claudedo — voice-control daemon for claude code (local STT -> tmux send-keys)"""
__version__ = "0.1.3"
__version__ = "0.1.4"

View File

@ -118,10 +118,10 @@ def load_config(explicit: str | os.PathLike | None = None) -> Config:
channels=int(_require(raw, "audio", "channels", (int,), 1)),
silence_threshold=float(_require(raw, "audio", "silence_threshold", (int, float), 0.012)),
vad_silence_ms=int(_require(raw, "vad", "silence_ms", (int,), 700)),
vad_max_seconds=float(_require(raw, "vad", "max_seconds", (int, float), 10.0)),
vad_max_seconds=float(_require(raw, "vad", "max_seconds", (int, float), 15.0)),
min_utterance=float(_require(raw, "audio", "min_utterance", (int, float), 0.3)),
type_autosend=bool(_require(raw, "behavior", "type_autosend", (bool,), False)),
wake_fuzzy_threshold=float(_require(raw, "behavior", "wake_fuzzy_threshold", (int, float), 0.6)),
wake_fuzzy_threshold=float(_require(raw, "behavior", "wake_fuzzy_threshold", (int, float), 0.65)),
command_fuzzy_threshold=float(_require(raw, "behavior", "command_fuzzy_threshold",
(int, float), 0.8)),
filler_words=tuple(_require(raw, "behavior", "filler_words", (list,),

View File

@ -26,6 +26,7 @@ _COLORS = {
# prefix tags passed to Console.emit; each is rendered in brackets (e.g. ``[HELP]``)
# at the start of an emitted line
SYSTEM = "SYSTEM"
VOICE = "VOICE"
HELP = "HELP"
class Console:
@ -56,3 +57,8 @@ class Console:
"""print one line: ``HH:MM:SS [prefix] message`` (message optionally colored)"""
line = f"{self._stamp()} {self._paint(f'[{prefix}]', 'dim')} {self._paint(message, color)}"
print(line, file=self.stream, flush=True)
def line(self, message: str, color: str | None = None) -> None:
    """write ``message`` on its own row with no timestamp and no ``[prefix]`` tag
    (optionally colored) — meant for the rows of a multi-line block, such as the
    help menu, that sit beneath a header emitted just before"""
    painted = self._paint(message, color)
    print(painted, file=self.stream, flush=True)

View File

@ -18,7 +18,7 @@ from pathlib import Path
from . import audio, grammar, inject, target
from .config import Config
from .console import SYSTEM, VOICE, Console
from .console import HELP, SYSTEM, VOICE, Console
from .stt import Transcriber
log = logging.getLogger(__name__)
@ -204,8 +204,9 @@ class Daemon:
+ (", ".join(sessions) if sessions else "(none running)"))
return
if action.name == "commands":
self._console.emit(HELP, "voice commands:")
for usage, desc in grammar.command_menu():
self._console.emit(SYSTEM, f" {usage:<26} {desc}")
self._console.line(f" {self._console.paint(f'{usage:<26}', 'brightblue')} {desc}")
return
if action.name == "customs":
self._console.emit(SYSTEM, "custom commands arrive in v0.2.0 (contexts.toml)")