feat: v0.1.4 — HELP menu, 15s cap, wake 0.65, small.en default + docs sync
commands menu now prints under a single [HELP] header with bare indented rows (brightblue usage) instead of 15 repeated [SYSTEM] tags. raise [vad].max_seconds 10 -> 15 for long dictation. wake_fuzzy_threshold 0.6 -> 0.65 (slightly fewer false wakes; note that short spellings such as 'ok/okay claude' still admit some false wakes). carries the prior small.en default, [vad].silence_ms 700, lighter (brightblue) command color, lean injection lines, .en model variants in the validator. README/CLAUDE.md synced. Signed-off-by: disqualifier <dev@disqualifier.me>
This commit is contained in:
parent
e84ef91e7b
commit
5f05a01423
13
README.md
13
README.md
@ -189,11 +189,14 @@ If Claude Code changes its prompt UI, re-confirm against a live session and upda
|
||||
Everything tunable lives in [`config.toml`](config.toml): wake phrases, mode + PTT
|
||||
key, Whisper model/language/device, `[vad]` endpointing, and `[behavior]`
|
||||
(`type_autosend`, fuzzy thresholds, `filler_words`, `auto_target`, `print_heard`).
|
||||
The default model is **`small`** (~1s/command on a strong CPU — snappy, and good
|
||||
enough with initial_prompt biasing); `medium` is more accurate on the coined wake
|
||||
word but ~3× slower (noticeable lag), `large-v3` most accurate/slowest. Every `heard`
|
||||
line shows the STT latency as `(<ms>/<audio>s)` so you can see what a model change
|
||||
costs. `claudedo -c <path> ...` points at a specific config; otherwise it searches
|
||||
The default model is **`small.en`** (the English-only small model — ~1s/command on a
|
||||
strong CPU, more accurate on English than multilingual `small` at the same speed);
|
||||
`medium`/`medium.en` are more accurate but ~3× slower (noticeable lag), `base.en` is
|
||||
snappier/less accurate, `large-v3` most accurate/slowest. Every `heard` line shows the
|
||||
STT latency as `(<ms>/<audio>s)` so you can see what a model change costs. VAD
|
||||
endpointing ends a capture after `[vad].silence_ms` (700 ms) of trailing silence, capped
|
||||
at `[vad].max_seconds` (15 s). `claudedo -c <path> ...` points at a specific config; otherwise
|
||||
it searches
|
||||
`$CLAUDEDO_CONFIG`, `~/.config/claudedo/config.toml`, then `./config.toml`.
|
||||
|
||||
- **STT biasing.** The transcriber is seeded with an `initial_prompt` built from the
|
||||
|
||||
@ -49,8 +49,9 @@ min_utterance = 0.3
|
||||
# a real pause both ends the command AND separates it from following chatter (the
|
||||
# chatter becomes a separate capture that the wake gate then discards).
|
||||
silence_ms = 700
|
||||
# hard cap so continuous noise can't record forever.
|
||||
max_seconds = 10.0
|
||||
# hard cap so continuous noise can't record forever (also the ceiling for a long
|
||||
# dictated `type` phrase).
|
||||
max_seconds = 15.0
|
||||
|
||||
[behavior]
|
||||
# dictation never auto-submits: "type <phrase>" inserts literal text only; you say
|
||||
@ -60,7 +61,7 @@ type_autosend = false
|
||||
# wakes, finds no command, does nothing), so wake is lenient; a false COMMAND fires
|
||||
# the WRONG action, so commands stay tight. lower = more lenient = more matches.
|
||||
# prefer expanding command synonyms over loosening command_fuzzy_threshold.
|
||||
wake_fuzzy_threshold = 0.6
|
||||
wake_fuzzy_threshold = 0.65
|
||||
command_fuzzy_threshold = 0.8
|
||||
# optional filler words that may precede a command and are ignored for matching:
|
||||
# "select yes" / "use yes" behave like "yes". (a filler word followed by a digit is
|
||||
|
||||
@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "claudedo"
|
||||
version = "0.1.3"
|
||||
version = "0.1.4"
|
||||
description = "voice-control daemon for claude code (local STT -> tmux send-keys)"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
"""claudedo — voice-control daemon for claude code (local STT -> tmux send-keys)"""
|
||||
|
||||
__version__ = "0.1.3"
|
||||
__version__ = "0.1.4"
|
||||
|
||||
@ -118,10 +118,10 @@ def load_config(explicit: str | os.PathLike | None = None) -> Config:
|
||||
channels=int(_require(raw, "audio", "channels", (int,), 1)),
|
||||
silence_threshold=float(_require(raw, "audio", "silence_threshold", (int, float), 0.012)),
|
||||
vad_silence_ms=int(_require(raw, "vad", "silence_ms", (int,), 700)),
|
||||
vad_max_seconds=float(_require(raw, "vad", "max_seconds", (int, float), 10.0)),
|
||||
vad_max_seconds=float(_require(raw, "vad", "max_seconds", (int, float), 15.0)),
|
||||
min_utterance=float(_require(raw, "audio", "min_utterance", (int, float), 0.3)),
|
||||
type_autosend=bool(_require(raw, "behavior", "type_autosend", (bool,), False)),
|
||||
wake_fuzzy_threshold=float(_require(raw, "behavior", "wake_fuzzy_threshold", (int, float), 0.6)),
|
||||
wake_fuzzy_threshold=float(_require(raw, "behavior", "wake_fuzzy_threshold", (int, float), 0.65)),
|
||||
command_fuzzy_threshold=float(_require(raw, "behavior", "command_fuzzy_threshold",
|
||||
(int, float), 0.8)),
|
||||
filler_words=tuple(_require(raw, "behavior", "filler_words", (list,),
|
||||
|
||||
@ -26,6 +26,7 @@ _COLORS = {
|
||||
|
||||
SYSTEM = "SYSTEM"
|
||||
VOICE = "VOICE"
|
||||
HELP = "HELP"
|
||||
|
||||
|
||||
class Console:
|
||||
@ -56,3 +57,8 @@ class Console:
|
||||
"""print one line: ``HH:MM:SS [prefix] message`` (message optionally colored)"""
|
||||
line = f"{self._stamp()} {self._paint(f'[{prefix}]', 'dim')} {self._paint(message, color)}"
|
||||
print(line, file=self.stream, flush=True)
|
||||
|
||||
def line(self, message: str, color: str | None = None) -> None:
|
||||
"""print a bare continuation line (no timestamp/prefix) — for multi-row blocks
|
||||
like the help menu, indented under a preceding header"""
|
||||
print(self._paint(message, color), file=self.stream, flush=True)
|
||||
|
||||
@ -18,7 +18,7 @@ from pathlib import Path
|
||||
|
||||
from . import audio, grammar, inject, target
|
||||
from .config import Config
|
||||
from .console import SYSTEM, VOICE, Console
|
||||
from .console import HELP, SYSTEM, VOICE, Console
|
||||
from .stt import Transcriber
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
@ -204,8 +204,9 @@ class Daemon:
|
||||
+ (", ".join(sessions) if sessions else "(none running)"))
|
||||
return
|
||||
if action.name == "commands":
|
||||
self._console.emit(HELP, "voice commands:")
|
||||
for usage, desc in grammar.command_menu():
|
||||
self._console.emit(SYSTEM, f" {usage:<26} {desc}")
|
||||
self._console.line(f" {self._console.paint(f'{usage:<26}', 'brightblue')} {desc}")
|
||||
return
|
||||
if action.name == "customs":
|
||||
self._console.emit(SYSTEM, "custom commands arrive in v0.2.0 (contexts.toml)")
|
||||
|
||||
Loading…
Reference in New Issue
Block a user