load/validate config.toml with clear errors; defaults to listen mode and the 'small' whisper model. all tunables (wake phrases, audio thresholds, type_autosend) live here, no hardcoded paths or secrets in code. Signed-off-by: disqualifier <dev@disqualifier.me>
54 lines
2.4 KiB
TOML
54 lines
2.4 KiB
TOML
# claudedo configuration. everything tunable lives here — no hardcoded paths or
|
|
# secrets in code. loaded and validated by config.py with clear errors.
|
|
|
|
[wake]
|
|
# wake phrases for listen mode. fuzzy-matched: case/space-insensitive, lenient on
|
|
# the coined word "claudedo" (whisper renders it inconsistently). number words are
|
|
# normalized to digits before command matching.
|
|
phrases = ["claudedo", "hey claude"]
|
|
|
|
[input]
|
|
# "listen" (default): continuous capture; only acts on utterances that start with a
|
|
# wake phrase; all other speech is transcribed locally and discarded immediately.
|
|
# this is the hands-free path — works while another window (a game) is focused,
|
|
# because the trigger is your voice over the mic bridge, not a Windows keyboard
|
|
# hook. no system-wide hotkey is installed by design.
|
|
# "ptt": push-to-talk; capture only while ptt_key is held. DESK-ONLY: it captures
|
|
# only while the daemon's own terminal window is focused (there is deliberately no
|
|
# global hotkey — a system-wide keyboard hook is the keylogger/cheat silhouette we
|
|
# refuse to build). use "listen" for hands-free-while-gaming.
|
|
mode = "listen"
|
|
ptt_key = "space"
|
|
|
|
[stt]
|
|
# faster-whisper model size. "small" is a good accuracy/latency balance for the
|
|
# short command grammar (~sub-second per chunk on a strong cpu). if the coined wake
|
|
# word "claudedo" is recognized poorly, bump to "medium" (slower per chunk).
|
|
model = "small"
|
|
language = "en"
|
|
# mic device: "auto", or a sounddevice device index (integer) / substring of a
|
|
# device name. run `claudedo test-audio` to list devices.
|
|
device = "auto"
|
|
# faster-whisper compute device: "auto" (cpu here), "cpu", or "cuda".
|
|
compute = "auto"
|
|
|
|
[audio]
|
|
# capture parameters. 16 kHz mono is what whisper expects.
|
|
samplerate = 16000
|
|
channels = 1
|
|
# listen-mode silence segmentation: an utterance ends after this many seconds below
|
|
# the rms threshold. keeps latency low without streaming.
|
|
silence_threshold = 0.012
|
|
silence_duration = 0.8
|
|
# ignore utterances shorter than this (clicks, coughs).
|
|
min_utterance = 0.3
|
|
# hard cap on a single utterance so a stuck stream can't grow unbounded.
|
|
max_utterance = 15.0
|
|
|
|
[behavior]
|
|
# dictation never auto-submits: "type <phrase>" inserts literal text only; you say
|
|
# "send" separately to submit (read-before-send).
|
|
type_autosend = false
|
|
# fuzzy match ratio (0..1) required to accept a wake phrase / command token.
|
|
match_threshold = 0.8
|