# claudedo configuration. everything tunable lives here — no hardcoded paths or
# secrets in code. loaded and validated by config.py with clear errors.

[wake]
# wake phrases for listen mode. fuzzy-matched: case/space-insensitive, lenient on
# the coined word "claudedo" (whisper renders it inconsistently). number words are
# normalized to digits before command matching.
phrases = ["claudedo", "claude do", "hey claude", "ok claude", "okay claude"]

[input]
# "listen" (default): continuous capture; only acts on utterances that start with a
# wake phrase; all other speech is transcribed locally and discarded immediately.
# this is the hands-free path — works while another window (a game) is focused,
# because the trigger is your voice over the mic bridge, not a Windows keyboard
# hook. no system-wide hotkey is installed by design.
# "ptt": push-to-talk; capture only while ptt_key is held. DESK-ONLY: it captures
# only while the daemon's own terminal window is focused (there is deliberately no
# global hotkey — a system-wide keyboard hook is the keylogger/cheat silhouette we
# refuse to build). use "listen" for hands-free-while-gaming.
mode = "listen"
ptt_key = "space"

[stt]
# faster-whisper model size. "small.en" is the default — the English-only small model
# (~1s/command on a strong cpu, more accurate on english than multilingual "small" at
# the same speed). "medium"/"medium.en" are more accurate but ~3x slower (noticeable
# lag); "large-v3" is most accurate and slowest. drop to "base.en" for max snappiness
# (less accurate). bump only if recognition is poor.
model = "small.en"
language = "en"

# mic device: "auto", or a sounddevice device index (integer) / substring of a
# device name. run `claudedo test-audio` to list devices.
device = "auto"

# faster-whisper compute device: "auto" (cpu here), "cpu", or "cuda".
compute = "auto"

[audio]
# capture parameters. 16 kHz mono is what whisper expects.
samplerate = 16000
channels = 1

# rms energy below this counts as silence (the VAD onset/endpoint floor).
silence_threshold = 0.012

# ignore utterances shorter than this (clicks, coughs).
# NOTE(review): the unit is not stated here; presumably seconds — confirm in config.py.
min_utterance = 0.3

[vad]
# Alexa-style record-until-pause endpointing (listen mode). capture starts on speech
# onset and ends after this much trailing silence — the natural end of an utterance.
# a real pause both ends the command AND separates it from following chatter (the
# chatter becomes a separate capture that the wake gate then discards).
silence_ms = 700

# hard cap so continuous noise can't record forever (also the ceiling for a long
# dictated `type` phrase).
max_seconds = 15.0

[behavior]
# dictation never auto-submits: "type " inserts literal text only; you say
# "send" separately to submit (read-before-send).
type_autosend = false

# fuzzy match ratios (0..1). the asymmetry is deliberate: a false WAKE is cheap (it
# wakes, finds no command, does nothing), so wake is lenient; a false COMMAND fires
# the WRONG action, so commands stay tight. lower = more lenient = more matches.
# prefer expanding command synonyms over loosening command_fuzzy_threshold.
wake_fuzzy_threshold = 0.65
command_fuzzy_threshold = 0.8

# optional filler words that may precede a command and are ignored for matching:
# "select yes" / "use yes" behave like "yes". (a filler word followed by a digit is
# the select command, e.g. "select 1", and is not dropped.)
filler_words = ["select", "use", "choose"]

# when no sticky target is set and exactly ONE claude-* session is running:
#   false (default) -> require an explicit `set ` or one-shot `target `;
#                      a bare command does nothing and tells you to set one.
#   true            -> auto-target that single session (convenience).
auto_target = false

# DEBUG ONLY — relaxes the privacy invariant. when true, the daemon console prints
# the raw transcript of EVERY utterance, including non-wake speech it would otherwise
# drop silently (shown as `heard (dropped): ""`). use it to see exactly
# how Whisper renders your wake word, then turn it OFF. default false: non-wake speech
# is discarded without ever printing the transcript.
print_heard = false

# how the `context ` command assembles the blurb + instruction.
# true (default): blurb, a soft newline (Shift+Enter — needs the extended-keys tmux
# settings install.sh appends), then the instruction. if Shift+Enter is at all flaky
# in your terminal (it submits or does nothing), set false to flatten onto one line
# with context_separator between blurb and instruction — the blank line is cosmetic,
# not worth a submit risk. either way the assembled text is NEVER auto-submitted.
context_multiline = true

# separator inserted between blurb and instruction when context_multiline = false.
context_separator = " — "

[sound]
# earcons — short confirmation tones on daemon events so you get eyes-free feedback
# ("did it hear me?") without watching the terminal. tones are SHORT (<300ms) and quiet;
# they play OUT through WSLg's PulseAudio sink (paplay-first, sounddevice fallback, then
# powershell.exe). additive to the console feed — mute these and read at the desk, or
# hear them eyes-free. a dead speaker never blocks/breaks a command (fire-and-forget).
enabled = true

# blip when a wake phrase is recognized. OFF by default: a blip right before you speak
# the command can bleed into its capture, and it's chatty. turn on only if you want it.
on_wake = false

# positive blip when a command is recognized/injected.
on_accept = true

# distinct lower buzz when nothing matched or the target was missing (did nothing).
on_no_match = true

# rising chime when a send/submit is injected.
on_submit = true

# best-effort 0.0-1.0 (scaled for sounddevice, --volume for paplay; ignored by the
# powershell fallback, which has no volume control).
volume = 0.5

# optional per-event overrides to swap in your own .wav files, e.g.:
# [sound.files]
# accept = "~/sounds/my_accept.wav"
[sound.files]