config: typed config loader and config.toml

load/validate config.toml with clear errors; defaults to listen mode and
the 'small' whisper model. all tunables (wake phrases, audio thresholds,
type_autosend) live here, no hardcoded paths or secrets in code.

Signed-off-by: disqualifier <dev@disqualifier.me>
This commit is contained in:
disqualifier 2026-06-25 17:55:08 -04:00
parent b6d8683e39
commit c61eb85748
2 changed files with 177 additions and 0 deletions

53
config.toml Normal file
View File

@ -0,0 +1,53 @@
# claudedo configuration. everything tunable lives here — no hardcoded paths or
# secrets in code. loaded and validated by config.py with clear errors.
[wake]
# wake phrases for listen mode. fuzzy-matched: case/space-insensitive, lenient on
# the coined word "claudedo" (whisper renders it inconsistently). number words are
# normalized to digits before command matching.
phrases = ["claudedo", "hey claude"]
[input]
# "listen" (default): continuous capture; only acts on utterances that start with a
# wake phrase; all other speech is transcribed locally and discarded immediately.
# this is the hands-free path — works while another window (a game) is focused,
# because the trigger is your voice over the mic bridge, not a Windows keyboard
# hook. no system-wide hotkey is installed by design.
# "ptt": push-to-talk; capture only while ptt_key is held. DESK-ONLY: it captures
# only while the daemon's own terminal window is focused (there is deliberately no
# global hotkey — a system-wide keyboard hook is the keylogger/cheat silhouette we
# refuse to build). use "listen" for hands-free-while-gaming.
mode = "listen"
ptt_key = "space"
[stt]
# faster-whisper model size. "small" is a good accuracy/latency balance for the
# short command grammar (under a second per chunk on a strong cpu). if the coined wake
# word "claudedo" is recognized poorly, bump to "medium" (slower per chunk).
model = "small"
language = "en"
# mic device: "auto", or a sounddevice device index (integer) / substring of a
# device name. run `claudedo test-audio` to list devices.
device = "auto"
# faster-whisper compute device: "auto" (cpu here), "cpu", or "cuda".
compute = "auto"
[audio]
# capture parameters. 16 kHz mono is what whisper expects.
samplerate = 16000
channels = 1
# listen-mode silence segmentation: an utterance ends after this many seconds below
# the rms threshold. keeps latency low without streaming.
silence_threshold = 0.012
silence_duration = 0.8
# ignore utterances shorter than this (clicks, coughs).
min_utterance = 0.3
# hard cap on a single utterance so a stuck stream can't grow unbounded.
max_utterance = 15.0
[behavior]
# dictation never auto-submits: "type <phrase>" inserts literal text only; you say
# "send" separately to submit (read-before-send).
type_autosend = false
# fuzzy match ratio (0..1) required to accept a wake phrase / command token.
match_threshold = 0.8

124
src/claudedo/config.py Normal file
View File

@ -0,0 +1,124 @@
"""load and validate config.toml into a typed Config object with clear errors."""
from __future__ import annotations
import logging
import os
from dataclasses import dataclass, field
from pathlib import Path
try:
import tomllib as _toml
_TOML_BINARY = True
except ModuleNotFoundError: # python < 3.11
import tomli as _toml
_TOML_BINARY = True
# module-level logger, named after this module.
log = logging.getLogger(__name__)

# the only values accepted for [input].mode.
_VALID_MODES = ("listen", "ptt")

# stt model names this module knows about.
_VALID_MODELS = ("tiny", "base", "small", "medium", "large-v2", "large-v3")

# config locations tried in order. note these are resolved once, at import time:
# the CLAUDEDO_CONFIG environment variable (None when unset or empty), the
# per-user config directory, then the working directory.
DEFAULT_CONFIG_PATHS = (
    Path(os.environ["CLAUDEDO_CONFIG"]) if os.environ.get("CLAUDEDO_CONFIG") else None,
    Path.home().joinpath(".config", "claudedo", "config.toml"),
    Path.cwd().joinpath("config.toml"),
)
class ConfigError(Exception):
    """signals that the configuration file is absent or holds a bad value."""
@dataclass
class Config:
    """typed claudedo settings, one attribute per config.toml key."""

    # [wake]
    wake_phrases: list[str]

    # [input]
    mode: str
    ptt_key: str

    # [stt]
    stt_model: str
    stt_language: str
    stt_device: str
    stt_compute: str

    # [audio]
    samplerate: int
    channels: int
    silence_threshold: float
    silence_duration: float
    min_utterance: float
    max_utterance: float

    # [behavior]
    type_autosend: bool
    match_threshold: float

    # file the values came from; stays None unless the creator supplies it.
    source_path: Path | None = None
def find_config_path(explicit: str | os.PathLike | None = None) -> Path:
    """return the first candidate config path that is an existing file.

    the explicit path (when truthy) is tried first, followed by every non-None
    entry of DEFAULT_CONFIG_PATHS in order. raises ConfigError, listing the
    paths that were tried, when none of them is a file.
    """
    search_order: list[Path] = [Path(explicit)] if explicit else []
    search_order += [default for default in DEFAULT_CONFIG_PATHS if default]

    hit = next((candidate for candidate in search_order if candidate.is_file()), None)
    if hit is not None:
        return hit

    searched = ", ".join(map(str, search_order)) or "(none)"
    raise ConfigError(f"no config.toml found (looked in: {searched})")
def _require(table: dict, section: str, key: str, types: tuple, default=None):
    """return ``table[section][key]`` after checking its type.

    table:   the parsed toml document (top-level dict).
    section: name of the toml table to look in.
    key:     key inside that table.
    types:   tuple of accepted python types for the value.
    default: returned when the key (or the whole section) is absent; a default
             of None means the key is mandatory.

    raises ConfigError when the section is not a table, when a mandatory key is
    missing, or when the value has the wrong type.
    """
    sub = table.get(section, {})
    # `wake = "x"` or `audio = 5` at the top level is a config mistake; without
    # this check the `in` test below would either do a substring search or
    # blow up with a bare TypeError.
    if not isinstance(sub, dict):
        raise ConfigError(f"[{section}] must be a table, got {type(sub).__name__}")
    if key not in sub:
        if default is not None:
            return default
        raise ConfigError(f"missing [{section}].{key} in config")
    value = sub[key]
    # bool is a subclass of int, so isinstance alone would accept `true`/`false`
    # for numeric keys; only allow a bool when the caller asked for one.
    stray_bool = isinstance(value, bool) and bool not in types
    if stray_bool or not isinstance(value, types):
        names = "/".join(t.__name__ for t in types)
        raise ConfigError(f"[{section}].{key} must be {names}, got {type(value).__name__}")
    return value
def load_config(explicit: str | os.PathLike | None = None) -> Config:
    """load config.toml and return it as a validated Config.

    the file is located with find_config_path (explicit path first, then the
    default locations). [wake].phrases is mandatory; every other key falls back
    to a built-in default when absent.

    raises ConfigError when no file is found, the file cannot be read or
    parsed, a key is missing or mistyped, or a value is out of range.
    """
    path = find_config_path(explicit)
    try:
        with open(path, "rb") as fh:
            raw = _toml.load(fh)
    except OSError as exc:
        # the file existed a moment ago but may still be unreadable (permissions, race).
        raise ConfigError(f"could not read {path}: {exc}") from exc
    except (_toml.TOMLDecodeError, UnicodeDecodeError) as exc:
        # toml must be utf-8; a bad encoding surfaces as UnicodeDecodeError, not TOMLDecodeError.
        raise ConfigError(f"could not parse {path}: {exc}") from exc

    phrases = _require(raw, "wake", "phrases", (list,))
    if not phrases or not all(isinstance(p, str) and p.strip() for p in phrases):
        raise ConfigError("[wake].phrases must be a non-empty list of non-empty strings")

    mode = _require(raw, "input", "mode", (str,), "listen")
    if mode not in _VALID_MODES:
        raise ConfigError(f"[input].mode must be one of {_VALID_MODES}, got {mode!r}")

    model = _require(raw, "stt", "model", (str,), "small")
    if model not in _VALID_MODELS:
        # deliberately a warning, not an error: unknown names are handed on as-is.
        log.warning("unknown stt model %r — passing through to faster-whisper", model)

    cfg = Config(
        wake_phrases=[p.strip() for p in phrases],
        mode=mode,
        ptt_key=_require(raw, "input", "ptt_key", (str,), "space"),
        stt_model=model,
        stt_language=_require(raw, "stt", "language", (str,), "en"),
        # device may be given as an index or a name; it is stored as a string either way.
        stt_device=str(_require(raw, "stt", "device", (str, int), "auto")),
        stt_compute=_require(raw, "stt", "compute", (str,), "auto"),
        samplerate=int(_require(raw, "audio", "samplerate", (int,), 16000)),
        channels=int(_require(raw, "audio", "channels", (int,), 1)),
        # toml integers are accepted for the float settings and widened here.
        silence_threshold=float(_require(raw, "audio", "silence_threshold", (int, float), 0.012)),
        silence_duration=float(_require(raw, "audio", "silence_duration", (int, float), 0.8)),
        min_utterance=float(_require(raw, "audio", "min_utterance", (int, float), 0.3)),
        max_utterance=float(_require(raw, "audio", "max_utterance", (int, float), 15.0)),
        type_autosend=bool(_require(raw, "behavior", "type_autosend", (bool,), False)),
        match_threshold=float(_require(raw, "behavior", "match_threshold", (int, float), 0.8)),
        source_path=path,
    )

    if not 0.0 < cfg.match_threshold <= 1.0:
        raise ConfigError("[behavior].match_threshold must be in (0, 1]")
    if cfg.samplerate <= 0 or cfg.channels <= 0:
        raise ConfigError("[audio].samplerate and channels must be positive")
    if cfg.silence_threshold < 0 or cfg.silence_duration < 0 or cfg.min_utterance < 0:
        raise ConfigError(
            "[audio].silence_threshold, silence_duration and min_utterance must not be negative"
        )
    # min_utterance is a lower bound and max_utterance an upper cap on the same
    # length, so an inverted or empty range could never accept any utterance.
    if cfg.max_utterance <= cfg.min_utterance:
        raise ConfigError("[audio].max_utterance must be greater than min_utterance")
    return cfg