config: typed config loader and config.toml

load/validate config.toml with clear errors; defaults to listen mode and the 'small' whisper model. all tunables (wake phrases, audio thresholds, type_autosend) live here, no hardcoded paths or secrets in code. Signed-off-by: disqualifier <dev@disqualifier.me>
2026-06-25 17:55:08 -04:00 · 2026-06-25 17:55:08 -04:00 · c61eb85748
commit c61eb85748
parent b6d8683e39
2 changed files with 177 additions and 0 deletions
--- a/config.toml
+++ b/config.toml
@ -0,0 +1,53 @@
+# claudedo configuration. everything tunable lives here — no hardcoded paths or
+# secrets in code. loaded and validated by config.py with clear errors.
+
+[wake]
+# wake phrases for listen mode. fuzzy-matched: case/space-insensitive, lenient on
+# the coined word "claudedo" (whisper renders it inconsistently). number words are
+# normalized to digits before command matching.
+phrases = ["claudedo", "hey claude"]
+
+[input]
+# "listen" (default): continuous capture; only acts on utterances that start with a
+#   wake phrase; all other speech is transcribed locally and discarded immediately.
+#   this is the hands-free path — works while another window (a game) is focused,
+#   because the trigger is your voice over the mic bridge, not a Windows keyboard
+#   hook. no system-wide hotkey is installed by design.
+# "ptt": push-to-talk; capture only while ptt_key is held. DESK-ONLY: it captures
+#   only while the daemon's own terminal window is focused (there is deliberately no
+#   global hotkey — a system-wide keyboard hook is the keylogger/cheat silhouette we
+#   refuse to build). use "listen" for hands-free-while-gaming.
+mode = "listen"
+ptt_key = "space"
+
+[stt]
+# faster-whisper model size. "small" is a good accuracy/latency balance for the
+# short command grammar (~sub-second per chunk on a strong cpu). if the coined wake
+# word "claudedo" is recognized poorly, bump to "medium" (slower per chunk).
+model = "small"
+language = "en"
+# mic device: "auto", or a sounddevice device index (integer) / substring of a
+# device name. run `claudedo test-audio` to list devices.
+device = "auto"
+# faster-whisper compute device: "auto" (cpu here), "cpu", or "cuda".
+compute = "auto"
+
+[audio]
+# capture parameters. 16 kHz mono is what whisper expects.
+samplerate = 16000
+channels = 1
+# listen-mode silence segmentation: an utterance ends once the rms level has stayed
+# below silence_threshold for silence_duration seconds. keeps latency low without
+# streaming.
+silence_threshold = 0.012
+silence_duration = 0.8
+# ignore utterances shorter than this (clicks, coughs).
+min_utterance = 0.3
+# hard cap on a single utterance so a stuck stream can't grow unbounded.
+max_utterance = 15.0
+
+[behavior]
+# dictation never auto-submits: "type <phrase>" inserts literal text only; you say
+# "send" separately to submit (read-before-send).
+type_autosend = false
+# fuzzy match ratio (0..1) required to accept a wake phrase / command token.
+match_threshold = 0.8
--- a/src/claudedo/config.py
+++ b/src/claudedo/config.py
@ -0,0 +1,124 @@
+"""load and validate config.toml into a typed Config object with clear errors."""
+
+from __future__ import annotations
+
+import logging
+import os
+from dataclasses import dataclass, field
+from pathlib import Path
+
+try:
+    import tomllib as _toml
+    _TOML_BINARY = True
+except ModuleNotFoundError:  # python < 3.11
+    import tomli as _toml
+    _TOML_BINARY = True
+
+# module-level logger, named after this module.
+log = logging.getLogger(__name__)
+
+# accepted values for [input].mode.
+_VALID_MODES = ("listen", "ptt")
+# known stt model names.
+_VALID_MODELS = ("tiny", "base", "small", "medium", "large-v2", "large-v3")
+
+# value of the CLAUDEDO_CONFIG override, read once at import time.
+_ENV_CONFIG = os.environ.get("CLAUDEDO_CONFIG")
+
+# candidate config locations in priority order. the first slot is the
+# CLAUDEDO_CONFIG override, or None when the variable is unset or empty.
+DEFAULT_CONFIG_PATHS = (
+    Path(_ENV_CONFIG) if _ENV_CONFIG else None,
+    Path.home() / ".config" / "claudedo" / "config.toml",
+    Path.cwd() / "config.toml",
+)
+
+
+class ConfigError(Exception):
+    """signals that the configuration is missing or holds an invalid value."""
+
+
+@dataclass
+class Config:
+    """typed container for the claudedo settings.
+
+    all fields except ``source_path`` are required constructor arguments, in
+    the order declared below.
+    """
+
+    # wake phrases
+    wake_phrases: list[str]
+
+    # input mode and push-to-talk key
+    mode: str
+    ptt_key: str
+
+    # speech-to-text settings
+    stt_model: str
+    stt_language: str
+    stt_device: str
+    stt_compute: str
+
+    # audio capture and utterance segmentation
+    samplerate: int
+    channels: int
+    silence_threshold: float
+    silence_duration: float
+    min_utterance: float
+    max_utterance: float
+
+    # behavior switches
+    type_autosend: bool
+    match_threshold: float
+
+    # optional path of the originating config file; defaults to None.
+    source_path: Path | None = field(default=None)
+
+
+def find_config_path(explicit: str | os.PathLike | None = None) -> Path:
+    """return the first candidate config path that is an existing file.
+
+    candidates are tried in order: ``explicit`` (when truthy), followed by every
+    non-empty entry of ``DEFAULT_CONFIG_PATHS``.
+
+    raises:
+        ConfigError: when no candidate is a file; the message lists every path
+            that was tried, or "(none)" if there were no candidates.
+    """
+    search_order: list[Path] = [Path(explicit)] if explicit else []
+    search_order += [default for default in DEFAULT_CONFIG_PATHS if default]
+
+    found = next((candidate for candidate in search_order if candidate.is_file()), None)
+    if found is not None:
+        return found
+
+    searched = ", ".join(map(str, search_order)) or "(none)"
+    raise ConfigError(f"no config.toml found (looked in: {searched})")
+
+
+def _require(table: dict, section: str, key: str, types: tuple, default=None):
+    """fetch ``[section].key`` from the parsed toml and check its type.
+
+    args:
+        table: the parsed toml document (top-level mapping).
+        section: name of the toml table to look in; a missing table is treated
+            as empty.
+        key: name of the value inside that table.
+        types: tuple of accepted python types for the value.
+        default: returned when the key is absent. ``None`` means the key is
+            required.
+
+    returns:
+        the value as stored in the file, or ``default`` when the key is absent.
+
+    raises:
+        ConfigError: if ``section`` exists but is not a table, if a required
+            key is missing, or if the value is not one of ``types``.
+    """
+    sub = table.get(section, {})
+    if not isinstance(sub, dict):
+        # e.g. a top-level `audio = 3`: report it clearly instead of failing
+        # later on the membership test or the index lookup.
+        raise ConfigError(f"[{section}] must be a table, got {type(sub).__name__}")
+    if key not in sub:
+        if default is not None:
+            return default
+        raise ConfigError(f"missing [{section}].{key} in config")
+    value = sub[key]
+    # bool is a subclass of int, so `samplerate = true` would otherwise satisfy
+    # an (int,) check; accept booleans only where bool is explicitly allowed.
+    stray_bool = isinstance(value, bool) and bool not in types
+    if stray_bool or not isinstance(value, types):
+        names = "/".join(t.__name__ for t in types)
+        raise ConfigError(f"[{section}].{key} must be {names}, got {type(value).__name__}")
+    return value
+
+
+def load_config(explicit: str | os.PathLike | None = None) -> Config:
+    """load, validate and return the configuration.
+
+    the file is located with ``find_config_path(explicit)``. only
+    ``[wake].phrases`` is required; every other key falls back to a built-in
+    default when absent (listen mode, the 'small' model, 16 kHz mono, ...).
+    an unrecognized stt model name is logged as a warning and passed through.
+
+    args:
+        explicit: optional config path to try before the default locations.
+
+    returns:
+        a populated ``Config`` whose ``source_path`` is the file that was read.
+
+    raises:
+        ConfigError: if no config file is found, the file cannot be read or
+            parsed, a value has the wrong type, or a value is out of range.
+    """
+    path = find_config_path(explicit)
+    try:
+        with open(path, "rb") as fh:
+            raw = _toml.load(fh)
+    except OSError as exc:
+        # e.g. permission denied, or the file vanished after it was located.
+        raise ConfigError(f"could not read {path}: {exc}") from exc
+    except (UnicodeDecodeError, _toml.TOMLDecodeError) as exc:
+        # a file that is not valid utf-8 fails while decoding, before toml parsing.
+        raise ConfigError(f"could not parse {path}: {exc}") from exc
+
+    phrases = _require(raw, "wake", "phrases", (list,))
+    if not phrases or not all(isinstance(p, str) and p.strip() for p in phrases):
+        raise ConfigError("[wake].phrases must be a non-empty list of non-empty strings")
+
+    mode = _require(raw, "input", "mode", (str,), "listen")
+    if mode not in _VALID_MODES:
+        raise ConfigError(f"[input].mode must be one of {_VALID_MODES}, got {mode!r}")
+
+    model = _require(raw, "stt", "model", (str,), "small")
+    if model not in _VALID_MODELS:
+        # not fatal: the name is handed on unchanged.
+        log.warning("unknown stt model %r — passing through to faster-whisper", model)
+
+    cfg = Config(
+        wake_phrases=[p.strip() for p in phrases],
+        mode=mode,
+        ptt_key=_require(raw, "input", "ptt_key", (str,), "space"),
+        stt_model=model,
+        stt_language=_require(raw, "stt", "language", (str,), "en"),
+        # an integer device index is stored as its string form.
+        stt_device=str(_require(raw, "stt", "device", (str, int), "auto")),
+        stt_compute=_require(raw, "stt", "compute", (str,), "auto"),
+        samplerate=int(_require(raw, "audio", "samplerate", (int,), 16000)),
+        channels=int(_require(raw, "audio", "channels", (int,), 1)),
+        silence_threshold=float(_require(raw, "audio", "silence_threshold", (int, float), 0.012)),
+        silence_duration=float(_require(raw, "audio", "silence_duration", (int, float), 0.8)),
+        min_utterance=float(_require(raw, "audio", "min_utterance", (int, float), 0.3)),
+        max_utterance=float(_require(raw, "audio", "max_utterance", (int, float), 15.0)),
+        type_autosend=bool(_require(raw, "behavior", "type_autosend", (bool,), False)),
+        match_threshold=float(_require(raw, "behavior", "match_threshold", (int, float), 0.8)),
+        source_path=path,
+    )
+    if not 0.0 < cfg.match_threshold <= 1.0:
+        raise ConfigError("[behavior].match_threshold must be in (0, 1]")
+    if cfg.samplerate <= 0 or cfg.channels <= 0:
+        raise ConfigError("[audio].samplerate and channels must be positive")
+    # written as `>= 0` / `>` checks so that toml `nan` values are rejected too.
+    non_negative = (cfg.silence_threshold, cfg.silence_duration, cfg.min_utterance)
+    if not all(value >= 0 for value in non_negative):
+        raise ConfigError(
+            "[audio].silence_threshold, silence_duration and min_utterance must not be negative"
+        )
+    if not cfg.max_utterance > cfg.min_utterance:
+        raise ConfigError("[audio].max_utterance must be greater than min_utterance")
+    return cfg