diff --git a/config.toml b/config.toml new file mode 100644 index 0000000..f7350c4 --- /dev/null +++ b/config.toml @@ -0,0 +1,53 @@ +# claudedo configuration. everything tunable lives here — no hardcoded paths or +# secrets in code. loaded and validated by config.py with clear errors. + +[wake] +# wake phrases for listen mode. fuzzy-matched: case/space-insensitive, lenient on +# the coined word "claudedo" (whisper renders it inconsistently). number words are +# normalized to digits before command matching. +phrases = ["claudedo", "hey claude"] + +[input] +# "listen" (default): continuous capture; only acts on utterances that start with a +# wake phrase; all other speech is transcribed locally and discarded immediately. +# this is the hands-free path — works while another window (a game) is focused, +# because the trigger is your voice over the mic bridge, not a Windows keyboard +# hook. no system-wide hotkey is installed by design. +# "ptt": push-to-talk; capture only while ptt_key is held. DESK-ONLY: it captures +# only while the daemon's own terminal window is focused (there is deliberately no +# global hotkey — a system-wide keyboard hook is the keylogger/cheat silhouette we +# refuse to build). use "listen" for hands-free-while-gaming. +mode = "listen" +ptt_key = "space" + +[stt] +# faster-whisper model size. "small" is a good accuracy/latency balance for the +# short command grammar (~sub-second per chunk on a strong cpu). if the coined wake +# word "claudedo" is recognized poorly, bump to "medium" (slower per chunk). +model = "small" +language = "en" +# mic device: "auto", or a sounddevice device index (integer) / substring of a +# device name. run `claudedo test-audio` to list devices. +device = "auto" +# faster-whisper compute device: "auto" (cpu here), "cpu", or "cuda". +compute = "auto" + +[audio] +# capture parameters. 16 kHz mono is what whisper expects. 
"""load and validate config.toml into a typed Config object with clear errors."""

from __future__ import annotations

import logging
import os
from dataclasses import dataclass, field
from pathlib import Path

try:
    import tomllib as _toml
    _TOML_BINARY = True
except ModuleNotFoundError:  # python < 3.11
    import tomli as _toml
    _TOML_BINARY = True

log = logging.getLogger(__name__)

_VALID_MODES = ("listen", "ptt")
_VALID_MODELS = ("tiny", "base", "small", "medium", "large-v2", "large-v3")

# search order used when no explicit path wins: $CLAUDEDO_CONFIG (None when the
# variable is unset or empty), then the per-user file, then ./config.toml.
DEFAULT_CONFIG_PATHS = (
    Path(os.environ.get("CLAUDEDO_CONFIG", "")) if os.environ.get("CLAUDEDO_CONFIG") else None,
    Path.home() / ".config" / "claudedo" / "config.toml",
    Path.cwd() / "config.toml",
)


class ConfigError(Exception):
    """raised on a missing or invalid configuration value."""


@dataclass
class Config:
    """validated claudedo configuration, one field per config.toml key."""

    # [wake].phrases — wake phrases for listen mode, stored stripped.
    wake_phrases: list[str]
    # [input].mode — "listen" (continuous capture, wake phrase required) or
    # "ptt" (push-to-talk: capture only while ptt_key is held).
    mode: str
    # [input].ptt_key — key name used by ptt mode.
    ptt_key: str
    # [stt].model — faster-whisper model size; unknown names only log a warning.
    stt_model: str
    # [stt].language — transcription language code.
    stt_language: str
    # [stt].device — mic device: "auto", a device index, or a name substring.
    # always stored as a string, even when the file gives an integer.
    stt_device: str
    # [stt].compute — faster-whisper compute device: "auto", "cpu" or "cuda".
    stt_compute: str
    # [audio].samplerate / channels — capture parameters (16 kHz mono default).
    samplerate: int
    channels: int
    # [audio].silence_threshold — rms level below which audio counts as silence.
    silence_threshold: float
    # [audio].silence_duration — seconds of silence that end an utterance.
    silence_duration: float
    # [audio].min_utterance — utterances shorter than this (seconds) are ignored.
    min_utterance: float
    # [audio].max_utterance — hard cap (seconds) on a single utterance.
    max_utterance: float
    # [behavior].type_autosend — dictation never auto-submits when false.
    type_autosend: bool
    # [behavior].match_threshold — fuzzy match ratio required, in (0, 1].
    match_threshold: float
    # file this configuration was loaded from.
    source_path: Path | None = field(default=None)


def find_config_path(explicit: str | os.PathLike | None = None) -> Path:
    """resolve the config file path, raising ConfigError if none is found.

    the explicit path (when given) is tried first, then every entry of
    DEFAULT_CONFIG_PATHS in order. a missing explicit path still falls back to
    the defaults, but a warning is logged so the substitution is not silent.
    """
    candidates: list[Path] = []
    if explicit:
        wanted = Path(explicit)
        if wanted.is_file():
            return wanted
        log.warning("explicit config %s not found — trying default paths", wanted)
        candidates.append(wanted)
    candidates.extend(p for p in DEFAULT_CONFIG_PATHS if p is not None)
    for path in candidates:
        if path.is_file():
            return path
    searched = ", ".join(str(p) for p in candidates) or "(none)"
    raise ConfigError(f"no config.toml found (looked in: {searched})")


def _require(table: dict, section: str, key: str, types: tuple, default=None):
    """return table[section][key] after type-checking it against *types*.

    when the key is absent, *default* is returned if it is not None; otherwise
    ConfigError is raised. ConfigError is also raised when the section is not a
    table or the value has the wrong type.
    """
    sub = table.get(section, {})
    if not isinstance(sub, dict):
        # e.g. `wake = 5` at top level: `key in 5` would raise a bare TypeError.
        raise ConfigError(f"[{section}] must be a table, got {type(sub).__name__}")
    if key not in sub:
        if default is not None:
            return default
        raise ConfigError(f"missing [{section}].{key} in config")
    value = sub[key]
    # bool is a subclass of int, so `channels = true` would otherwise pass an
    # (int,) check and silently become 1.
    stray_bool = isinstance(value, bool) and bool not in types
    if stray_bool or not isinstance(value, types):
        names = "/".join(t.__name__ for t in types)
        raise ConfigError(f"[{section}].{key} must be {names}, got {type(value).__name__}")
    return value


def load_config(explicit: str | os.PathLike | None = None) -> Config:
    """load config.toml from the first existing default path (or an explicit one).

    returns a fully validated Config. every failure — file not found,
    unreadable, not valid utf-8 / toml, missing key, wrong type, out-of-range
    value — is reported as ConfigError.
    """
    path = find_config_path(explicit)
    try:
        # the toml parser wants a binary file object and decodes utf-8 itself.
        with open(path, "rb") as fh:
            raw = _toml.load(fh)
    except _toml.TOMLDecodeError as exc:
        raise ConfigError(f"could not parse {path}: {exc}") from exc
    except (OSError, UnicodeDecodeError) as exc:
        raise ConfigError(f"could not read {path}: {exc}") from exc

    def get(section: str, key: str, types: tuple, default=None):
        return _require(raw, section, key, types, default)

    phrases = get("wake", "phrases", (list,))
    if not phrases or not all(isinstance(p, str) and p.strip() for p in phrases):
        raise ConfigError("[wake].phrases must be a non-empty list of non-empty strings")

    mode = get("input", "mode", (str,), "listen")
    if mode not in _VALID_MODES:
        raise ConfigError(f"[input].mode must be one of {_VALID_MODES}, got {mode!r}")

    model = get("stt", "model", (str,), "small")
    if model not in _VALID_MODELS:
        log.warning("unknown stt model %r — passing through to faster-whisper", model)

    number = (int, float)
    cfg = Config(
        wake_phrases=[p.strip() for p in phrases],
        mode=mode,
        ptt_key=get("input", "ptt_key", (str,), "space"),
        stt_model=model,
        stt_language=get("stt", "language", (str,), "en"),
        stt_device=str(get("stt", "device", (str, int), "auto")),
        stt_compute=get("stt", "compute", (str,), "auto"),
        samplerate=get("audio", "samplerate", (int,), 16000),
        channels=get("audio", "channels", (int,), 1),
        silence_threshold=float(get("audio", "silence_threshold", number, 0.012)),
        silence_duration=float(get("audio", "silence_duration", number, 0.8)),
        min_utterance=float(get("audio", "min_utterance", number, 0.3)),
        max_utterance=float(get("audio", "max_utterance", number, 15.0)),
        type_autosend=get("behavior", "type_autosend", (bool,), False),
        match_threshold=float(get("behavior", "match_threshold", number, 0.8)),
        source_path=path,
    )
    # range checks are written as `not (good)` so that nan is rejected too.
    if not 0.0 < cfg.match_threshold <= 1.0:
        raise ConfigError("[behavior].match_threshold must be in (0, 1]")
    if cfg.samplerate <= 0 or cfg.channels <= 0:
        raise ConfigError("[audio].samplerate and channels must be positive")
    if not (cfg.silence_threshold >= 0.0 and cfg.silence_duration >= 0.0):
        raise ConfigError("[audio].silence_threshold and silence_duration must not be negative")
    if not 0.0 <= cfg.min_utterance < cfg.max_utterance:
        raise ConfigError("[audio] requires 0 <= min_utterance < max_utterance")
    return cfg