diff --git a/src/claudedo/__main__.py b/src/claudedo/__main__.py
index 675f3ea..5603304 100644
--- a/src/claudedo/__main__.py
+++ b/src/claudedo/__main__.py
@@ -90,10 +90,12 @@ def cmd_test_audio(args: argparse.Namespace) -> int:
             if dev.get("max_input_channels", 0) > 0:
                 print(f" [{idx}] {dev['name']} ({dev['max_input_channels']}ch)")
         device = audio_mod.resolve_device(config.stt_device)
-        print(f"\ncapturing {2}s from device={device if device is not None else 'default'} ...")
+        print("\npriming mic (RDPSource resumes from suspend) ...")
+        audio_mod.warm_up(config.samplerate, config.channels, device)
+        print(f"capturing 3s from device={device if device is not None else 'default'} — speak now ...")
         chunk = audio_mod.record_while(
             config.samplerate, config.channels, device,
-            held=_timed_hold(2.0), max_utterance=3.0, min_utterance=0.0,
+            held=_timed_hold(3.0), max_utterance=4.0, min_utterance=0.0,
         )
     except Exception as exc:
         print(f"\naudio capture FAILED: {exc}", file=sys.stderr)
diff --git a/src/claudedo/audio.py b/src/claudedo/audio.py
index 291ab26..7775575 100644
--- a/src/claudedo/audio.py
+++ b/src/claudedo/audio.py
@@ -58,6 +58,36 @@ def _rms(block: np.ndarray) -> float:
     return float(np.sqrt(np.mean(np.square(block, dtype=np.float64))))
 
 
+def warm_up(samplerate: int, channels: int, device: int | None,
+            timeout: float = 3.0) -> bool:
+    """open a short stream and read until the source produces audio.
+
+    WSLg's RDPSource suspends when idle and emits ~1-2s of silence while it resumes
+    on the next read. priming here means the first real capture isn't lost to that
+    warm-up gap.
+
+    returns True as soon as a block containing any non-zero sample arrives, or
+    False if only digital silence was read before ``timeout`` seconds elapsed.
+    it is safe to proceed either way — a truly silent mic just returns False.
+    errors from opening or reading the stream propagate to the caller.
+    """
+    import sounddevice as sd
+
+    block_dur = 0.05
+    # never request a zero-length read, even for a degenerate samplerate
+    blocksize = max(1, int(samplerate * block_dur))
+    deadline = time.monotonic() + timeout
+    with sd.InputStream(samplerate=samplerate, channels=channels, device=device,
+                        dtype="float32", blocksize=blocksize) as stream:
+        while time.monotonic() < deadline:
+            block, _overflowed = stream.read(blocksize)
+            # measure the raw block rather than a channel-averaged downmix:
+            # averaging lets anti-phase channels cancel to exactly zero.
+            if _rms(block) > 0.0:
+                return True
+    return False
+
+
 def record_until_silence(samplerate: int, channels: int, device: int | None,
                          silence_threshold: float, silence_duration: float,
                          min_utterance: float, max_utterance: float,
diff --git a/src/claudedo/daemon.py b/src/claudedo/daemon.py
index eecccac..5b80b63 100644
--- a/src/claudedo/daemon.py
+++ b/src/claudedo/daemon.py
@@ -134,6 +134,10 @@ class Daemon:
             device=cfg.stt_compute if cfg.stt_compute in ("cpu", "cuda") else "auto",
             compute_type="auto",
         )
+        if audio.warm_up(cfg.samplerate, cfg.channels, self._device):
+            log.info("mic warmed up (source live)")
+        else:
+            log.warning("mic warm-up saw only silence — check mic permission / RDPSource")
 
     def _capture(self):
         cfg = self.config