fix: prime mic to skip RDPSource resume gap

WSLg's RDPSource suspends when idle and emits ~1-2s of silence while it resumes
on the first read, so a short timed capture (test-audio) or the first utterance
after daemon start could be lost. add audio.warm_up() that opens a stream and
reads until a non-silent block arrives (or times out); call it at daemon startup
and before test-audio's capture. test-audio now primes then captures 3s.

Signed-off-by: disqualifier <dev@disqualifier.me>
This commit is contained in:
disqualifier 2026-06-25 19:09:08 -04:00
parent 84c74603e5
commit eb587692e1
3 changed files with 32 additions and 2 deletions

View File

@ -90,10 +90,12 @@ def cmd_test_audio(args: argparse.Namespace) -> int:
if dev.get("max_input_channels", 0) > 0: if dev.get("max_input_channels", 0) > 0:
print(f" [{idx}] {dev['name']} ({dev['max_input_channels']}ch)") print(f" [{idx}] {dev['name']} ({dev['max_input_channels']}ch)")
device = audio_mod.resolve_device(config.stt_device) device = audio_mod.resolve_device(config.stt_device)
print(f"\ncapturing {2}s from device={device if device is not None else 'default'} ...") print("\npriming mic (RDPSource resumes from suspend) ...")
audio_mod.warm_up(config.samplerate, config.channels, device)
print(f"capturing 3s from device={device if device is not None else 'default'} — speak now ...")
chunk = audio_mod.record_while( chunk = audio_mod.record_while(
config.samplerate, config.channels, device, config.samplerate, config.channels, device,
held=_timed_hold(2.0), max_utterance=3.0, min_utterance=0.0, held=_timed_hold(3.0), max_utterance=4.0, min_utterance=0.0,
) )
except Exception as exc: except Exception as exc:
print(f"\naudio capture FAILED: {exc}", file=sys.stderr) print(f"\naudio capture FAILED: {exc}", file=sys.stderr)

View File

@ -58,6 +58,30 @@ def _rms(block: np.ndarray) -> float:
return float(np.sqrt(np.mean(np.square(block, dtype=np.float64)))) return float(np.sqrt(np.mean(np.square(block, dtype=np.float64))))
def warm_up(samplerate: int, channels: int, device: int | None,
            timeout: float = 3.0) -> bool:
    """prime the input device by reading until it yields a non-silent block.

    WSLg's RDPSource suspends when idle and emits ~1-2s of silence while it
    resumes on the next read. draining that gap here keeps the first real
    capture from being lost to it.

    samplerate / channels / device are handed straight to the input stream;
    timeout is the number of seconds to keep reading before giving up.

    returns True as soon as one block has a non-zero RMS, False if the timeout
    elapsed with only silence. it is safe to proceed either way -- a truly
    silent mic just returns False.
    """
    import sounddevice as sd

    # read in 50 ms blocks so the deadline is re-checked frequently
    frames_per_read = int(samplerate * 0.05)
    # the clock starts before the stream is opened, so open time counts too
    give_up_at = time.monotonic() + timeout

    stream_args = {
        "samplerate": samplerate,
        "channels": channels,
        "device": device,
        "dtype": "float32",
        "blocksize": frames_per_read,
    }
    with sd.InputStream(**stream_args) as stream:
        while True:
            if time.monotonic() >= give_up_at:
                break
            frames, _ = stream.read(frames_per_read)
            # collapse to one channel: flatten mono, average multi-channel
            if channels == 1:
                mono = frames.reshape(-1)
            else:
                mono = frames.mean(axis=1)
            # any non-zero energy means the source is live
            if _rms(mono) > 0.0:
                return True
    return False
def record_until_silence(samplerate: int, channels: int, device: int | None, def record_until_silence(samplerate: int, channels: int, device: int | None,
silence_threshold: float, silence_duration: float, silence_threshold: float, silence_duration: float,
min_utterance: float, max_utterance: float, min_utterance: float, max_utterance: float,

View File

@ -134,6 +134,10 @@ class Daemon:
device=cfg.stt_compute if cfg.stt_compute in ("cpu", "cuda") else "auto", device=cfg.stt_compute if cfg.stt_compute in ("cpu", "cuda") else "auto",
compute_type="auto", compute_type="auto",
) )
if audio.warm_up(cfg.samplerate, cfg.channels, self._device):
log.info("mic warmed up (source live)")
else:
log.warning("mic warm-up saw only silence — check mic permission / RDPSource")
def _capture(self): def _capture(self):
cfg = self.config cfg = self.config