diff --git a/src/claudedo/stt.py b/src/claudedo/stt.py
index 35e94e8..c85530e 100644
--- a/src/claudedo/stt.py
+++ b/src/claudedo/stt.py
@@ -6,12 +6,85 @@ short in-memory chunk; nothing is written to disk or sent anywhere.
 
 from __future__ import annotations
 
+import contextlib
 import logging
+import os
+import re
+import sys
 
 import numpy as np
 
 log = logging.getLogger(__name__)
 
+_NOISE = re.compile(r"GPU device discovery failed|device_discovery\.cc|DiscoverDevicesForPlatform")
+
+
+def _quiet_backends() -> None:
+    """quiet onnxruntime/ctranslate2 chatter and the faster_whisper INFO log.
+
+    faster-whisper's VAD loads an onnx model whose device discovery prints a noisy
+    'GPU device discovery failed' warning on headless/WSL hosts with no GPU sysfs.
+    the env var + logger severity stop most onnx logging; the warning itself is
+    emitted at C++ init and is filtered out of stderr by _filter_stderr().
+
+    also defaults OMP_NUM_THREADS to 4 when the caller has not set it. everything
+    here is best-effort: a missing or broken onnxruntime is logged at debug level
+    and otherwise ignored.
+    """
+    os.environ.setdefault("ORT_LOGGING_LEVEL", "3")
+    os.environ.setdefault("OMP_NUM_THREADS", "4")
+    logging.getLogger("faster_whisper").setLevel(logging.WARNING)
+    try:
+        import onnxruntime
+        onnxruntime.set_default_logger_severity(3)
+    except Exception:
+        log.debug("could not lower onnxruntime log severity", exc_info=True)
+
+
+@contextlib.contextmanager
+def _filter_stderr():
+    """drop onnxruntime's GPU-discovery warning lines from stderr for this block.
+
+    a pipe temporarily replaces sys.stderr's fd (normally fd 2); a pump thread
+    forwards every line to the real stderr EXCEPT the known GPU-discovery noise,
+    so real errors still surface. the original fd is always restored on exit.
+    if sys.stderr has no usable fd the block runs unfiltered.
+    """
+    import threading
+
+    try:
+        stderr_fd = sys.stderr.fileno()
+    except (AttributeError, OSError, ValueError):
+        # stderr is None, closed, or not backed by a real fd: nothing to filter
+        yield
+        return
+
+    saved_fd = os.dup(stderr_fd)
+    # the pump owns a private dup of the real stderr, so closing saved_fd below
+    # can never pull the fd out from under a pump that is still draining
+    pump_fd = os.dup(saved_fd)
+    read_fd, write_fd = os.pipe()
+    os.dup2(write_fd, stderr_fd)
+    os.close(write_fd)
+
+    def pump():
+        with os.fdopen(read_fd, "rb") as reader, os.fdopen(pump_fd, "wb") as out:
+            for line in reader:
+                if not _NOISE.search(line.decode("utf-8", "replace")):
+                    out.write(line)
+                    out.flush()
+
+    thread = threading.Thread(target=pump, daemon=True)
+    thread.start()
+    try:
+        yield
+    finally:
+        # restoring the fd drops this process's write end of the pipe, so the pump
+        # reaches EOF after forwarding whatever is still buffered; no sleep needed
+        os.dup2(saved_fd, stderr_fd)
+        os.close(saved_fd)
+        thread.join(timeout=1.0)
+
 
 class Transcriber:
     """a loaded faster-whisper model that transcribes float32 mono audio chunks"""
@@ -20,17 +93,25 @@ class Transcriber:
                  compute_type: str = "auto") -> None:
         self.language = language
         self._model = self._load(model, device, compute_type)
+        self._warm()
 
     @staticmethod
     def _load(model: str, device: str, compute_type: str):
-        from faster_whisper import WhisperModel
-
         if device == "auto":
             device = "cpu"
         if compute_type == "auto":
             compute_type = "int8" if device == "cpu" else "float16"
         log.info("loading faster-whisper model=%s device=%s compute=%s", model, device, compute_type)
-        return WhisperModel(model, device=device, compute_type=compute_type)
+        with _filter_stderr():
+            _quiet_backends()
+            from faster_whisper import WhisperModel
+            return WhisperModel(model, device=device, compute_type=compute_type)
+
+    def _warm(self) -> None:
+        """run one throwaway transcribe so the VAD onnx session inits now, under the
+        stderr filter — the GPU-discovery warning fires here once, not in the loop"""
+        with _filter_stderr():
+            list(self._model.transcribe(np.zeros(1600, dtype=np.float32), vad_filter=True)[0])
 
     def transcribe(self, audio: np.ndarray, samplerate: int = 16000) -> str:
         """transcribe a mono float32 numpy array to a stripped text string.