fix: quiet onnxruntime GPU-discovery warning and faster_whisper INFO
faster-whisper's VAD loads an ONNX model that prints a 'GPU device discovery failed' warning on headless/WSL hosts, and faster_whisper logs a chatty INFO line on every transcribe. Raise the onnxruntime log severity, set the faster_whisper logger to WARNING, and filter the C++-level discovery line out of stderr during model load plus a one-shot warm-up transcribe (so it fires once at startup, not in the hot loop). Real errors still pass through. Signed-off-by: disqualifier <dev@disqualifier.me>
This commit is contained in:
parent
43b36d2a0b
commit
b05f6256c1
@ -6,12 +6,77 @@ short in-memory chunk; nothing is written to disk or sent anywhere.
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import contextlib
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_NOISE = re.compile(r"GPU device discovery failed|device_discovery\.cc|DiscoverDevicesForPlatform")
|
||||||
|
|
||||||
|
|
||||||
|
def _quiet_backends() -> None:
    """quiet onnxruntime/ctranslate2 chatter and the faster_whisper INFO log.

    faster-whisper's VAD loads an onnx model whose device discovery prints a noisy
    'GPU device discovery failed' warning on headless/WSL hosts with no GPU sysfs.
    the env var + logger severity stop most onnx logging; the warning itself is
    emitted at C++ init and is filtered out of stderr by _filter_stderr().

    side effects: sets ORT_LOGGING_LEVEL and OMP_NUM_THREADS in os.environ only
    when they are not already set (a value exported by the user always wins),
    raises the "faster_whisper" logger to WARNING, and, when onnxruntime can be
    imported, sets its default logger severity to 3. safe to call repeatedly.
    """
    # NOTE(review): confirm onnxruntime actually reads ORT_LOGGING_LEVEL; the
    # set_default_logger_severity() call below is the documented switch.
    os.environ.setdefault("ORT_LOGGING_LEVEL", "3")
    # setdefault already keeps an existing value, so no extra environ.get() is needed
    os.environ.setdefault("OMP_NUM_THREADS", "4")
    logging.getLogger("faster_whisper").setLevel(logging.WARNING)
    try:
        import onnxruntime

        onnxruntime.set_default_logger_severity(3)
    except Exception:
        # best-effort: onnxruntime may be missing or fail to load its native
        # library; quieting logs must never stop the model from loading, but
        # leave a trace so the failure can be diagnosed at DEBUG level.
        logging.getLogger(__name__).debug(
            "could not lower onnxruntime log severity", exc_info=True
        )
|
||||||
|
|
||||||
|
|
||||||
|
@contextlib.contextmanager
def _filter_stderr():
    """drop onnxruntime's GPU-discovery warning lines from stderr for this block.

    a pipe temporarily replaces the descriptor behind sys.stderr; a pump thread
    forwards every line to the real stderr EXCEPT lines matching _NOISE, so real
    errors still surface. on exit the original descriptor is restored first
    (which closes the pipe's write end and gives the pump EOF), then the pump is
    joined so everything written inside the block has been forwarded.

    when sys.stderr has no usable descriptor (it is None, closed, or an in-memory
    stream) the block runs unfiltered.
    """
    import threading

    try:
        stderr_fd = sys.stderr.fileno()
    except (AttributeError, OSError, ValueError):
        # None -> AttributeError, in-memory stream -> UnsupportedOperation
        # (an OSError), closed stream -> ValueError
        stderr_fd = None
    if stderr_fd is None:
        # yield outside the except handler so errors raised by the body are not
        # chained onto the handled lookup failure
        yield
        return

    def flush_python_stderr() -> None:
        # push Python-level buffered text through before the descriptor changes
        with contextlib.suppress(AttributeError, OSError, ValueError):
            sys.stderr.flush()

    flush_python_stderr()
    saved_fd = os.dup(stderr_fd)
    read_fd, write_fd = os.pipe()
    os.dup2(write_fd, stderr_fd)
    # stderr_fd now holds the only write end, so restoring it later yields EOF
    os.close(write_fd)

    def pump() -> None:
        # closefd=False: saved_fd is still needed by the caller to restore stderr
        with os.fdopen(read_fd, "rb") as reader, os.fdopen(saved_fd, "wb", closefd=False) as out:
            for line in reader:
                if not _NOISE.search(line.decode("utf-8", "replace")):
                    out.write(line)
                    out.flush()

    thread = threading.Thread(target=pump, daemon=True)
    thread.start()
    try:
        yield
    finally:
        flush_python_stderr()
        os.dup2(saved_fd, stderr_fd)
        thread.join(timeout=1.0)
        if not thread.is_alive():
            os.close(saved_fd)
        # else: something else (e.g. a child process that inherited the pipe)
        # still holds the write end; leave saved_fd open so the pump never
        # writes to a closed or reused descriptor.
|
||||||
|
|
||||||
|
|
||||||
class Transcriber:
|
class Transcriber:
|
||||||
"""a loaded faster-whisper model that transcribes float32 mono audio chunks"""
|
"""a loaded faster-whisper model that transcribes float32 mono audio chunks"""
|
||||||
@ -20,17 +85,25 @@ class Transcriber:
|
|||||||
compute_type: str = "auto") -> None:
|
compute_type: str = "auto") -> None:
|
||||||
self.language = language
|
self.language = language
|
||||||
self._model = self._load(model, device, compute_type)
|
self._model = self._load(model, device, compute_type)
|
||||||
|
self._warm()
|
||||||
|
|
||||||
@staticmethod
def _load(model: str, device: str, compute_type: str):
    """resolve "auto" settings and construct the faster-whisper model.

    device "auto" becomes "cpu"; compute_type "auto" becomes "int8" on cpu and
    "float16" otherwise. the model is built inside _filter_stderr() so the
    GPU-discovery warning printed during load is dropped from stderr.
    returns the WhisperModel instance.
    """
    chosen_device = "cpu" if device == "auto" else device
    if compute_type == "auto":
        chosen_compute = "int8" if chosen_device == "cpu" else "float16"
    else:
        chosen_compute = compute_type
    log.info("loading faster-whisper model=%s device=%s compute=%s", model, chosen_device, chosen_compute)
    with _filter_stderr():
        # keep this order: quiet the backends before faster_whisper is imported
        _quiet_backends()
        from faster_whisper import WhisperModel

        loaded = WhisperModel(model, device=chosen_device, compute_type=chosen_compute)
    return loaded
|
||||||
|
|
||||||
|
def _warm(self) -> None:
    """prime the model with one throwaway transcribe under the stderr filter.

    the intent is that the VAD onnx session initialises here, at startup, so the
    GPU-discovery warning fires once while filtered rather than in the hot loop.
    the input is 1600 zero samples and the result is discarded.
    """
    silence = np.zeros(1600, dtype=np.float32)
    with _filter_stderr():
        segments = self._model.transcribe(silence, vad_filter=True)[0]
        # drain the segments iterable; the results themselves are not needed
        for _segment in segments:
            pass
|
||||||
|
|
||||||
def transcribe(self, audio: np.ndarray, samplerate: int = 16000) -> str:
|
def transcribe(self, audio: np.ndarray, samplerate: int = 16000) -> str:
|
||||||
"""transcribe a mono float32 numpy array to a stripped text string.
|
"""transcribe a mono float32 numpy array to a stripped text string.
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user