Compare commits
No commits in common. "d96dc3898f8c465b993909d367b48787576fe9c4" and "43b36d2a0b32b2a0fddbe13175514d6ab5cf676e" have entirely different histories.
d96dc3898f
...
43b36d2a0b
21
README.md
21
README.md
@ -108,9 +108,6 @@ Wake phrases (listen mode), fuzzy-matched: **"claudedo"**, **"hey claude"**.
|
|||||||
| `approve` / `deny` | allow / deny a permission prompt |
|
| `approve` / `deny` | allow / deny a permission prompt |
|
||||||
| `send` / `enter` | submit (Enter) |
|
| `send` / `enter` | submit (Enter) |
|
||||||
| `type <phrase>` | insert literal text, **no** submit (read-before-send; say "send") |
|
| `type <phrase>` | insert literal text, **no** submit (read-before-send; say "send") |
|
||||||
| `space [<n>]` | insert n spaces (default 1) |
|
|
||||||
| `backspace [<n>]` (alias `delete`) | delete n chars (default 1), capped at the last submit boundary |
|
|
||||||
| `erase` (alias `clear`/`wipe`) | delete everything typed since the last submit/boundary |
|
|
||||||
| `mode ptt` / `mode listen` | switch input mode |
|
| `mode ptt` / `mode listen` | switch input mode |
|
||||||
| `set <name>` (alias `sticky`/`switch`) | set the **sticky** target → `claude-<name>` (persists) |
|
| `set <name>` (alias `sticky`/`switch`) | set the **sticky** target → `claude-<name>` (persists) |
|
||||||
| `target <name> <command>` | **one-shot** override: run that command on `claude-<name>` for this utterance only; sticky default unchanged |
|
| `target <name> <command>` | **one-shot** override: run that command on `claude-<name>` for this utterance only; sticky default unchanged |
|
||||||
@ -174,19 +171,11 @@ If Claude Code changes its prompt UI, re-confirm against a live session and upda
|
|||||||
## Config
|
## Config
|
||||||
|
|
||||||
Everything tunable lives in [`config.toml`](config.toml): wake phrases, mode + PTT
|
Everything tunable lives in [`config.toml`](config.toml): wake phrases, mode + PTT
|
||||||
key, Whisper model/language/device, audio segmentation thresholds, and `[behavior]`
|
key, Whisper model/language/device, audio segmentation thresholds, and
|
||||||
(`type_autosend`, `filler_words`, `auto_target`, `print_heard`). The default model is
|
`type_autosend = false`. The default model is `small`; bump to `medium` if the coined
|
||||||
`small`; bump to `medium` if the coined wake word is recognized poorly. `claudedo -c
|
wake word is recognized poorly. `claudedo -c <path> ...` points at a specific config;
|
||||||
<path> ...` points at a specific config; otherwise it searches `$CLAUDEDO_CONFIG`,
|
otherwise it searches `$CLAUDEDO_CONFIG`, `~/.config/claudedo/config.toml`, then
|
||||||
`~/.config/claudedo/config.toml`, then `./config.toml`.
|
`./config.toml`.
|
||||||
|
|
||||||
- **`auto_target`** (default `false`): with no sticky target set and exactly one
|
|
||||||
`claude-*` session running, `false` makes a bare command do nothing and ask you to
|
|
||||||
`set` one; `true` auto-targets that single session.
|
|
||||||
- **`print_heard`** (default `false`, debug): prints non-wake transcripts to the
|
|
||||||
console so you can see how Whisper renders your wake word. Turn it on to debug
|
|
||||||
detection, then off. Whisper has no token for "claudedo" — it commonly emits
|
|
||||||
"claude do" or "claude due", both of which are in the default wake list.
|
|
||||||
|
|
||||||
## Requirements
|
## Requirements
|
||||||
|
|
||||||
|
|||||||
13
config.toml
13
config.toml
@ -5,7 +5,7 @@
|
|||||||
# wake phrases for listen mode. fuzzy-matched: case/space-insensitive, lenient on
|
# wake phrases for listen mode. fuzzy-matched: case/space-insensitive, lenient on
|
||||||
# the coined word "claudedo" (whisper renders it inconsistently). number words are
|
# the coined word "claudedo" (whisper renders it inconsistently). number words are
|
||||||
# normalized to digits before command matching.
|
# normalized to digits before command matching.
|
||||||
phrases = ["claudedo", "claude do", "claude due", "hey claude", "ok claude", "okay claude"]
|
phrases = ["claudedo", "hey claude"]
|
||||||
|
|
||||||
[input]
|
[input]
|
||||||
# "listen" (default): continuous capture; only acts on utterances that start with a
|
# "listen" (default): continuous capture; only acts on utterances that start with a
|
||||||
@ -55,14 +55,3 @@ match_threshold = 0.8
|
|||||||
# "select yes" / "use yes" behave like "yes". (a filler word followed by a digit is
|
# "select yes" / "use yes" behave like "yes". (a filler word followed by a digit is
|
||||||
# the select command, e.g. "select 1", and is not dropped.)
|
# the select command, e.g. "select 1", and is not dropped.)
|
||||||
filler_words = ["select", "use", "choose"]
|
filler_words = ["select", "use", "choose"]
|
||||||
# when no sticky target is set and exactly ONE claude-* session is running:
|
|
||||||
# false (default) -> require an explicit `set <name>` or one-shot `target <name>`;
|
|
||||||
# a bare command does nothing and tells you to set one.
|
|
||||||
# true -> auto-target that single session (convenience).
|
|
||||||
auto_target = false
|
|
||||||
# DEBUG ONLY — relaxes the privacy invariant. when true, the daemon console prints
|
|
||||||
# the raw transcript of EVERY utterance, including non-wake speech it would otherwise
|
|
||||||
# drop silently (shown as `heard (dropped): "<transcript>"`). use it to see exactly
|
|
||||||
# how Whisper renders your wake word, then turn it OFF. default false: non-wake speech
|
|
||||||
# is discarded without ever printing the transcript.
|
|
||||||
print_heard = false
|
|
||||||
|
|||||||
@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "claudedo"
|
name = "claudedo"
|
||||||
version = "0.1.2"
|
version = "0.1.1"
|
||||||
description = "voice-control daemon for claude code (local STT -> tmux send-keys)"
|
description = "voice-control daemon for claude code (local STT -> tmux send-keys)"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.10"
|
requires-python = ">=3.10"
|
||||||
|
|||||||
@ -1,3 +1,3 @@
|
|||||||
"""claudedo — voice-control daemon for claude code (local STT -> tmux send-keys)"""
|
"""claudedo — voice-control daemon for claude code (local STT -> tmux send-keys)"""
|
||||||
|
|
||||||
__version__ = "0.1.2"
|
__version__ = "0.1.1"
|
||||||
|
|||||||
@ -50,8 +50,6 @@ class Config:
|
|||||||
type_autosend: bool
|
type_autosend: bool
|
||||||
match_threshold: float
|
match_threshold: float
|
||||||
filler_words: tuple[str, ...]
|
filler_words: tuple[str, ...]
|
||||||
auto_target: bool
|
|
||||||
print_heard: bool
|
|
||||||
source_path: Path | None = field(default=None)
|
source_path: Path | None = field(default=None)
|
||||||
|
|
||||||
|
|
||||||
@ -120,8 +118,6 @@ def load_config(explicit: str | os.PathLike | None = None) -> Config:
|
|||||||
match_threshold=float(_require(raw, "behavior", "match_threshold", (int, float), 0.8)),
|
match_threshold=float(_require(raw, "behavior", "match_threshold", (int, float), 0.8)),
|
||||||
filler_words=tuple(_require(raw, "behavior", "filler_words", (list,),
|
filler_words=tuple(_require(raw, "behavior", "filler_words", (list,),
|
||||||
["select", "use", "choose"])),
|
["select", "use", "choose"])),
|
||||||
auto_target=bool(_require(raw, "behavior", "auto_target", (bool,), False)),
|
|
||||||
print_heard=bool(_require(raw, "behavior", "print_heard", (bool,), False)),
|
|
||||||
source_path=path,
|
source_path=path,
|
||||||
)
|
)
|
||||||
if not 0.0 < cfg.match_threshold <= 1.0:
|
if not 0.0 < cfg.match_threshold <= 1.0:
|
||||||
|
|||||||
@ -1,51 +0,0 @@
|
|||||||
"""colored, prefixed console output for the daemon's recognition/action feed.
|
|
||||||
|
|
||||||
every line is ``HH:MM:SS [PREFIX] message``. prefixes group the source: a session
|
|
||||||
name (e.g. ``[claude-libs]``) for anything injected into a tmux session, ``[SYSTEM]``
|
|
||||||
for daemon-control/state lines, and ``[VOICE]`` for STT/recognition lines. color is
|
|
||||||
opt-in via tty detection (or forced): green for successful injections, red for
|
|
||||||
drops/errors, dim for routine. falls back to plain text when stdout is not a tty.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
|
|
||||||
RESET = "\033[0m"
|
|
||||||
_COLORS = {
|
|
||||||
"green": "\033[32m",
|
|
||||||
"red": "\033[31m",
|
|
||||||
"yellow": "\033[33m",
|
|
||||||
"cyan": "\033[36m",
|
|
||||||
"dim": "\033[2m",
|
|
||||||
"bold": "\033[1m",
|
|
||||||
}
|
|
||||||
|
|
||||||
SYSTEM = "SYSTEM"
|
|
||||||
VOICE = "VOICE"
|
|
||||||
|
|
||||||
|
|
||||||
class Console:
|
|
||||||
"""formats and prints daemon log lines with timestamp, prefix, and color"""
|
|
||||||
|
|
||||||
def __init__(self, color: bool | None = None, stream=None, clock=None) -> None:
|
|
||||||
self.stream = stream if stream is not None else sys.stdout
|
|
||||||
self._clock = clock or time.localtime
|
|
||||||
if color is None:
|
|
||||||
color = hasattr(self.stream, "isatty") and self.stream.isatty()
|
|
||||||
self.color = bool(color)
|
|
||||||
|
|
||||||
def _stamp(self) -> str:
|
|
||||||
t = self._clock()
|
|
||||||
return f"{t.tm_hour:02d}:{t.tm_min:02d}:{t.tm_sec:02d}"
|
|
||||||
|
|
||||||
def _paint(self, text: str, color: str | None) -> str:
|
|
||||||
if not self.color or not color or color not in _COLORS:
|
|
||||||
return text
|
|
||||||
return f"{_COLORS[color]}{text}{RESET}"
|
|
||||||
|
|
||||||
def emit(self, prefix: str, message: str, color: str | None = None) -> None:
|
|
||||||
"""print one line: ``HH:MM:SS [prefix] message`` (message optionally colored)"""
|
|
||||||
line = f"{self._stamp()} {self._paint(f'[{prefix}]', 'dim')} {self._paint(message, color)}"
|
|
||||||
print(line, file=self.stream, flush=True)
|
|
||||||
@ -18,7 +18,6 @@ from pathlib import Path
|
|||||||
|
|
||||||
from . import audio, grammar, inject, target
|
from . import audio, grammar, inject, target
|
||||||
from .config import Config
|
from .config import Config
|
||||||
from .console import SYSTEM, VOICE, Console
|
|
||||||
from .stt import Transcriber
|
from .stt import Transcriber
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
@ -115,8 +114,6 @@ class Daemon:
|
|||||||
self._transcriber: Transcriber | None = None
|
self._transcriber: Transcriber | None = None
|
||||||
self._device: int | None = None
|
self._device: int | None = None
|
||||||
self._ptt = _PTTKey()
|
self._ptt = _PTTKey()
|
||||||
self._pending: dict[str, int] = {}
|
|
||||||
self._console = Console()
|
|
||||||
|
|
||||||
def _install_signals(self) -> None:
|
def _install_signals(self) -> None:
|
||||||
signal.signal(signal.SIGTERM, self._on_signal)
|
signal.signal(signal.SIGTERM, self._on_signal)
|
||||||
@ -166,7 +163,7 @@ class Daemon:
|
|||||||
parsed = grammar.parse(transcript, cfg.wake_phrases, cfg.match_threshold, require_wake,
|
parsed = grammar.parse(transcript, cfg.wake_phrases, cfg.match_threshold, require_wake,
|
||||||
filler=cfg.filler_words)
|
filler=cfg.filler_words)
|
||||||
if parsed is None or parsed.action is None:
|
if parsed is None or parsed.action is None:
|
||||||
self._console.emit(VOICE, f'heard "{transcript}" -> no command matched', "yellow")
|
self._emit(f'heard: "{transcript}" -> no command matched')
|
||||||
return
|
return
|
||||||
action = parsed.action
|
action = parsed.action
|
||||||
|
|
||||||
@ -174,75 +171,36 @@ class Daemon:
|
|||||||
new_mode = str(action.arg)
|
new_mode = str(action.arg)
|
||||||
if new_mode != self.mode:
|
if new_mode != self.mode:
|
||||||
self.mode = new_mode
|
self.mode = new_mode
|
||||||
self._console.emit(SYSTEM, f"mode -> {new_mode}", "cyan")
|
self._emit(f"mode -> {new_mode}")
|
||||||
self._refresh_state()
|
self._refresh_state()
|
||||||
return
|
return
|
||||||
if action.name == "set":
|
if action.name == "set":
|
||||||
session = target.set_target(str(action.arg))
|
session = target.set_target(str(action.arg))
|
||||||
self._pending.pop(session, None)
|
self._emit(f"set sticky -> {session}")
|
||||||
self._console.emit(SYSTEM, f"set sticky -> {session}", "cyan")
|
|
||||||
self._refresh_state()
|
self._refresh_state()
|
||||||
return
|
return
|
||||||
if action.name == "unset":
|
if action.name == "unset":
|
||||||
target.unset_target()
|
target.unset_target()
|
||||||
self._console.emit(SYSTEM, "unset (cleared)", "cyan")
|
self._emit("unset (cleared)")
|
||||||
self._refresh_state()
|
self._refresh_state()
|
||||||
return
|
return
|
||||||
if action.name == "list":
|
if action.name == "list":
|
||||||
sessions = target.list_sessions()
|
sessions = target.list_sessions()
|
||||||
self._console.emit(SYSTEM, "list -> " + (", ".join(sessions) if sessions else "(none running)"))
|
self._emit("list -> " + (", ".join(sessions) if sessions else "(none running)"))
|
||||||
return
|
return
|
||||||
|
|
||||||
session, reason = target.resolve(parsed.one_shot, auto_target=cfg.auto_target)
|
session, reason = target.resolve(parsed.one_shot)
|
||||||
if session is None:
|
if session is None:
|
||||||
self._console.emit(VOICE, f'heard "{transcript}" -> {reason} -> '
|
self._emit(f'heard: "{transcript}" -> {reason} -> matched {self._describe(action)} '
|
||||||
f'{self._describe(action)} did nothing', "red")
|
f'-> did nothing')
|
||||||
return
|
return
|
||||||
self._inject(session, transcript, reason, action)
|
prefix = f'heard: "{transcript}" -> {reason} -> matched {self._describe(action)}'
|
||||||
|
if action.name == "type" and not cfg.type_autosend:
|
||||||
def _inject(self, session: str, transcript: str, reason: str, action) -> None:
|
inject.send_literal(session, str(action.arg))
|
||||||
"""run a resolved command against `session`, tracking the uncommitted-input
|
self._emit(f"{prefix} -> injected literal {str(action.arg)!r} -> {session}")
|
||||||
buffer so backspace/erase delete only back to the last submit boundary"""
|
|
||||||
heard = f'heard "{transcript}" ({reason})'
|
|
||||||
name = action.name
|
|
||||||
|
|
||||||
if name == "type":
|
|
||||||
text = str(action.arg)
|
|
||||||
inject.send_literal(session, text)
|
|
||||||
self._pending[session] = self._pending.get(session, 0) + len(text)
|
|
||||||
if self.config.type_autosend:
|
|
||||||
inject.send_named(session, inject.keys.SUBMIT)
|
|
||||||
self._pending[session] = 0
|
|
||||||
self._console.emit(session, f"{heard} -> typed {text!r}"
|
|
||||||
+ (" + send" if self.config.type_autosend else ""), "green")
|
|
||||||
return
|
return
|
||||||
if name == "space":
|
|
||||||
n = int(action.arg)
|
|
||||||
inject.perform(session, action)
|
inject.perform(session, action)
|
||||||
self._pending[session] = self._pending.get(session, 0) + n
|
self._emit(f"{prefix} -> injected {self._describe(action)} -> {session}")
|
||||||
self._console.emit(session, f"{heard} -> space x{n}", "green")
|
|
||||||
return
|
|
||||||
if name == "backspace":
|
|
||||||
have = self._pending.get(session, 0)
|
|
||||||
n = min(int(action.arg), have)
|
|
||||||
if n:
|
|
||||||
inject.perform(session, grammar.Action("backspace", n))
|
|
||||||
self._pending[session] = have - n
|
|
||||||
self._console.emit(session, f"{heard} -> backspace x{n}"
|
|
||||||
+ ("" if n == int(action.arg) else " (capped at boundary)"), "green")
|
|
||||||
return
|
|
||||||
if name == "erase":
|
|
||||||
n = self._pending.get(session, 0)
|
|
||||||
if n:
|
|
||||||
inject.perform(session, grammar.Action("erase", n))
|
|
||||||
self._pending[session] = 0
|
|
||||||
self._console.emit(session, f"{heard} -> erase x{n} (to last boundary)", "green")
|
|
||||||
return
|
|
||||||
|
|
||||||
inject.perform(session, action)
|
|
||||||
if name == "submit":
|
|
||||||
self._pending[session] = 0
|
|
||||||
self._console.emit(session, f"{heard} -> {self._describe(action)}", "green")
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _describe(action) -> str:
|
def _describe(action) -> str:
|
||||||
@ -250,6 +208,11 @@ class Daemon:
|
|||||||
return action.name.upper()
|
return action.name.upper()
|
||||||
return f"{action.name.upper()}({action.arg})"
|
return f"{action.name.upper()}({action.arg})"
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _emit(line: str) -> None:
|
||||||
|
"""print a recognition/action line to the watched terminal"""
|
||||||
|
print(line, flush=True)
|
||||||
|
|
||||||
def _has_wake(self, transcript: str) -> bool:
|
def _has_wake(self, transcript: str) -> bool:
|
||||||
"""true if the utterance starts with a wake phrase (listen-mode gate).
|
"""true if the utterance starts with a wake phrase (listen-mode gate).
|
||||||
|
|
||||||
@ -262,11 +225,15 @@ class Daemon:
|
|||||||
def _print_startup(self) -> None:
|
def _print_startup(self) -> None:
|
||||||
cfg = self.config
|
cfg = self.config
|
||||||
dev = cfg.stt_device if cfg.stt_device != "auto" else "default"
|
dev = cfg.stt_device if cfg.stt_device != "auto" else "default"
|
||||||
target_now = target.read_active() or "(none — run cc / set <name>)"
|
target_now = target.read_active() or "(none — run cc to attach)"
|
||||||
self._console.emit(SYSTEM, f"claudedo {self.mode} mode — Ctrl-C to stop", "bold")
|
self._emit("── claudedo ─────────────────────────────────")
|
||||||
self._console.emit(SYSTEM, f"model {cfg.stt_model} ({cfg.stt_language}) · mic {dev} · "
|
self._emit(f" model: {cfg.stt_model} ({cfg.stt_language})")
|
||||||
f"target {target_now}")
|
self._emit(f" mic: {dev}")
|
||||||
self._console.emit(SYSTEM, "wake: " + ", ".join(cfg.wake_phrases))
|
self._emit(f" mode: {self.mode}")
|
||||||
|
self._emit(f" target: {target_now}")
|
||||||
|
self._emit(f" wake: {', '.join(cfg.wake_phrases)}")
|
||||||
|
self._emit(" Ctrl-C to stop")
|
||||||
|
self._emit("─────────────────────────────────────────────")
|
||||||
|
|
||||||
def _refresh_state(self) -> None:
|
def _refresh_state(self) -> None:
|
||||||
write_state(os.getpid(), self.mode, target.read_active())
|
write_state(os.getpid(), self.mode, target.read_active())
|
||||||
@ -290,10 +257,7 @@ class Daemon:
|
|||||||
if not transcript:
|
if not transcript:
|
||||||
continue
|
continue
|
||||||
if self.mode == "listen" and not self._has_wake(transcript):
|
if self.mode == "listen" and not self._has_wake(transcript):
|
||||||
if self.config.print_heard:
|
self._emit("dropped: non-wake speech (not recorded)")
|
||||||
self._console.emit(VOICE, f'heard (dropped) "{transcript}"', "red")
|
|
||||||
else:
|
|
||||||
self._console.emit(VOICE, "dropped: non-wake speech (not recorded)", "dim")
|
|
||||||
continue
|
continue
|
||||||
self._handle(transcript)
|
self._handle(transcript)
|
||||||
finally:
|
finally:
|
||||||
|
|||||||
@ -27,12 +27,6 @@ _NUMBER_WORDS = {
|
|||||||
|
|
||||||
_INDEX_WORDS = {"1": 1, "2": 2, "3": 3, "4": 4}
|
_INDEX_WORDS = {"1": 1, "2": 2, "3": 3, "4": 4}
|
||||||
|
|
||||||
_COUNT_WORDS = {
|
|
||||||
"five": 5, "six": 6, "seven": 7, "eight": 8, "nine": 9, "ten": 10,
|
|
||||||
"eleven": 11, "twelve": 12, "thirteen": 13, "fourteen": 14, "fifteen": 15,
|
|
||||||
"sixteen": 16, "seventeen": 17, "eighteen": 18, "nineteen": 19, "twenty": 20,
|
|
||||||
}
|
|
||||||
|
|
||||||
_STICKY_VERBS = ("set", "sticky", "switch")
|
_STICKY_VERBS = ("set", "sticky", "switch")
|
||||||
_ONESHOT_VERBS = ("target",)
|
_ONESHOT_VERBS = ("target",)
|
||||||
_UNSET_VERBS = ("unset", "unsticky")
|
_UNSET_VERBS = ("unset", "unsticky")
|
||||||
@ -45,10 +39,9 @@ DEFAULT_FILLER = ("select", "use", "choose")
|
|||||||
class Action:
|
class Action:
|
||||||
"""a matched command: a name plus an optional argument.
|
"""a matched command: a name plus an optional argument.
|
||||||
|
|
||||||
names: yes, no, select, approve, deny, submit, type, space, backspace, erase,
|
names: yes, no, select, approve, deny, submit, type, cancel, mode, set, unset,
|
||||||
cancel, mode, set, unset, list. arg carries the select index (int), the literal
|
list. arg carries the select index (int), the literal text for ``type``, the mode
|
||||||
text for ``type``, the count for ``space``/``backspace`` (int), the mode for
|
for ``mode``, or the session short-name for ``set``.
|
||||||
``mode``, or the session short-name for ``set``.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
name: str
|
name: str
|
||||||
@ -130,22 +123,6 @@ def _fuzzy_in(token: str, options: tuple[str, ...], threshold: float) -> bool:
|
|||||||
return any(_ratio(token, opt) >= threshold for opt in options)
|
return any(_ratio(token, opt) >= threshold for opt in options)
|
||||||
|
|
||||||
|
|
||||||
def _leading_count(rest: list[str], default: int = 1) -> int:
|
|
||||||
"""read a count from the first token (digit or number word), else the default.
|
|
||||||
|
|
||||||
'backspace 3' -> 3, 'backspace ten' -> 10 (normalize maps small words to digits;
|
|
||||||
larger words come from _COUNT_WORDS), 'backspace' -> default.
|
|
||||||
"""
|
|
||||||
if not rest:
|
|
||||||
return default
|
|
||||||
tok = rest[0]
|
|
||||||
if tok.isdigit():
|
|
||||||
return max(0, int(tok))
|
|
||||||
if tok in _COUNT_WORDS:
|
|
||||||
return _COUNT_WORDS[tok]
|
|
||||||
return default
|
|
||||||
|
|
||||||
|
|
||||||
def match_command(remainder: str, threshold: float) -> Action | None:
|
def match_command(remainder: str, threshold: float) -> Action | None:
|
||||||
"""map a normalized command remainder to an Action, or None if unrecognized.
|
"""map a normalized command remainder to an Action, or None if unrecognized.
|
||||||
|
|
||||||
@ -183,13 +160,6 @@ def match_command(remainder: str, threshold: float) -> Action | None:
|
|||||||
text = " ".join(rest).strip()
|
text = " ".join(rest).strip()
|
||||||
return Action("type", text) if text else None
|
return Action("type", text) if text else None
|
||||||
|
|
||||||
if _fuzzy_in(head, ("backspace", "delete"), threshold):
|
|
||||||
return Action("backspace", _leading_count(rest, default=1))
|
|
||||||
if _fuzzy_in(head, ("space",), threshold):
|
|
||||||
return Action("space", _leading_count(rest, default=1))
|
|
||||||
if _fuzzy_in(head, ("erase", "clear", "wipe"), threshold):
|
|
||||||
return Action("erase")
|
|
||||||
|
|
||||||
if _fuzzy_in(head, ("mode",), threshold) and rest:
|
if _fuzzy_in(head, ("mode",), threshold) and rest:
|
||||||
if _fuzzy_in(rest[0], ("ptt",), threshold) or "push" in rest[0]:
|
if _fuzzy_in(rest[0], ("ptt",), threshold) or "push" in rest[0]:
|
||||||
return Action("mode", "ptt")
|
return Action("mode", "ptt")
|
||||||
|
|||||||
@ -45,18 +45,11 @@ class OutputHandler(ABC):
|
|||||||
def send_literal(self, session: str, text: str) -> None:
|
def send_literal(self, session: str, text: str) -> None:
|
||||||
"""emit literal text into the input box without submitting (``type``)"""
|
"""emit literal text into the input box without submitting (``type``)"""
|
||||||
|
|
||||||
def send_repeat(self, session: str, token: str, count: int) -> None:
|
|
||||||
"""emit a named key `count` times (e.g. BSpace x n). default impl loops."""
|
|
||||||
if count <= 0:
|
|
||||||
return
|
|
||||||
self.send_named(session, [token] * count)
|
|
||||||
|
|
||||||
def perform(self, session: str, action) -> bool:
|
def perform(self, session: str, action) -> bool:
|
||||||
"""resolve a grammar.Action to keystrokes and emit them. returns acted?.
|
"""resolve a grammar.Action to keystrokes and emit them. returns acted?.
|
||||||
|
|
||||||
``switch``/``set``/``mode`` etc. are handled by the daemon (they change daemon
|
``switch`` and ``mode`` are handled by the daemon (they change daemon state,
|
||||||
state, not the claude session), so they are ignored here. ``erase`` arrives
|
not the claude session), so they are ignored here.
|
||||||
with action.arg already set to the count the daemon wants backspaced.
|
|
||||||
"""
|
"""
|
||||||
name = action.name
|
name = action.name
|
||||||
if name == "yes":
|
if name == "yes":
|
||||||
@ -79,10 +72,6 @@ class OutputHandler(ABC):
|
|||||||
self.send_named(session, seq)
|
self.send_named(session, seq)
|
||||||
elif name == "type":
|
elif name == "type":
|
||||||
self.send_literal(session, str(action.arg))
|
self.send_literal(session, str(action.arg))
|
||||||
elif name == "space":
|
|
||||||
self.send_literal(session, " " * int(action.arg))
|
|
||||||
elif name in ("backspace", "erase"):
|
|
||||||
self.send_repeat(session, keys.BACKSPACE[0], int(action.arg))
|
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|||||||
@ -37,12 +37,6 @@ DENY = ["3"]
|
|||||||
SUBMIT = ["Enter"]
|
SUBMIT = ["Enter"]
|
||||||
CANCEL = ["Escape"]
|
CANCEL = ["Escape"]
|
||||||
|
|
||||||
# BACKSPACE deletes one char left; SPACE inserts one literal space. both are emitted
|
|
||||||
# repeatedly for `backspace <n>` / `space <n>` and for `erase` (n = the daemon's
|
|
||||||
# tracked uncommitted-input count). BSpace is tmux's name for the backspace key.
|
|
||||||
BACKSPACE = ["BSpace"]
|
|
||||||
SPACE = [" "]
|
|
||||||
|
|
||||||
SELECT_BY_INDEX = {
|
SELECT_BY_INDEX = {
|
||||||
1: SELECT_1,
|
1: SELECT_1,
|
||||||
2: SELECT_2,
|
2: SELECT_2,
|
||||||
|
|||||||
@ -6,77 +6,12 @@ short in-memory chunk; nothing is written to disk or sent anywhere.
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import contextlib
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
_NOISE = re.compile(r"GPU device discovery failed|device_discovery\.cc|DiscoverDevicesForPlatform")
|
|
||||||
|
|
||||||
|
|
||||||
def _quiet_backends() -> None:
|
|
||||||
"""quiet onnxruntime/ctranslate2 chatter and the faster_whisper INFO log.
|
|
||||||
|
|
||||||
faster-whisper's VAD loads an onnx model whose device discovery prints a noisy
|
|
||||||
'GPU device discovery failed' warning on headless/WSL hosts with no GPU sysfs.
|
|
||||||
the env var + logger severity stop most onnx logging; the warning itself is
|
|
||||||
emitted at C++ init and is filtered out of stderr by _filter_stderr().
|
|
||||||
"""
|
|
||||||
os.environ.setdefault("ORT_LOGGING_LEVEL", "3")
|
|
||||||
os.environ.setdefault("OMP_NUM_THREADS", os.environ.get("OMP_NUM_THREADS", "4"))
|
|
||||||
logging.getLogger("faster_whisper").setLevel(logging.WARNING)
|
|
||||||
try:
|
|
||||||
import onnxruntime
|
|
||||||
onnxruntime.set_default_logger_severity(3)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
@contextlib.contextmanager
|
|
||||||
def _filter_stderr():
|
|
||||||
"""drop onnxruntime's GPU-discovery warning lines from stderr for this block.
|
|
||||||
|
|
||||||
a pipe temporarily replaces fd 2; a pump thread forwards every line to the real
|
|
||||||
stderr EXCEPT the known GPU-discovery noise, so real errors still surface. the
|
|
||||||
original fd is always restored on exit.
|
|
||||||
"""
|
|
||||||
import threading
|
|
||||||
|
|
||||||
try:
|
|
||||||
stderr_fd = sys.stderr.fileno()
|
|
||||||
except (AttributeError, OSError):
|
|
||||||
yield
|
|
||||||
return
|
|
||||||
|
|
||||||
saved_fd = os.dup(stderr_fd)
|
|
||||||
read_fd, write_fd = os.pipe()
|
|
||||||
os.dup2(write_fd, stderr_fd)
|
|
||||||
os.close(write_fd)
|
|
||||||
|
|
||||||
def pump():
|
|
||||||
with os.fdopen(read_fd, "rb") as reader, os.fdopen(saved_fd, "wb", closefd=False) as out:
|
|
||||||
for line in reader:
|
|
||||||
if not _NOISE.search(line.decode("utf-8", "replace")):
|
|
||||||
out.write(line)
|
|
||||||
out.flush()
|
|
||||||
|
|
||||||
thread = threading.Thread(target=pump, daemon=True)
|
|
||||||
thread.start()
|
|
||||||
try:
|
|
||||||
yield
|
|
||||||
finally:
|
|
||||||
import time
|
|
||||||
|
|
||||||
time.sleep(0.05)
|
|
||||||
os.dup2(saved_fd, stderr_fd)
|
|
||||||
os.close(saved_fd)
|
|
||||||
thread.join(timeout=1.0)
|
|
||||||
|
|
||||||
|
|
||||||
class Transcriber:
|
class Transcriber:
|
||||||
"""a loaded faster-whisper model that transcribes float32 mono audio chunks"""
|
"""a loaded faster-whisper model that transcribes float32 mono audio chunks"""
|
||||||
@ -85,26 +20,18 @@ class Transcriber:
|
|||||||
compute_type: str = "auto") -> None:
|
compute_type: str = "auto") -> None:
|
||||||
self.language = language
|
self.language = language
|
||||||
self._model = self._load(model, device, compute_type)
|
self._model = self._load(model, device, compute_type)
|
||||||
self._warm()
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _load(model: str, device: str, compute_type: str):
|
def _load(model: str, device: str, compute_type: str):
|
||||||
|
from faster_whisper import WhisperModel
|
||||||
|
|
||||||
if device == "auto":
|
if device == "auto":
|
||||||
device = "cpu"
|
device = "cpu"
|
||||||
if compute_type == "auto":
|
if compute_type == "auto":
|
||||||
compute_type = "int8" if device == "cpu" else "float16"
|
compute_type = "int8" if device == "cpu" else "float16"
|
||||||
log.info("loading faster-whisper model=%s device=%s compute=%s", model, device, compute_type)
|
log.info("loading faster-whisper model=%s device=%s compute=%s", model, device, compute_type)
|
||||||
with _filter_stderr():
|
|
||||||
_quiet_backends()
|
|
||||||
from faster_whisper import WhisperModel
|
|
||||||
return WhisperModel(model, device=device, compute_type=compute_type)
|
return WhisperModel(model, device=device, compute_type=compute_type)
|
||||||
|
|
||||||
def _warm(self) -> None:
|
|
||||||
"""run one throwaway transcribe so the VAD onnx session inits now, under the
|
|
||||||
stderr filter — the GPU-discovery warning fires here once, not in the loop"""
|
|
||||||
with _filter_stderr():
|
|
||||||
list(self._model.transcribe(np.zeros(1600, dtype=np.float32), vad_filter=True)[0])
|
|
||||||
|
|
||||||
def transcribe(self, audio: np.ndarray, samplerate: int = 16000) -> str:
|
def transcribe(self, audio: np.ndarray, samplerate: int = 16000) -> str:
|
||||||
"""transcribe a mono float32 numpy array to a stripped text string.
|
"""transcribe a mono float32 numpy array to a stripped text string.
|
||||||
|
|
||||||
|
|||||||
@ -85,7 +85,7 @@ def list_sessions() -> list[str]:
|
|||||||
return sorted(n for n in names if n.startswith(SESSION_PREFIX))
|
return sorted(n for n in names if n.startswith(SESSION_PREFIX))
|
||||||
|
|
||||||
|
|
||||||
def resolve(one_shot: str | None = None, auto_target: bool = False) -> tuple[str | None, str]:
|
def resolve(one_shot: str | None = None) -> tuple[str | None, str]:
|
||||||
"""resolve the destination session and a short reason describing the choice.
|
"""resolve the destination session and a short reason describing the choice.
|
||||||
|
|
||||||
single source of truth for targeting, used by both the voice and CLI paths.
|
single source of truth for targeting, used by both the voice and CLI paths.
|
||||||
@ -95,9 +95,7 @@ def resolve(one_shot: str | None = None, auto_target: bool = False) -> tuple[str
|
|||||||
1. one-shot present -> claude-<name> for THIS command only; never falls through
|
1. one-shot present -> claude-<name> for THIS command only; never falls through
|
||||||
to a different session if it doesn't exist (explicit beats convenience).
|
to a different session if it doesn't exist (explicit beats convenience).
|
||||||
2. sticky set + exists -> use it.
|
2. sticky set + exists -> use it.
|
||||||
3. nothing sticky, exactly one claude-* session:
|
3. nothing sticky, exactly one claude-* session -> auto-use it.
|
||||||
auto_target=True -> auto-use it;
|
|
||||||
auto_target=False -> require an explicit set/target, do nothing.
|
|
||||||
4. nothing sticky, multiple sessions -> ambiguous, do nothing.
|
4. nothing sticky, multiple sessions -> ambiguous, do nothing.
|
||||||
5. nothing sticky, zero sessions -> do nothing.
|
5. nothing sticky, zero sessions -> do nothing.
|
||||||
"""
|
"""
|
||||||
@ -115,9 +113,7 @@ def resolve(one_shot: str | None = None, auto_target: bool = False) -> tuple[str
|
|||||||
|
|
||||||
sessions = list_sessions()
|
sessions = list_sessions()
|
||||||
if len(sessions) == 1:
|
if len(sessions) == 1:
|
||||||
if auto_target:
|
|
||||||
return sessions[0], f"auto-target {sessions[0]} (only session)"
|
return sessions[0], f"auto-target {sessions[0]} (only session)"
|
||||||
return None, f"no target set ({sessions[0]} running — set one)"
|
|
||||||
if len(sessions) > 1:
|
if len(sessions) > 1:
|
||||||
return None, f"no target set, {len(sessions)} sessions (set one)"
|
return None, f"no target set, {len(sessions)} sessions (set one)"
|
||||||
return None, "no claude sessions"
|
return None, "no claude sessions"
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user