feat: commands/customs menu, green heard-echo, snappier VAD

add two voice commands: 'commands' (alias help/menu), which prints the command menu,
and 'customs' (alias custom), a stub until v0.2.0. echo every recognized command as a
green 'heard "..." -> ACTION' line before acting, so you can see what was recognized;
the result line then reports the target and the keystrokes. lower the [vad].silence_ms
default from 800 to 500 for a snappier endpoint after you stop talking.

Signed-off-by: disqualifier <dev@disqualifier.me>
This commit is contained in:
disqualifier 2026-06-26 02:32:28 -04:00
parent 4abdfd56bc
commit 8e20b7eb0b
5 changed files with 56 additions and 10 deletions

View File

@ -125,6 +125,8 @@ the wake phrase is optional.
| `target <name> <command>` | **one-shot** override: run that command on `claude-<name>` for this utterance only; sticky default unchanged |
| `unset` (alias `unsticky`) | clear the sticky target |
| `list` | list running `claude-*` sessions to the daemon console |
| `commands` (alias `help`/`menu`) | print the voice-command menu to the console |
| `customs` (alias `custom`) | custom commands — arriving in v0.2.0 (stub for now) |
| `cancel` / `escape` | back out of a prompt |
Optional filler (`select` / `use` / `choose`) may precede any command and is ignored:

View File

@ -46,7 +46,7 @@ min_utterance = 0.3
# onset and ends after this much trailing silence — the natural end of an utterance.
# a real pause both ends the command AND separates it from following chatter (the
# chatter becomes a separate capture that the wake gate then discards).
silence_ms = 800
silence_ms = 500
# hard cap so continuous noise can't record forever.
max_seconds = 10.0

View File

@ -114,7 +114,7 @@ def load_config(explicit: str | os.PathLike | None = None) -> Config:
samplerate=int(_require(raw, "audio", "samplerate", (int,), 16000)),
channels=int(_require(raw, "audio", "channels", (int,), 1)),
silence_threshold=float(_require(raw, "audio", "silence_threshold", (int, float), 0.012)),
vad_silence_ms=int(_require(raw, "vad", "silence_ms", (int,), 800)),
vad_silence_ms=int(_require(raw, "vad", "silence_ms", (int,), 500)),
vad_max_seconds=float(_require(raw, "vad", "max_seconds", (int, float), 10.0)),
min_utterance=float(_require(raw, "audio", "min_utterance", (int, float), 0.3)),
type_autosend=bool(_require(raw, "behavior", "type_autosend", (bool,), False)),

View File

@ -171,6 +171,9 @@ class Daemon:
return
action = parsed.action
# a command was recognized — echo what we heard (green) before acting.
self._console.emit(VOICE, f'heard "{transcript}" -> {self._describe(action)}', "green")
if action.name == "mode":
new_mode = str(action.arg)
if new_mode != self.mode:
@ -193,6 +196,13 @@ class Daemon:
sessions = target.list_sessions()
self._console.emit(SYSTEM, "list -> " + (", ".join(sessions) if sessions else "(none running)"))
return
if action.name == "commands":
for usage, desc in grammar.command_menu():
self._console.emit(SYSTEM, f" {usage:<26} {desc}")
return
if action.name == "customs":
self._console.emit(SYSTEM, "custom commands arrive in v0.2.0 (contexts.toml)")
return
if action.name == "debug":
self._console.emit(VOICE, f'debug: "{action.arg}"', "yellow")
return
@ -206,8 +216,11 @@ class Daemon:
def _inject(self, session: str, transcript: str, reason: str, action) -> None:
"""run a resolved command against `session`, tracking the uncommitted-input
buffer so backspace/erase delete only back to the last submit boundary"""
heard = f'heard "{transcript}" ({reason})'
buffer so backspace/erase delete only back to the last submit boundary.
the 'heard ...' echo is already printed by _handle; these lines report the
target (reason) and the keystrokes actually injected.
"""
name = action.name
if name == "type":
@ -217,14 +230,14 @@ class Daemon:
if self.config.type_autosend:
inject.send_named(session, inject.keys.SUBMIT)
self._pending[session] = 0
self._console.emit(session, f"{heard} -> typed {text!r}"
self._console.emit(session, f"{reason} -> typed {text!r}"
+ (" + send" if self.config.type_autosend else ""), "green")
return
if name == "space":
n = int(action.arg)
inject.perform(session, action)
self._pending[session] = self._pending.get(session, 0) + n
self._console.emit(session, f"{heard} -> space x{n}", "green")
self._console.emit(session, f"{reason} -> space x{n}", "green")
return
if name == "backspace":
have = self._pending.get(session, 0)
@ -232,7 +245,7 @@ class Daemon:
if n:
inject.perform(session, grammar.Action("backspace", n))
self._pending[session] = have - n
self._console.emit(session, f"{heard} -> backspace x{n}"
self._console.emit(session, f"{reason} -> backspace x{n}"
+ ("" if n == int(action.arg) else " (capped at boundary)"), "green")
return
if name == "erase":
@ -240,13 +253,13 @@ class Daemon:
if n:
inject.perform(session, grammar.Action("erase", n))
self._pending[session] = 0
self._console.emit(session, f"{heard} -> erase x{n} (to last boundary)", "green")
self._console.emit(session, f"{reason} -> erase x{n} (to last boundary)", "green")
return
inject.perform(session, action)
if name == "submit":
self._pending[session] = 0
self._console.emit(session, f"{heard} -> {self._describe(action)}", "green")
self._console.emit(session, f"{reason} -> injected {self._describe(action)}", "green")
@staticmethod
def _describe(action) -> str:

View File

@ -50,6 +50,8 @@ _STICKY_VERBS = ("set", "sticky", "switch")
_ONESHOT_VERBS = ("target",)
_UNSET_VERBS = ("unset", "unsticky")
_LIST_VERBS = ("list", "sessions")
_COMMANDS_VERBS = ("commands", "help", "menu")
_CUSTOMS_VERBS = ("customs", "custom")
_SELECT_VERBS = ("select", "option", "choose", "number")
# every command/synonym word, for biasing the STT toward the vocabulary we expect.
@ -57,7 +59,7 @@ _COMMAND_WORDS = (
_YES_VERBS + _NO_VERBS + _APPROVE_VERBS + _DENY_VERBS + _SUBMIT_VERBS
+ _CANCEL_VERBS + _TYPE_VERBS + _BACKSPACE_VERBS + _SPACE_VERBS + _ADD_VERBS
+ _ERASE_VERBS + _DEBUG_VERBS + _MODE_VERBS + _STICKY_VERBS + _ONESHOT_VERBS + _UNSET_VERBS
+ _LIST_VERBS + _SELECT_VERBS + ("ptt", "listen")
+ _LIST_VERBS + _COMMANDS_VERBS + _CUSTOMS_VERBS + _SELECT_VERBS + ("ptt", "listen")
+ ("one", "two", "three", "four")
)
DEFAULT_FILLER = ("select", "use", "choose")
@ -121,6 +123,31 @@ def initial_prompt(wake_phrases: list[str]) -> str:
return ", ".join(vocabulary(wake_phrases))
def command_menu() -> list[tuple[str, str]]:
    """return the voice command menu as (usage, description) rows.

    backs the `commands` voice command. the rows are a small hand-written
    summary of the speakable command surface, NOT the cc shell kit.
    a fresh list is built on every call, so callers may mutate the result.
    """
    # usage -> description; insertion order is the display order and every
    # usage string is unique, so no row is lost by keying on it.
    rows = {
        "yes / no": "answer a yes/no prompt",
        "one..four": "pick numbered option 1-4",
        "approve / deny": "allow / deny a permission prompt",
        "send": "submit (Enter)",
        "cancel": "back out (Escape)",
        "type <text>": "insert literal text (no submit)",
        "space [n] / add a space": "insert n spaces",
        "backspace [n]": "delete n chars (to last submit)",
        "erase": "wipe the current input",
        "debug <text>": "echo to console (no inject)",
        "set <name>": "sticky target -> claude-<name>",
        "target <name> <cmd>": "one-shot to another session",
        "unset / list": "clear sticky / list sessions",
        "mode ptt|listen": "switch input mode",
        "commands / customs": "this menu / custom commands (v0.2.0)",
    }
    return list(rows.items())
def _ratio(a: str, b: str) -> float:
return SequenceMatcher(None, a, b).ratio()
@ -252,6 +279,10 @@ def match_command(remainder: str, threshold: float) -> Action | None:
return Action("set", name) if name else None
if _fuzzy_in(head, _UNSET_VERBS, threshold) and not rest:
return Action("unset")
if _fuzzy_in(head, _CUSTOMS_VERBS, threshold):
return Action("customs")
if _fuzzy_in(head, _COMMANDS_VERBS, threshold):
return Action("commands")
if _fuzzy_in(head, _LIST_VERBS, threshold):
return Action("list")