feat: add --normalize compressor + limiter for input audio

Adds a feedforward dynamic range compressor with a brick-wall limiter
applied in the audio callback. Quiet speech gets +12 dB of makeup gain,
loud bursts above -20 dBFS are compressed at a 4:1 ratio, and the
output is hard-limited at -1 dBFS so nothing clips. Enabled via
--normalize/-n on `cohere on` and `cohere transcribe`.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-06-06 22:56:21 +08:00
parent 853b5523e5
commit 58fa7526fb
7 changed files with 86 additions and 11 deletions
+7 -3
View File
@@ -26,6 +26,7 @@ def on(
language: str = typer.Option("en", "--lang", "-l", help="Language code"),
pause: float = typer.Option(0.3, "--pause", "-p", help="Seconds of silence before sending text"),
device: str = typer.Option(None, "--device", "-d", help="Input device index or name substring (see `cohere devices`)"),
normalize: bool = typer.Option(False, "--normalize", "-n", help="Enable compressor + limiter to even out loudness"),
foreground: bool = typer.Option(False, "--fg", help="Run in foreground (don't daemonize)"),
):
"""Start transcribing and typing into your focused window."""
@@ -36,7 +37,7 @@ def on(
if foreground:
from ..daemon import run_daemon
console.print("[green]Starting cohere (foreground)...[/green]")
run_daemon(language, pause=pause, device=_parse_device(device))
run_daemon(language, pause=pause, device=_parse_device(device), normalize=normalize)
return
console.print("[green]Starting cohere daemon...[/green]")
@@ -46,6 +47,8 @@ def on(
cmd += ["--pause", str(pause)]
if device is not None:
cmd += ["--device", device]
if normalize:
cmd += ["--normalize"]
subprocess.Popen(
cmd,
start_new_session=True,
@@ -103,6 +106,7 @@ def transcribe(
language: str = typer.Option("en", "--lang", "-l", help="Language code"),
pause: float = typer.Option(0.3, "--pause", "-p", help="Seconds of silence before sending text"),
device: str = typer.Option(None, "--device", "-d", help="Input device index or name substring (see `cohere devices`)"),
normalize: bool = typer.Option(False, "--normalize", "-n", help="Enable compressor + limiter to even out loudness"),
):
"""One-shot transcription (file, mic, or stream to terminal)."""
from ..model import load_model, transcribe_audio
@@ -113,12 +117,12 @@ def transcribe(
if stream:
from ..stream import stream_transcribe
processor, model = load_model()
stream_transcribe(processor, model, language, silence_frames=pause_seconds_to_frames(pause), device=dev)
stream_transcribe(processor, model, language, silence_frames=pause_seconds_to_frames(pause), device=dev, normalize=normalize)
elif mic is not None:
from ..model import record_audio
processor, model = load_model()
try:
audio = record_audio(mic, device=dev)
audio = record_audio(mic, device=dev, normalize=normalize)
console.print("Transcribing...")
text = transcribe_audio(processor, model, audio, language)
console.print(f"\n{text}\n")