feat: add --normalize compressor + limiter for input audio
Adds a feedforward dynamic range compressor with a brick-wall limiter, applied in the audio callback. Quiet speech gets +12 dB of makeup gain, loud bursts are attenuated at a 4:1 ratio above -20 dBFS, and the output is hard-limited at -1 dBFS so nothing clips. Enabled via --normalize/-n on `cohere on` and `cohere transcribe`.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -26,6 +26,7 @@ def on(
|
||||
language: str = typer.Option("en", "--lang", "-l", help="Language code"),
|
||||
pause: float = typer.Option(0.3, "--pause", "-p", help="Seconds of silence before sending text"),
|
||||
device: str = typer.Option(None, "--device", "-d", help="Input device index or name substring (see `cohere devices`)"),
|
||||
normalize: bool = typer.Option(False, "--normalize", "-n", help="Enable compressor + limiter to even out loudness"),
|
||||
foreground: bool = typer.Option(False, "--fg", help="Run in foreground (don't daemonize)"),
|
||||
):
|
||||
"""Start transcribing and typing into your focused window."""
|
||||
@@ -36,7 +37,7 @@ def on(
|
||||
if foreground:
|
||||
from ..daemon import run_daemon
|
||||
console.print("[green]Starting cohere (foreground)...[/green]")
|
||||
run_daemon(language, pause=pause, device=_parse_device(device))
|
||||
run_daemon(language, pause=pause, device=_parse_device(device), normalize=normalize)
|
||||
return
|
||||
|
||||
console.print("[green]Starting cohere daemon...[/green]")
|
||||
@@ -46,6 +47,8 @@ def on(
|
||||
cmd += ["--pause", str(pause)]
|
||||
if device is not None:
|
||||
cmd += ["--device", device]
|
||||
if normalize:
|
||||
cmd += ["--normalize"]
|
||||
subprocess.Popen(
|
||||
cmd,
|
||||
start_new_session=True,
|
||||
@@ -103,6 +106,7 @@ def transcribe(
|
||||
language: str = typer.Option("en", "--lang", "-l", help="Language code"),
|
||||
pause: float = typer.Option(0.3, "--pause", "-p", help="Seconds of silence before sending text"),
|
||||
device: str = typer.Option(None, "--device", "-d", help="Input device index or name substring (see `cohere devices`)"),
|
||||
normalize: bool = typer.Option(False, "--normalize", "-n", help="Enable compressor + limiter to even out loudness"),
|
||||
):
|
||||
"""One-shot transcription (file, mic, or stream to terminal)."""
|
||||
from ..model import load_model, transcribe_audio
|
||||
@@ -113,12 +117,12 @@ def transcribe(
|
||||
if stream:
|
||||
from ..stream import stream_transcribe
|
||||
processor, model = load_model()
|
||||
stream_transcribe(processor, model, language, silence_frames=pause_seconds_to_frames(pause), device=dev)
|
||||
stream_transcribe(processor, model, language, silence_frames=pause_seconds_to_frames(pause), device=dev, normalize=normalize)
|
||||
elif mic is not None:
|
||||
from ..model import record_audio
|
||||
processor, model = load_model()
|
||||
try:
|
||||
audio = record_audio(mic, device=dev)
|
||||
audio = record_audio(mic, device=dev, normalize=normalize)
|
||||
console.print("Transcribing...")
|
||||
text = transcribe_audio(processor, model, audio, language)
|
||||
console.print(f"\n{text}\n")
|
||||
|
||||
Reference in New Issue
Block a user