feat: add --normalize compressor + limiter for input audio

Adds a feedforward dynamic range compressor with a brick-wall limiter applied in the audio callback. Quiet speech gets +12 dB makeup gain, loud bursts are attenuated at a 4:1 ratio above -20 dBFS, and the output is hard-limited at -1 dBFS so nothing clips. Enabled via --normalize/-n on `cohere on` and `cohere transcribe`.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-06 22:56:21 +08:00
parent 853b5523e5
commit 58fa7526fb
7 changed files with 86 additions and 11 deletions
@@ -26,6 +26,7 @@ def on(
    language: str = typer.Option("en", "--lang", "-l", help="Language code"),
    pause: float = typer.Option(0.3, "--pause", "-p", help="Seconds of silence before sending text"),
    device: str = typer.Option(None, "--device", "-d", help="Input device index or name substring (see `cohere devices`)"),
+    normalize: bool = typer.Option(False, "--normalize", "-n", help="Enable compressor + limiter to even out loudness"),
    foreground: bool = typer.Option(False, "--fg", help="Run in foreground (don't daemonize)"),
 ):
    """Start transcribing and typing into your focused window."""
@@ -36,7 +37,7 @@ def on(
    if foreground:
        from ..daemon import run_daemon
        console.print("[green]Starting cohere (foreground)...[/green]")
-        run_daemon(language, pause=pause, device=_parse_device(device))
+        run_daemon(language, pause=pause, device=_parse_device(device), normalize=normalize)
        return

    console.print("[green]Starting cohere daemon...[/green]")
@@ -46,6 +47,8 @@ def on(
        cmd += ["--pause", str(pause)]
    if device is not None:
        cmd += ["--device", device]
+    if normalize:
+        cmd += ["--normalize"]
    subprocess.Popen(
        cmd,
        start_new_session=True,
@@ -103,6 +106,7 @@ def transcribe(
    language: str = typer.Option("en", "--lang", "-l", help="Language code"),
    pause: float = typer.Option(0.3, "--pause", "-p", help="Seconds of silence before sending text"),
    device: str = typer.Option(None, "--device", "-d", help="Input device index or name substring (see `cohere devices`)"),
+    normalize: bool = typer.Option(False, "--normalize", "-n", help="Enable compressor + limiter to even out loudness"),
 ):
    """One-shot transcription (file, mic, or stream to terminal)."""
    from ..model import load_model, transcribe_audio
@@ -113,12 +117,12 @@ def transcribe(
    if stream:
        from ..stream import stream_transcribe
        processor, model = load_model()
-        stream_transcribe(processor, model, language, silence_frames=pause_seconds_to_frames(pause), device=dev)
+        stream_transcribe(processor, model, language, silence_frames=pause_seconds_to_frames(pause), device=dev, normalize=normalize)
    elif mic is not None:
        from ..model import record_audio
        processor, model = load_model()
        try:
-            audio = record_audio(mic, device=dev)
+            audio = record_audio(mic, device=dev, normalize=normalize)
            console.print("Transcribing...")
            text = transcribe_audio(processor, model, audio, language)
            console.print(f"\n{text}\n")