feat: make silence pause duration configurable via --pause flag

Default is 0.3s for responsive typing. The pause is configurable on both `cohere on --pause` and `cohere transcribe --stream --pause`.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-30 21:12:26 +08:00
parent 92d8ba28d0
commit f083e424c9
5 changed files with 26 additions and 13 deletions
@@ -15,6 +15,7 @@ console = Console()
@app.command()
 def on(
    language: str = typer.Option("en", "--lang", "-l", help="Language code"),
+    pause: float = typer.Option(0.3, "--pause", "-p", help="Seconds of silence before sending text"),
    foreground: bool = typer.Option(False, "--fg", help="Run in foreground (don't daemonize)"),
 ):
    """Start transcribing and typing into your focused window."""
@@ -25,13 +26,16 @@ def on(
    if foreground:
        from ..daemon import run_daemon
        console.print("[green]Starting cohere (foreground)...[/green]")
-        run_daemon(language)
+        run_daemon(language, pause=pause)
        return

    console.print("[green]Starting cohere daemon...[/green]")
    os.makedirs(os.path.dirname(STATE_FILE), exist_ok=True)
+    cmd = [sys.executable, "-m", "cohere_transcribe.daemon_main", "--lang", language]
+    if pause != 0.3:
+        cmd += ["--pause", str(pause)]
    subprocess.Popen(
-        [sys.executable, "-m", "cohere_transcribe.daemon_main", "--lang", language],
+        cmd,
        start_new_session=True,
        stdin=subprocess.DEVNULL,
        stdout=open(os.path.join(os.path.dirname(STATE_FILE), "daemon.log"), "a"),
@@ -85,14 +89,16 @@ def transcribe(
    mic: int = typer.Option(None, "--mic", "-m", help="Record from mic for N seconds"),
    stream: bool = typer.Option(False, "--stream", "-s", help="Live streaming mode (prints to terminal)"),
    language: str = typer.Option("en", "--lang", "-l", help="Language code"),
+    pause: float = typer.Option(0.3, "--pause", "-p", help="Seconds of silence before sending text"),
 ):
    """One-shot transcription (file, mic, or stream to terminal)."""
    from ..model import load_model, transcribe_audio
+    from ..vad import pause_seconds_to_frames

    if stream:
        from ..stream import stream_transcribe
        processor, model = load_model()
-        stream_transcribe(processor, model, language)
+        stream_transcribe(processor, model, language, silence_frames=pause_seconds_to_frames(pause))
    elif mic is not None:
        from ..model import record_audio
        processor, model = load_model()