# cohere-transcribe/src/cohere_transcribe/cli/cli.py

import os
import subprocess
import sys
import time

import typer
from rich.console import Console

from ..daemon import STATE_FILE, is_running, read_state, stop_daemon

# Typer application object; the @app.command() functions in this module
# register themselves on it as sub-commands.
app = typer.Typer(help="Cohere live transcription — speaks into your keyboard.")
# Shared Rich console used by the commands in this module for styled output.
console = Console()


def _parse_device(value: str | None):
    if value is None:
        return None
    try:
        return int(value)
    except ValueError:
        return value


@app.command()
def on(
    language: str = typer.Option("en", "--lang", "-l", help="Language code"),
    pause: float = typer.Option(0.3, "--pause", "-p", help="Seconds of silence before sending text"),
    device: str = typer.Option(None, "--device", "-d", help="Input device index or name substring (see `cohere devices`)"),
    normalize: bool = typer.Option(False, "--normalize", "-n", help="Enable compressor + limiter to even out loudness"),
    foreground: bool = typer.Option(False, "--fg", help="Run in foreground (don't daemonize)"),
):
    """Start transcribing and typing into your focused window."""
    # NOTE: the docstring above doubles as the command's --help text, so
    # implementation notes are kept in comments.
    #
    # Exits with status 1 when a daemon is already running, or when the
    # spawned daemon is not reported as running after the startup wait.
    if is_running():
        console.print("[yellow]Already running.[/yellow]")
        raise typer.Exit(1)

    if foreground:
        # Foreground mode runs the daemon loop in this process and returns
        # only when run_daemon() does.
        from ..daemon import run_daemon
        console.print("[green]Starting cohere (foreground)...[/green]")
        run_daemon(language, pause=pause, device=_parse_device(device), normalize=normalize)
        return

    console.print("[green]Starting cohere daemon...[/green]")
    state_dir = os.path.dirname(STATE_FILE)
    os.makedirs(state_dir, exist_ok=True)

    # Re-invoke the interpreter on the daemon entry module, forwarding only
    # the options that differ from their defaults.
    cmd = [sys.executable, "-m", "cohere_transcribe.daemon_main", "--lang", language]
    if pause != 0.3:
        cmd += ["--pause", str(pause)]
    if device is not None:
        # The raw option text is forwarded; parsing happens on the other side.
        cmd += ["--device", device]
    if normalize:
        cmd += ["--normalize"]

    # The child receives its own copy of the log descriptor, so the parent's
    # handle is closed as soon as the spawn returns instead of being leaked.
    with open(os.path.join(state_dir, "daemon.log"), "a") as log_file:
        proc = subprocess.Popen(
            cmd,
            start_new_session=True,  # detach from this terminal's session
            stdin=subprocess.DEVNULL,
            stdout=log_file,
            stderr=subprocess.STDOUT,
        )

    # Wait up to ~5 s (50 x 0.1 s) for the daemon to report itself running.
    for _ in range(50):
        time.sleep(0.1)
        if is_running():
            break
        # A non-zero exit status means the launcher already failed, so stop
        # waiting. A zero exit is not treated as failure in case the launcher
        # hands off to another process before exiting.
        if proc.poll() not in (None, 0):
            break

    if is_running():
        console.print("[green]Cohere is on — speak and it types.[/green]")
    else:
        console.print("[red]Failed to start daemon. Check ~/.local/state/cohere/daemon.log[/red]")
        raise typer.Exit(1)


@app.command()
def off():
    """Stop transcribing."""
    # Nothing to stop: report it and exit successfully (status 0).
    if not is_running():
        console.print("[yellow]Not running.[/yellow]")
        raise typer.Exit(0)

    stopped = stop_daemon()
    if not stopped:
        # stop_daemon() returned a falsy result: surface it as exit status 1.
        console.print("[red]Failed to stop daemon.[/red]")
        raise typer.Exit(1)

    console.print("[red]Cohere is off.[/red]")


@app.command()
def status():
    """Show whether cohere is running."""
    # State is read before the liveness check, preserving the original order.
    state = read_state()
    if not is_running():
        console.print("[dim]OFF[/dim]")
        return

    # Only report an uptime when a start timestamp was actually recorded.
    # Defaulting a missing value to 0 would measure from the Unix epoch and
    # print a meaningless minute count.
    started = state.get("started_at") if state else None
    if not started:
        console.print("[green]ON[/green]")
        return

    # Clamp at zero so a clock adjustment cannot produce a negative uptime.
    elapsed = max(0.0, time.time() - started)
    minutes = int(elapsed) // 60
    console.print(f"[green]ON[/green] — running for {minutes}m")


@app.command()
def transcribe(
    audio_file: str = typer.Argument(None, help="Audio file to transcribe"),
    mic: int = typer.Option(None, "--mic", "-m", help="Record from mic for N seconds"),
    stream: bool = typer.Option(False, "--stream", "-s", help="Live streaming mode (prints to terminal)"),
    language: str = typer.Option("en", "--lang", "-l", help="Language code"),
    pause: float = typer.Option(0.3, "--pause", "-p", help="Seconds of silence before sending text"),
    device: str = typer.Option(None, "--device", "-d", help="Input device index or name substring (see `cohere devices`)"),
    normalize: bool = typer.Option(False, "--normalize", "-n", help="Enable compressor + limiter to even out loudness"),
):
    """One-shot transcription (file, mic, or stream to terminal)."""
    # Source priority: --stream, then --mic, then the positional audio file.
    # With none of the three given, a hint is printed and the exit status is 1.
    from ..model import load_model, transcribe_audio
    from ..vad import pause_seconds_to_frames

    input_device = _parse_device(device)

    if not stream and mic is None and not audio_file:
        console.print("[yellow]Provide an audio file, --mic, or --stream[/yellow]")
        raise typer.Exit(1)

    if stream:
        # Live mode: stream_transcribe() takes over until it returns.
        from ..stream import stream_transcribe
        processor, model = load_model()
        silence_frames = pause_seconds_to_frames(pause)
        stream_transcribe(
            processor,
            model,
            language,
            silence_frames=silence_frames,
            device=input_device,
            normalize=normalize,
        )
        return

    if mic is not None:
        # Record `mic` seconds from the input device, then transcribe once.
        from ..model import record_audio
        processor, model = load_model()
        try:
            samples = record_audio(mic, device=input_device, normalize=normalize)
            console.print("Transcribing...")
            transcript = transcribe_audio(processor, model, samples, language)
            console.print(f"\n{transcript}\n")
        except OSError as e:
            console.print(f"[red]Microphone error: {e}[/red]")
            raise typer.Exit(1)
        return

    # Remaining case: transcribe the audio file given on the command line.
    from transformers.audio_utils import load_audio as load_audio_file
    from ..model import SAMPLE_RATE
    processor, model = load_model()
    samples = load_audio_file(audio_file, sampling_rate=SAMPLE_RATE)
    transcript = transcribe_audio(processor, model, samples, language)
    console.print(f"\n{transcript}\n")


@app.command()
def devices():
    """List available audio input devices."""
    import sounddevice as sd

    # First entry of the default device pair is the input side.
    default_input = sd.default.device[0]
    for index, info in enumerate(sd.query_devices()):
        channels = info["max_input_channels"]
        if channels > 0:
            # A green star marks the default input; other rows get a space.
            star = " " if index != default_input else "[green]*[/green]"
            api_name = sd.query_hostapis(info["hostapi"])["name"]
            rate_hz = int(info["default_samplerate"])
            details = f"({channels}ch, {rate_hz}Hz, {api_name})"
            console.print(
                f"{star} [bold]{index:>2}[/bold]  {info['name']}  "
                f"[dim]{details}[/dim]"
            )

    tip = (
        "\n[dim]Tip: indices can shift between runs on PipeWire. "
        "Prefer [bold]-d pipewire[/bold] (uses PipeWire's default source) or pass a name substring like [bold]-d Sipeed[/bold].[/dim]"
    )
    console.print(tip)


def main() -> None:
    """Run the Typer application defined in this module."""
    app()