import os
import subprocess
import sys
import time

import typer
from rich.console import Console

from ..daemon import STATE_FILE, is_running, read_state, stop_daemon

app = typer.Typer(help="Cohere live transcription — speaks into your keyboard.")
console = Console()

# Default value of the --pause option; `on` only forwards --pause to the
# daemon process when the user picked something different.
_DEFAULT_PAUSE = 0.3


def _parse_device(value: str | None) -> int | str | None:
    """Convert a ``--device`` option value into a device selector.

    Returns ``None`` when no device was given, an ``int`` when the value
    parses as an integer (a device index), and otherwise the original
    string (a device name substring).
    """
    if value is None:
        return None
    try:
        return int(value)
    except ValueError:
        return value


# NOTE: Typer uses each command's docstring as its --help text, so the
# command docstrings below are user-facing output; extra documentation for
# maintainers lives in comments instead.


@app.command()
def on(
    language: str = typer.Option("en", "--lang", "-l", help="Language code"),
    pause: float = typer.Option(_DEFAULT_PAUSE, "--pause", "-p", help="Seconds of silence before sending text"),
    device: str = typer.Option(None, "--device", "-d", help="Input device index or name substring (see `cohere devices`)"),
    normalize: bool = typer.Option(False, "--normalize", "-n", help="Enable compressor + limiter to even out loudness"),
    foreground: bool = typer.Option(False, "--fg", help="Run in foreground (don't daemonize)"),
):
    """Start transcribing and typing into your focused window."""
    # Exits with status 1 if a daemon is already running or if the spawned
    # daemon does not report as running within ~5 seconds.
    if is_running():
        console.print("[yellow]Already running.[/yellow]")
        raise typer.Exit(1)

    if foreground:
        # Imported lazily so the background path does not pay for it.
        from ..daemon import run_daemon

        console.print("[green]Starting cohere (foreground)...[/green]")
        run_daemon(language, pause=pause, device=_parse_device(device), normalize=normalize)
        return

    console.print("[green]Starting cohere daemon...[/green]")
    state_dir = os.path.dirname(STATE_FILE)
    os.makedirs(state_dir, exist_ok=True)
    log_path = os.path.join(state_dir, "daemon.log")

    # Build the daemon command line, forwarding only non-default options.
    # The raw --device string is passed through; the child parses it itself.
    cmd = [sys.executable, "-m", "cohere_transcribe.daemon_main", "--lang", language]
    if pause != _DEFAULT_PAUSE:
        cmd += ["--pause", str(pause)]
    if device is not None:
        cmd += ["--device", device]
    if normalize:
        cmd += ["--normalize"]

    # The child receives its own duplicate of the log descriptor, so the
    # parent's handle is closed as soon as the process has been spawned
    # instead of being leaked.
    with open(log_path, "a") as log_file:
        subprocess.Popen(
            cmd,
            start_new_session=True,  # detach from this terminal's session
            stdin=subprocess.DEVNULL,
            stdout=log_file,
            stderr=subprocess.STDOUT,
        )

    # Poll for up to 50 * 0.1s = 5 seconds until the daemon reports running.
    for _ in range(50):
        time.sleep(0.1)
        if is_running():
            console.print("[green]Cohere is on — speak and it types.[/green]")
            break
    else:
        console.print("[red]Failed to start daemon. Check ~/.local/state/cohere/daemon.log[/red]")
        raise typer.Exit(1)


@app.command()
def off():
    """Stop transcribing."""
    # Exits 0 when nothing is running, 1 when stopping the daemon fails.
    if not is_running():
        console.print("[yellow]Not running.[/yellow]")
        raise typer.Exit(0)
    if stop_daemon():
        console.print("[red]Cohere is off.[/red]")
    else:
        console.print("[red]Failed to stop daemon.[/red]")
        raise typer.Exit(1)


@app.command()
def status():
    """Show whether cohere is running."""
    state = read_state()
    running = is_running()
    if running:
        # NOTE(review): if "started_at" is missing the default 0 makes the
        # reported duration count from the Unix epoch — confirm the daemon
        # always writes this key.
        started = state.get("started_at", 0)
        elapsed = time.time() - started
        minutes = int(elapsed) // 60
        console.print(f"[green]ON[/green] — running for {minutes}m")
    else:
        console.print("[dim]OFF[/dim]")


@app.command()
def transcribe(
    audio_file: str = typer.Argument(None, help="Audio file to transcribe"),
    mic: int = typer.Option(None, "--mic", "-m", help="Record from mic for N seconds"),
    stream: bool = typer.Option(False, "--stream", "-s", help="Live streaming mode (prints to terminal)"),
    language: str = typer.Option("en", "--lang", "-l", help="Language code"),
    pause: float = typer.Option(_DEFAULT_PAUSE, "--pause", "-p", help="Seconds of silence before sending text"),
    device: str = typer.Option(None, "--device", "-d", help="Input device index or name substring (see `cohere devices`)"),
    normalize: bool = typer.Option(False, "--normalize", "-n", help="Enable compressor + limiter to even out loudness"),
):
    """One-shot transcription (file, mic, or stream to terminal)."""
    # Mode precedence: --stream, then --mic, then the audio file argument.
    # Exits with status 1 when no mode is selected or recording fails.
    # Heavy model/audio modules are imported lazily, per mode.
    from ..model import load_model, transcribe_audio
    from ..vad import pause_seconds_to_frames

    dev = _parse_device(device)

    if stream:
        from ..stream import stream_transcribe

        processor, model = load_model()
        stream_transcribe(
            processor,
            model,
            language,
            silence_frames=pause_seconds_to_frames(pause),
            device=dev,
            normalize=normalize,
        )
    elif mic is not None:
        from ..model import record_audio

        processor, model = load_model()
        # NOTE(review): the handler also covers transcribe_audio, so an
        # OSError raised there is reported as a microphone error too.
        try:
            audio = record_audio(mic, device=dev, normalize=normalize)
            console.print("Transcribing...")
            text = transcribe_audio(processor, model, audio, language)
            console.print(f"\n{text}\n")
        except OSError as e:
            console.print(f"[red]Microphone error: {e}[/red]")
            raise typer.Exit(1)
    elif audio_file:
        from transformers.audio_utils import load_audio as load_audio_file

        from ..model import SAMPLE_RATE

        processor, model = load_model()
        audio = load_audio_file(audio_file, sampling_rate=SAMPLE_RATE)
        text = transcribe_audio(processor, model, audio, language)
        console.print(f"\n{text}\n")
    else:
        console.print("[yellow]Provide an audio file, --mic, or --stream[/yellow]")
        raise typer.Exit(1)


@app.command()
def devices():
    """List available audio input devices."""
    import sounddevice as sd

    # First element of the default device pair is the input device.
    default_in = sd.default.device[0]
    for idx, dev in enumerate(sd.query_devices()):
        # Skip devices that cannot capture audio.
        if dev["max_input_channels"] <= 0:
            continue
        marker = "[green]*[/green]" if idx == default_in else " "
        hostapi = sd.query_hostapis(dev["hostapi"])["name"]
        console.print(
            f"{marker} [bold]{idx:>2}[/bold] {dev['name']} "
            f"[dim]({dev['max_input_channels']}ch, {int(dev['default_samplerate'])}Hz, {hostapi})[/dim]"
        )
    console.print(
        "\n[dim]Tip: indices can shift between runs on PipeWire. "
        "Prefer [bold]-d pipewire[/bold] (uses PipeWire's default source) or pass a name substring like [bold]-d Sipeed[/bold].[/dim]"
    )


def main():
    """Run the Typer application (CLI entry point)."""
    app()