feat: add --device flag and devices command for mic selection
Lets the user pick an input device by index or name substring. Adds `cohere devices` for listing. For devices that don't support 16kHz natively (e.g. Sipeed MicArray hw at 48kHz), captures at the device's native rate and resamples to 16kHz via scipy.signal.resample_poly. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -12,10 +12,20 @@ app = typer.Typer(help="Cohere live transcription — speaks into your keyboard.
|
||||
console = Console()
|
||||
|
||||
|
||||
def _parse_device(value: str | None):
|
||||
if value is None:
|
||||
return None
|
||||
try:
|
||||
return int(value)
|
||||
except ValueError:
|
||||
return value
|
||||
|
||||
|
||||
@app.command()
|
||||
def on(
|
||||
language: str = typer.Option("en", "--lang", "-l", help="Language code"),
|
||||
pause: float = typer.Option(0.3, "--pause", "-p", help="Seconds of silence before sending text"),
|
||||
device: str = typer.Option(None, "--device", "-d", help="Input device index or name substring (see `cohere devices`)"),
|
||||
foreground: bool = typer.Option(False, "--fg", help="Run in foreground (don't daemonize)"),
|
||||
):
|
||||
"""Start transcribing and typing into your focused window."""
|
||||
@@ -26,7 +36,7 @@ def on(
|
||||
if foreground:
|
||||
from ..daemon import run_daemon
|
||||
console.print("[green]Starting cohere (foreground)...[/green]")
|
||||
run_daemon(language, pause=pause)
|
||||
run_daemon(language, pause=pause, device=_parse_device(device))
|
||||
return
|
||||
|
||||
console.print("[green]Starting cohere daemon...[/green]")
|
||||
@@ -34,6 +44,8 @@ def on(
|
||||
cmd = [sys.executable, "-m", "cohere_transcribe.daemon_main", "--lang", language]
|
||||
if pause != 0.3:
|
||||
cmd += ["--pause", str(pause)]
|
||||
if device is not None:
|
||||
cmd += ["--device", device]
|
||||
subprocess.Popen(
|
||||
cmd,
|
||||
start_new_session=True,
|
||||
@@ -90,20 +102,23 @@ def transcribe(
|
||||
stream: bool = typer.Option(False, "--stream", "-s", help="Live streaming mode (prints to terminal)"),
|
||||
language: str = typer.Option("en", "--lang", "-l", help="Language code"),
|
||||
pause: float = typer.Option(0.3, "--pause", "-p", help="Seconds of silence before sending text"),
|
||||
device: str = typer.Option(None, "--device", "-d", help="Input device index or name substring (see `cohere devices`)"),
|
||||
):
|
||||
"""One-shot transcription (file, mic, or stream to terminal)."""
|
||||
from ..model import load_model, transcribe_audio
|
||||
from ..vad import pause_seconds_to_frames
|
||||
|
||||
dev = _parse_device(device)
|
||||
|
||||
if stream:
|
||||
from ..stream import stream_transcribe
|
||||
processor, model = load_model()
|
||||
stream_transcribe(processor, model, language, silence_frames=pause_seconds_to_frames(pause))
|
||||
stream_transcribe(processor, model, language, silence_frames=pause_seconds_to_frames(pause), device=dev)
|
||||
elif mic is not None:
|
||||
from ..model import record_audio
|
||||
processor, model = load_model()
|
||||
try:
|
||||
audio = record_audio(mic)
|
||||
audio = record_audio(mic, device=dev)
|
||||
console.print("Transcribing...")
|
||||
text = transcribe_audio(processor, model, audio, language)
|
||||
console.print(f"\n{text}\n")
|
||||
@@ -122,5 +137,26 @@ def transcribe(
|
||||
raise typer.Exit(1)
|
||||
|
||||
|
||||
@app.command()
def devices():
    """List available audio input devices.

    Prints one line per device that reports at least one input channel,
    showing its index, name, channel count, default sample rate and host API.
    The device whose index equals sounddevice's default input device is
    marked with a green ``*``. A short tip about choosing a device follows
    the list.
    """
    import sounddevice as sd

    # NOTE(review): assumes `console` is a rich Console (the file uses
    # rich-style markup tags throughout) — confirm against the imports.
    from rich.markup import escape

    default_in = sd.default.device[0]
    for idx, dev in enumerate(sd.query_devices()):
        if dev["max_input_channels"] <= 0:
            # No input channels: skip it, this command lists inputs only.
            continue
        marker = "[green]*[/green]" if idx == default_in else " "
        # Names come from the OS/driver and may contain square brackets
        # (e.g. "[hw:1,0]"). Unescaped, the console would parse those as
        # markup tags and drop them from the output.
        name = escape(dev["name"])
        hostapi = escape(sd.query_hostapis(dev["hostapi"])["name"])
        console.print(
            f"{marker} [bold]{idx:>2}[/bold] {name} "
            f"[dim]({dev['max_input_channels']}ch, {int(dev['default_samplerate'])}Hz, {hostapi})[/dim]"
        )
    console.print(
        "\n[dim]Tip: indices can shift between runs on PipeWire. "
        "Prefer [bold]-d pipewire[/bold] (uses PipeWire's default source) or pass a name substring like [bold]-d Sipeed[/bold].[/dim]"
    )
|
||||
|
||||
|
||||
def main() -> None:
    """Invoke the Typer command-line application."""
    app()
|
||||
|
||||
Reference in New Issue
Block a user