c487ba8c08
Mic bumps produce transient spikes that pass VAD onset detection but contain no real speech — the model hallucinates "thank you" from them. Added MIN_SPEECH_SECONDS (0.3s) filter to discard segments where the actual speech portion is too short. Added a Jupyter notebook (notebooks/audio_debug.ipynb) for real-time audio visualization: streams RMS + peak amplitude into a live Plotly FigureWidget, then provides post-hoc waveform inspection, segment playback, and side-by-side segment comparison. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
38 lines
817 B
TOML
38 lines
817 B
TOML
# Core package metadata (PEP 621).
[project]
name = "cohere-transcribe"
version = "0.1.0"
description = "Live speech transcription using Cohere ASR"
readme = "README.md"
requires-python = ">=3.14"
# Runtime requirements as PEP 508 strings, kept in alphabetical order.
dependencies = [
    "accelerate>=1.13.0",
    "huggingface-hub>=1.16.1",
    "librosa>=0.11.0",
    "protobuf>=7.35.0",
    "sentencepiece>=0.2.1",
    "sounddevice>=0.5.5",
    "soundfile>=0.13.1",
    "torch>=2.12.0",
    "transformers>=5.9.0",
    # Typer >= 0.12 ships its former optional extras by default and no longer
    # defines an `all` extra, so requesting `typer[all]` only produces
    # "does not provide the extra" warnings at install time.
    "typer>=0.15.0",
]

# Console commands installed with the package. Both names invoke the same
# entry point, so `cohere` acts as a short alias for `cohere-transcribe`.
[project.scripts]
cohere = "cohere_transcribe.cli:main"
cohere-transcribe = "cohere_transcribe.cli:main"

# Build backend declaration: distributions are built with Hatchling.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# The importable package lives under src/, so point the wheel target at it
# explicitly.
[tool.hatch.build.targets.wheel]
packages = ["src/cohere_transcribe"]

# Development-only requirements (PEP 735 dependency group); these are not part
# of the published package's runtime dependencies.
# NOTE(review): this looks like the Jupyter/Plotly tooling for the audio-debug
# notebook (notebooks/audio_debug.ipynb) — confirm.
[dependency-groups]
dev = [
    "anywidget>=0.11.0",
    "ipywidgets>=8.1.8",
    "jupyterlab>=4.5.7",
    "plotly>=6.7.0",
]