Simplify to audio file input (mic requires PortAudio on NixOS)

This commit is contained in:
2026-05-26 01:49:52 +08:00
parent 14abcb89f2
commit 8b88489a53
3 changed files with 30 additions and 10 deletions
+1
View File
@@ -10,6 +10,7 @@ dependencies = [
"librosa>=0.11.0",
"protobuf>=7.35.0",
"sentencepiece>=0.2.1",
"sounddevice>=0.5.5",
"soundfile>=0.13.1",
"torch>=2.12.0",
"transformers>=5.9.0",
+11 -10
View File
@@ -10,21 +10,22 @@ model = CohereAsrForConditionalGeneration.from_pretrained(
device_map="auto"
)
# Download demo audio from Hugging Face
def transcribe_audio(audio, language="en", *, sampling_rate=16000, max_new_tokens=256):
    """Transcribe one audio clip with the module-level ``processor`` and ``model``.

    Args:
        audio: Audio samples in whatever form ``processor`` accepts
            (presumably a 1-D waveform as produced by ``load_audio`` --
            confirm against the caller).
        language: Language code forwarded to the processor.
        sampling_rate: Sample rate of ``audio`` in Hz, forwarded to the
            processor. Defaults to 16000, the value previously hard-coded.
        max_new_tokens: Upper bound on generated tokens. Defaults to 256,
            the value previously hard-coded; raise it for long clips so the
            transcript is not cut short.

    Returns:
        Whatever ``processor.decode`` returns for the generated ids, with
        special tokens stripped.
    """
    inputs = processor(
        audio,
        sampling_rate=sampling_rate,
        return_tensors="pt",
        language=language,
    )
    # Rebind the result of ``.to`` instead of relying on the batch being
    # mutated in place; the features must sit on the model's device and use
    # its dtype before generation.
    inputs = inputs.to(model.device, dtype=model.dtype)
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return processor.decode(outputs, skip_special_tokens=True)
# Use demo audio file from Hugging Face
# The clip is fetched from the model's own repository with hf_hub_download,
# so no microphone or local recording is involved.
print("Loading demo audio...")
audio_file = hf_hub_download(
repo_id="CohereLabs/cohere-transcribe-03-2026",
filename="demo/voxpopuli_test_en_demo.wav",
)
# Load audio
# 16000 is the same sampling rate that is later passed to the processor.
audio = load_audio(audio_file, sampling_rate=16000)
# Process and transcribe
print("Transcribing...")
# NOTE(review): the next four statements repeat the body of
# transcribe_audio() inline with language="en"; the `text` they produce is
# overwritten by the transcribe_audio(audio) call right after them. They look
# like the pre-refactor lines of this change -- confirm and drop if so.
inputs = processor(audio, sampling_rate=16000, return_tensors="pt", language="en")
inputs.to(model.device, dtype=model.dtype)
outputs = model.generate(**inputs, max_new_tokens=256)
text = processor.decode(outputs, skip_special_tokens=True)
# Transcribe through the shared helper using its default language ("en").
text = transcribe_audio(audio)
print(f"\nTranscription:\n{text}\n")
Generated
+18
View File
@@ -199,6 +199,7 @@ dependencies = [
{ name = "librosa" },
{ name = "protobuf" },
{ name = "sentencepiece" },
{ name = "sounddevice" },
{ name = "soundfile" },
{ name = "torch" },
{ name = "transformers" },
@@ -211,6 +212,7 @@ requires-dist = [
{ name = "librosa", specifier = ">=0.11.0" },
{ name = "protobuf", specifier = ">=7.35.0" },
{ name = "sentencepiece", specifier = ">=0.2.1" },
{ name = "sounddevice", specifier = ">=0.5.5" },
{ name = "soundfile", specifier = ">=0.13.1" },
{ name = "torch", specifier = ">=2.12.0" },
{ name = "transformers", specifier = ">=5.9.0" },
@@ -1077,6 +1079,22 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" },
]
[[package]]
name = "sounddevice"
version = "0.5.5"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "cffi" },
]
sdist = { url = "https://files.pythonhosted.org/packages/2a/f9/2592608737553638fca98e21e54bfec40bf577bb98a61b2770c912aab25e/sounddevice-0.5.5.tar.gz", hash = "sha256:22487b65198cb5bf2208755105b524f78ad173e5ab6b445bdab1c989f6698df3", size = 143191, upload-time = "2026-01-23T18:36:43.529Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/1e/0a/478e441fd049002cf308520c0d62dd8333e7c6cc8d997f0dda07b9fbcc46/sounddevice-0.5.5-py3-none-any.whl", hash = "sha256:30ff99f6c107f49d25ad16a45cacd8d91c25a1bcdd3e81a206b921a3a6405b1f", size = 32807, upload-time = "2026-01-23T18:36:35.649Z" },
{ url = "https://files.pythonhosted.org/packages/56/f9/c037c35f6d0b6bc3bc7bfb314f1d6f1f9a341328ef47cd63fc4f850a7b27/sounddevice-0.5.5-py3-none-macosx_10_6_x86_64.macosx_10_6_universal2.whl", hash = "sha256:05eb9fd6c54c38d67741441c19164c0dae8ce80453af2d8c4ad2e7823d15b722", size = 108557, upload-time = "2026-01-23T18:36:37.41Z" },
{ url = "https://files.pythonhosted.org/packages/88/a1/d19dd9889cd4bce2e233c4fac007cd8daaf5b9fe6e6a5d432cf17be0b807/sounddevice-0.5.5-py3-none-win32.whl", hash = "sha256:1234cc9b4c9df97b6cbe748146ae0ec64dd7d6e44739e8e42eaa5b595313a103", size = 317765, upload-time = "2026-01-23T18:36:39.047Z" },
{ url = "https://files.pythonhosted.org/packages/c3/0e/002ed7c4c1c2ab69031f78989d3b789fee3a7fba9e586eb2b81688bf4961/sounddevice-0.5.5-py3-none-win_amd64.whl", hash = "sha256:cfc6b2c49fb7f555591c78cb8ecf48d6a637fd5b6e1db5fec6ed9365d64b3519", size = 365324, upload-time = "2026-01-23T18:36:40.496Z" },
{ url = "https://files.pythonhosted.org/packages/4e/39/a61d4b83a7746b70d23d9173be688c0c6bfc7173772344b7442c2c155497/sounddevice-0.5.5-py3-none-win_arm64.whl", hash = "sha256:3861901ddd8230d2e0e8ae62ac320cdd4c688d81df89da036dcb812f757bb3e6", size = 317115, upload-time = "2026-01-23T18:36:42.235Z" },
]
[[package]]
name = "soundfile"
version = "0.13.1"