Add Cohere Transcribe demo with uv + Python 3.14

2026-05-26 01:35:10 +08:00
commit 82fe21fe41
7 changed files with 1327 additions and 0 deletions
@@ -0,0 +1,10 @@
 # Python-generated files
 __pycache__/
 *.py[oc]
 build/
 dist/
 wheels/
 *.egg-info
 # Virtual environments
 .venv
@@ -0,0 +1 @@
 3.14
@@ -0,0 +1,6 @@
 def main():
    """Print the project's greeting line to standard output."""
    greeting = "Hello from cohere!"
    print(greeting)
 if __name__ == "__main__":
    # Run the greeting only when this file is executed directly.
    main()
@@ -0,0 +1,15 @@
 [project]
 name = "cohere"
 version = "0.1.0"
 description = "Demo script that transcribes audio with Cohere Transcribe via Hugging Face Transformers"
 readme = "README.md"
 requires-python = ">=3.14"
 dependencies = [
    "huggingface-hub>=1.16.1",
    "librosa>=0.11.0",
    "protobuf>=7.35.0",
    "sentencepiece>=0.2.1",
    "soundfile>=0.13.1",
    "torch>=2.12.0",
    "transformers>=5.9.0",
 ]
@@ -0,0 +1,30 @@
 from transformers import AutoProcessor, CohereAsrForConditionalGeneration
 from transformers.audio_utils import load_audio
 from huggingface_hub import hf_hub_download
 # Settings shared by the steps below, named once so they cannot drift apart.
 MODEL_ID = "CohereLabs/cohere-transcribe-03-2026"
 DEMO_AUDIO = "demo/voxpopuli_test_en_demo.wav"
 SAMPLE_RATE = 16000
 MAX_NEW_TOKENS = 256
 # Fetch the processor and the model for the checkpoint.
 print("Loading model...")
 processor = AutoProcessor.from_pretrained(MODEL_ID)
 model = CohereAsrForConditionalGeneration.from_pretrained(MODEL_ID, device_map="auto")
 # The demo clip is downloaded from the same Hugging Face repository as the model.
 audio_path = hf_hub_download(repo_id=MODEL_ID, filename=DEMO_AUDIO)
 # Load the clip at the same sampling rate that is passed to the processor.
 waveform = load_audio(audio_path, sampling_rate=SAMPLE_RATE)
 # Build the model inputs, generate, and decode the result to text.
 print("Transcribing...")
 inputs = processor(
    waveform,
    sampling_rate=SAMPLE_RATE,
    return_tensors="pt",
    language="en",
 )
 # The return value is not rebound, so this relies on .to() updating `inputs` itself.
 inputs.to(model.device, dtype=model.dtype)
 generated = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)
 text = processor.decode(generated, skip_special_tokens=True)
 print(f"\nTranscription:\n{text}\n")