feat: implement live streaming transcription with VAD

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-29 02:46:13 +08:00
parent d62fcdd1cd
commit 747a4772b6
+61 -1
View File
@@ -1,5 +1,8 @@
import sys
import argparse
import queue
import threading
import time
import numpy as np
import sounddevice as sd
from transformers import AutoProcessor, CohereAsrForConditionalGeneration
@@ -138,7 +141,64 @@ class VADStateMachine:
def stream_transcribe(processor, model, language):
    """Transcribe microphone input live, one speech segment at a time.

    Audio is captured through a ``sounddevice`` input stream. The stream
    callback slices incoming samples into ``FRAME_SIZE`` frames and feeds
    them to a ``VADStateMachine``; every segment the VAD reports is queued
    and transcribed on a background thread so that model inference never
    blocks audio capture. Each non-empty transcription is printed with an
    ``[MM:SS]`` prefix derived from the segment's start offset.

    The function blocks until the user presses Ctrl+C, then flushes any
    speech segment still in progress, waits (up to 30 s) for the worker to
    finish the queued segments, and prints ``Done.``.

    Args:
        processor: Passed through unchanged to ``transcribe_audio``.
        model: Passed through unchanged to ``transcribe_audio``.
        language: Passed through unchanged to ``transcribe_audio``.
    """
    threshold = calibrate_silence()
    vad = VADStateMachine(threshold)
    seg_queue = queue.Queue()
    stop_event = threading.Event()
    start_time = time.monotonic()

    def transcription_worker():
        # Keep draining after a stop request so already-queued segments are
        # still transcribed; exit only once stop is set AND the queue is empty.
        while not stop_event.is_set() or not seg_queue.empty():
            try:
                # The timeout lets the loop re-check stop_event periodically.
                seg_start, audio = seg_queue.get(timeout=0.5)
            except queue.Empty:
                continue
            minutes, seconds = divmod(int(seg_start), 60)
            text = transcribe_audio(processor, model, audio, language).strip()
            if text:
                print(f"[{minutes:02d}:{seconds:02d}] {text}")

    worker = threading.Thread(target=transcription_worker, daemon=True)
    worker.start()

    # Samples received from the stream that do not yet fill a whole frame.
    frame_buf = np.empty(0, dtype="float32")

    def audio_callback(indata, frames, time_info, status):
        nonlocal frame_buf
        if stop_event.is_set():
            return
        # Only the first channel is used (the stream is opened with channels=1).
        frame_buf = np.append(frame_buf, indata[:, 0])
        while len(frame_buf) >= FRAME_SIZE:
            frame = frame_buf[:FRAME_SIZE]
            frame_buf = frame_buf[FRAME_SIZE:]
            elapsed = time.monotonic() - start_time
            result = vad.process_frame(frame, elapsed)
            # NOTE(review): a non-None result is presumably a
            # (segment_start_time, audio) tuple, matching what the worker
            # unpacks - confirm against VADStateMachine.process_frame.
            if result is not None:
                seg_queue.put(result)

    print("Listening... (Ctrl+C to stop)")
    stream = sd.InputStream(
        samplerate=SAMPLE_RATE, channels=1, dtype="float32",
        callback=audio_callback, blocksize=FRAME_SIZE,
    )
    try:
        with stream:
            while True:
                time.sleep(0.1)
    except KeyboardInterrupt:
        pass

    # Flush any remaining speech segment BEFORE signalling stop. The stream is
    # already closed here, so the VAD state is no longer being mutated by the
    # callback. If stop were signalled first, the worker could observe
    # "stop set + queue empty" and exit before this segment is enqueued,
    # silently dropping the final utterance.
    if vad.speaking and vad.segment:
        seg_queue.put((vad.segment_start_time, np.concatenate(vad.segment)))
    stop_event.set()

    # Bounded wait: the worker is a daemon thread, so a stuck transcription
    # cannot keep the process alive past this timeout.
    worker.join(timeout=30)
    print("\nDone.")
if __name__ == "__main__":