Files
openclaw-backups/.venvs/transcribe/transcribe_vosk.py
2026-02-21 07:01:51 +00:00

23 lines
769 B
Python

import json
import os
import subprocess
import sys
import tempfile
import wave

from vosk import KaldiRecognizer, Model
# Location of the Vosk acoustic model loaded for every transcription.
MODEL_PATH = '/home/openclaw/.openclaw/workspace/.cache/vosk/vosk-model-small-en-us-0.15'

# Number of audio frames read from the WAV file per recognizer call.
FRAMES_PER_CHUNK = 4000


def _convert_to_wav(audio_in, wav_path):
    """Convert *audio_in* to a 16 kHz mono WAV file at *wav_path* via ffmpeg.

    ffmpeg's own output is discarded; a non-zero exit status raises
    ``subprocess.CalledProcessError``.
    """
    subprocess.check_call(
        ['ffmpeg', '-y', '-i', audio_in, '-ar', '16000', '-ac', '1', '-f', 'wav', wav_path],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )


def _append_text(result_json, segments):
    """Append the non-empty ``text`` field of a recognizer JSON result."""
    text = json.loads(result_json).get('text')
    if text:
        segments.append(text)


def _recognize(wav_path, model):
    """Feed the WAV file at *wav_path* to a recognizer and return its text segments."""
    segments = []
    # The context manager guarantees the reader is closed even if recognition fails.
    with wave.open(wav_path, 'rb') as wf:
        rec = KaldiRecognizer(model, wf.getframerate())
        rec.SetWords(True)
        while True:
            data = wf.readframes(FRAMES_PER_CHUNK)
            if not data:
                break
            # A truthy return means a result is ready to be collected now.
            if rec.AcceptWaveform(data):
                _append_text(rec.Result(), segments)
        # Collect whatever the recognizer still holds after the last chunk.
        _append_text(rec.FinalResult(), segments)
    return segments


def transcribe(audio_in, model_path=MODEL_PATH):
    """Return the transcript of the audio file *audio_in* as a single string.

    The input is first converted to WAV inside a private temporary directory
    (instead of a fixed, shared ``/tmp/in.wav``), so concurrent runs cannot
    overwrite each other's intermediate file and nothing is left behind.
    """
    with tempfile.TemporaryDirectory(prefix='transcribe_vosk_') as tmp_dir:
        wav_path = os.path.join(tmp_dir, 'in.wav')
        _convert_to_wav(audio_in, wav_path)
        segments = _recognize(wav_path, Model(model_path))
    return ' '.join(segments).strip()


def main():
    """Transcribe the file named by the first command-line argument and print it."""
    if len(sys.argv) < 2:
        # sys.exit with a message writes it to stderr and exits with status 1.
        sys.exit('usage: transcribe_vosk.py AUDIO_FILE')
    print(transcribe(sys.argv[1]))


if __name__ == '__main__':
    main()