23 lines
769 B
Python
23 lines
769 B
Python
import json
import os
import subprocess
import sys
import tempfile
import wave

from vosk import Model, KaldiRecognizer
|
|
|
|
# Location of the Vosk model directory loaded for recognition.
MODEL_PATH = '/home/openclaw/.openclaw/workspace/.cache/vosk/vosk-model-small-en-us-0.15'

# ffmpeg resamples the input to this rate (mono) before recognition.
SAMPLE_RATE_HZ = 16000

# Number of audio frames handed to the recognizer per read.
FRAMES_PER_READ = 4000


def _convert_to_wav(audio_in, wav_path):
    """Re-encode *audio_in* as a 16 kHz mono WAV file at *wav_path* via ffmpeg.

    ffmpeg's own output is discarded; a non-zero exit status raises
    ``subprocess.CalledProcessError``.
    """
    subprocess.check_call(
        [
            'ffmpeg', '-y', '-i', audio_in,
            '-ar', str(SAMPLE_RATE_HZ), '-ac', '1', '-f', 'wav', wav_path,
        ],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )


def _recognize(wf, rec):
    """Feed every frame of the open wave reader *wf* to recognizer *rec*.

    Returns the non-empty ``'text'`` fields of each intermediate result and
    of the final result, joined with single spaces and stripped.
    """
    segments = []
    # readframes() returns empty bytes once the file is exhausted.
    for chunk in iter(lambda: wf.readframes(FRAMES_PER_READ), b''):
        if rec.AcceptWaveform(chunk):
            segment = json.loads(rec.Result()).get('text')
            if segment:
                segments.append(segment)
    # Flush whatever the recognizer is still holding after the last chunk.
    tail = json.loads(rec.FinalResult()).get('text')
    if tail:
        segments.append(tail)
    return ' '.join(segments).strip()


def transcribe(audio_in, model_path=MODEL_PATH):
    """Return the recognized text for the audio file *audio_in*.

    The input is first converted to WAV inside a private temporary
    directory, so concurrent runs cannot overwrite each other's scratch
    file and nothing is left behind afterwards (the original used the fixed
    path ``/tmp/in.wav`` and never removed it).

    Raises:
        subprocess.CalledProcessError: if the ffmpeg conversion fails.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        wav_path = os.path.join(tmp_dir, 'in.wav')
        _convert_to_wav(audio_in, wav_path)
        # The context manager closes the wave reader before the directory
        # is removed.
        with wave.open(wav_path, 'rb') as wf:
            rec = KaldiRecognizer(Model(model_path), wf.getframerate())
            # Kept from the original script; only the 'text' field of each
            # result is consumed here.
            rec.SetWords(True)
            return _recognize(wf, rec)


def main(argv=None):
    """Command-line entry point: print the transcript of ``argv[1]``.

    Returns the process exit status: 0 on success, 2 when the audio path
    argument is missing (a usage line is written to stderr).
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        prog = argv[0] if argv else 'transcribe'
        print('usage: {} AUDIO_FILE'.format(prog), file=sys.stderr)
        return 2
    print(transcribe(argv[1]))
    return 0


if __name__ == '__main__':
    sys.exit(main())