#!/usr/bin/env bash
#
# Transcribe an audio file through the Groq audio transcription endpoint
# (https://api.groq.com/openai/v1/audio/transcriptions).
#
# Environment:
#   GROQ_API_KEY  API key sent as a Bearer token (required).
#
# Exit status:
#   0  transcript written; the output path is printed on stdout
#   1  input file not found or GROQ_API_KEY missing
#   2  usage error (bad or missing arguments, -h/--help)

# Strict mode: stop on unhandled errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -euo pipefail

#######################################
# Print the command-line help and terminate the script.
# Arguments: none
# Outputs:   help text on stderr (nothing on stdout)
# Returns:   never returns; exits with status 2
#######################################
usage() {
  # Quoted delimiter: the text is emitted literally, with no expansion.
  cat >&2 <<'EOF'
Usage:
  transcribe.sh <audio-file> [--model whisper-large-v3] [--out /path/to/out.txt] [--language en] [--prompt "hint"] [--json]

Models:
  whisper-large-v3 (default) - Fastest and most accurate
  whisper-large-v2
  whisper-base
EOF
  exit 2
}

# ---------------------------------------------------------------------------
# Main flow: parse arguments, validate inputs, upload the audio file, and
# write the transcript to the chosen output file.
# ---------------------------------------------------------------------------

# No audio file given, or help explicitly requested: show usage (exits 2).
if [[ "${1:-}" == "" || "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  usage
fi

# First positional argument is the audio file; everything after is options.
in="$1"
shift

model="whisper-large-v3"
out=""
language=""
prompt=""
response_format="text"

# Abort with a clear message when a value-taking option is the last argument.
# Without this check `shift 2` fails and `set -e` ends the script silently.
# Arguments: the remaining command line ("$@"), with the option as $1.
require_value() {
  if [[ $# -lt 2 ]]; then
    echo "Missing value for $1" >&2
    usage
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --model)
      require_value "$@"
      model="$2"
      shift 2
      ;;
    --out)
      require_value "$@"
      out="$2"
      shift 2
      ;;
    --language)
      require_value "$@"
      language="$2"
      shift 2
      ;;
    --prompt)
      require_value "$@"
      prompt="$2"
      shift 2
      ;;
    --json)
      response_format="json"
      shift
      ;;
    *)
      echo "Unknown arg: $1" >&2
      usage
      ;;
  esac
done

if [[ ! -f "$in" ]]; then
  echo "File not found: $in" >&2
  exit 1
fi

if [[ "${GROQ_API_KEY:-}" == "" ]]; then
  echo "Missing GROQ_API_KEY" >&2
  exit 1
fi

# Default output path: the input path with its last extension replaced by
# .json or .txt, depending on the requested response format.
if [[ "$out" == "" ]]; then
  base="${in%.*}"
  if [[ "$response_format" == "json" ]]; then
    out="${base}.json"
  else
    out="${base}.txt"
  fi
fi

mkdir -p -- "$(dirname -- "$out")"

# curl's -F syntax treats ',' and ';' in a path as separators, so the path is
# wrapped in double quotes with embedded backslashes and quotes escaped.
upload_path=${in//\\/\\\\}
upload_path=${upload_path//\"/\\\"}

# Text fields use --form-string so their values are sent literally; with -F a
# value starting with '@' or '<' would make curl read a local file instead.
curl_args=(
  -sS
  -H "Authorization: Bearer $GROQ_API_KEY"
  -H "Accept: application/json"
  -F "file=@\"${upload_path}\""
  --form-string "model=${model}"
  --form-string "response_format=${response_format}"
)
if [[ -n "$language" ]]; then
  curl_args+=(--form-string "language=${language}")
fi
if [[ -n "$prompt" ]]; then
  curl_args+=(--form-string "prompt=${prompt}")
fi

# Receive the response in a temporary file so that an HTTP error body is never
# saved as if it were the transcript.
body_file="$(mktemp)"
trap 'rm -f -- "$body_file"' EXIT

http_code="$(curl "${curl_args[@]}" -o "$body_file" -w '%{http_code}' \
  https://api.groq.com/openai/v1/audio/transcriptions)"

if [[ "$http_code" != 2?? ]]; then
  echo "Transcription request failed (HTTP $http_code)" >&2
  cat "$body_file" >&2
  echo >&2
  exit 1
fi

cat "$body_file" >"$out"

# Report where the transcript was written.
printf '%s\n' "$out"