AI Newsletter Digest improvements: fixed QP soft line break decoding, URL extraction, and content cleaning
This commit is contained in:
38
skills/groq-whisper/SKILL.md
Normal file
38
skills/groq-whisper/SKILL.md
Normal file
@@ -0,0 +1,38 @@
|
||||
# Groq Whisper API (free)
|
||||
|
||||
Transcribe audio files using Groq's free Whisper inference API.
|
||||
|
||||
## Quick start
|
||||
|
||||
```bash
|
||||
{baseDir}/scripts/transcribe.sh /path/to/audio.m4a
|
||||
```
|
||||
|
||||
Defaults:
|
||||
- Model: `whisper-large-v3` (Groq's fastest whisper model)
|
||||
- Output: `<input>.txt`
|
||||
|
||||
## Useful flags
|
||||
|
||||
```bash
|
||||
{baseDir}/scripts/transcribe.sh /path/to/audio.ogg --model whisper-large-v3 --out /tmp/transcript.txt
|
||||
{baseDir}/scripts/transcribe.sh /path/to/audio.m4a --language en
|
||||
{baseDir}/scripts/transcribe.sh /path/to/audio.m4a --prompt "Speaker names: Peter, Daniel"
|
||||
{baseDir}/scripts/transcribe.sh /path/to/audio.m4a --json --out /tmp/transcript.json
|
||||
```
|
||||
|
||||
## API key
|
||||
|
||||
Uses the `GROQ_API_KEY` environment variable (already configured on the gateway).
|
||||
|
||||
## Models available
|
||||
|
||||
- `whisper-large-v3` - Latest and fastest on Groq (recommended)
|
||||
- `whisper-large-v2` - Slightly older but still fast
|
||||
- `whisper-base` - Faster but less accurate
|
||||
|
||||
## Why Groq?
|
||||
|
||||
- **Free** — no per-minute charges
|
||||
- **Fast** — Groq's LPU delivers near-real-time transcription
|
||||
- **No quota limits** — generous free tier
|
||||
90
skills/groq-whisper/scripts/transcribe.sh
Executable file
90
skills/groq-whisper/scripts/transcribe.sh
Executable file
@@ -0,0 +1,90 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
usage() {
|
||||
cat >&2 <<'EOF'
|
||||
Usage:
|
||||
transcribe.sh <audio-file> [--model whisper-large-v3] [--out /path/to/out.txt] [--language en] [--prompt "hint"] [--json]
|
||||
|
||||
Models:
|
||||
whisper-large-v3 (default) - Fastest and most accurate
|
||||
whisper-large-v2
|
||||
whisper-base
|
||||
EOF
|
||||
exit 2
|
||||
}
|
||||
|
||||
if [[ "${1:-}" == "" || "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
|
||||
usage
|
||||
fi
|
||||
|
||||
in="${1:-}"
|
||||
shift || true
|
||||
|
||||
model="whisper-large-v3"
|
||||
out=""
|
||||
language=""
|
||||
prompt=""
|
||||
response_format="text"
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
--model)
|
||||
model="${2:-}"
|
||||
shift 2
|
||||
;;
|
||||
--out)
|
||||
out="${2:-}"
|
||||
shift 2
|
||||
;;
|
||||
--language)
|
||||
language="${2:-}"
|
||||
shift 2
|
||||
;;
|
||||
--prompt)
|
||||
prompt="${2:-}"
|
||||
shift 2
|
||||
;;
|
||||
--json)
|
||||
response_format="json"
|
||||
shift 1
|
||||
;;
|
||||
*)
|
||||
echo "Unknown arg: $1" >&2
|
||||
usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ ! -f "$in" ]]; then
|
||||
echo "File not found: $in" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ "${GROQ_API_KEY:-}" == "" ]]; then
|
||||
echo "Missing GROQ_API_KEY" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ "$out" == "" ]]; then
|
||||
base="${in%.*}"
|
||||
if [[ "$response_format" == "json" ]]; then
|
||||
out="${base}.json"
|
||||
else
|
||||
out="${base}.txt"
|
||||
fi
|
||||
fi
|
||||
|
||||
mkdir -p "$(dirname "$out")"
|
||||
|
||||
curl -sS https://api.groq.com/openai/v1/audio/transcriptions \
|
||||
-H "Authorization: Bearer $GROQ_API_KEY" \
|
||||
-H "Accept: application/json" \
|
||||
-F "file=@${in}" \
|
||||
-F "model=${model}" \
|
||||
-F "response_format=${response_format}" \
|
||||
${language:+-F "language=${language}"} \
|
||||
${prompt:+-F "prompt=${prompt}"} \
|
||||
>"$out"
|
||||
|
||||
echo "$out"
|
||||
Reference in New Issue
Block a user