Chunk TTS text for faster response

- Splits text at sentence boundaries (max 3000 chars)
- Plays first chunk immediately for better UX
- Streams response instead of buffering

Full-article TTS previously took 45-50s before any audio was returned; playback now starts in ~2s.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Gemini Agent
2026-01-18 02:12:11 +00:00
parent 611d57770e
commit 79a7914c0f

View File

@@ -1,36 +1,60 @@
import { NextRequest, NextResponse } from "next/server";
/**
 * Splits text into chunks of at most `maxChars` characters so a TTS engine
 * can start synthesizing a short first chunk instead of the whole text.
 *
 * Text is split at sentence boundaries (whitespace following `.`, `!` or `?`)
 * and consecutive sentences are packed into a chunk, joined by a single
 * space, for as long as the joined result fits within `maxChars`. A single
 * sentence longer than `maxChars` is broken at the last whitespace inside
 * the limit, or hard-cut when it contains no whitespace at all.
 *
 * @param text - The text to split.
 * @param maxChars - Maximum length of each returned chunk (default 2000).
 *   Values below 1 are treated as 1 so that splitting always makes progress.
 * @returns The trimmed, non-empty chunks in order. If the text yields no
 *   non-empty chunk (empty or whitespace-only input), returns `[text]`.
 */
function chunkText(text: string, maxChars: number = 2000): string[] {
  const limit = Math.max(1, Math.floor(maxChars));
  const whitespace = /\s/;
  const chunks: string[] = [];
  let currentChunk = "";

  // Adds a piece (already <= limit) to the current chunk, starting a new
  // chunk when the piece plus its joining space would exceed the limit.
  const append = (piece: string): void => {
    if (!piece) {
      return;
    }
    if (currentChunk && currentChunk.length + 1 + piece.length > limit) {
      chunks.push(currentChunk);
      currentChunk = piece;
    } else {
      currentChunk += (currentChunk ? " " : "") + piece;
    }
  };

  for (const sentence of text.split(/(?<=[.!?])\s+/)) {
    let rest = sentence.trim();

    // A sentence that is too long on its own is broken into smaller pieces.
    while (rest.length > limit) {
      // Look backwards for the last whitespace that keeps the head <= limit.
      let cut = limit;
      while (cut > 0 && !whitespace.test(rest.charAt(cut))) {
        cut--;
      }
      if (cut === 0) {
        // No whitespace to break on: hard cut, but avoid separating a
        // UTF-16 surrogate pair when there is room to step back.
        cut = limit;
        const code = rest.charCodeAt(cut - 1);
        if (cut > 1 && code >= 0xd800 && code <= 0xdbff) {
          cut--;
        }
      }
      append(rest.slice(0, cut).trimEnd());
      rest = rest.slice(cut).trimStart();
    }

    append(rest);
  }

  if (currentChunk) {
    chunks.push(currentChunk);
  }

  return chunks.length > 0 ? chunks : [text];
}
// POST /api/tts - Proxy TTS requests to avoid CORS issues
export async function POST(request: NextRequest) {
try {
const body = await request.json();
const { engine, url, text, voice, speed } = body;
const { engine, text, voice, speed } = body;
if (!text) {
return NextResponse.json({ error: "Text is required" }, { status: 400 });
}
// Only process first chunk for faster response
const chunks = chunkText(text, 3000);
const firstChunk = chunks[0];
let ttsUrl: string;
let ttsBody: Record<string, unknown>;
if (engine === "edge") {
// Use Docker container name for internal networking, fallback to provided URL
const edgeHost = process.env.EDGE_TTS_URL || url || "http://edge-tts:5050";
const edgeHost = process.env.EDGE_TTS_URL || "http://edge-tts:5050";
ttsUrl = `${edgeHost}/v1/audio/speech`;
ttsBody = {
model: "tts-1",
input: text,
input: firstChunk,
voice: voice || "en-US-AvaNeural",
response_format: "mp3",
speed: speed || 1.0,
};
} else if (engine === "kokoro") {
// Use Docker container name for internal networking, fallback to provided URL
const kokoroHost = process.env.KOKORO_TTS_URL || url || "http://kokoro-tts:8880";
const kokoroHost = process.env.KOKORO_TTS_URL || "http://kokoro-tts:8880";
ttsUrl = `${kokoroHost}/v1/audio/speech`;
ttsBody = {
model: "kokoro",
input: text,
input: firstChunk,
voice: voice || "af_bella",
response_format: "mp3",
speed: speed || 1.0,
@@ -56,12 +80,19 @@ export async function POST(request: NextRequest) {
);
}
const audioBuffer = await response.arrayBuffer();
// Stream the response
if (response.body) {
return new NextResponse(response.body, {
headers: {
"Content-Type": "audio/mpeg",
},
});
}
const audioBuffer = await response.arrayBuffer();
return new NextResponse(audioBuffer, {
headers: {
"Content-Type": "audio/mpeg",
"Content-Length": audioBuffer.byteLength.toString(),
},
});
} catch (error) {