diff --git a/src/app/api/tts/route.ts b/src/app/api/tts/route.ts index 6bc87f9..db122d9 100644 --- a/src/app/api/tts/route.ts +++ b/src/app/api/tts/route.ts @@ -1,36 +1,60 @@ import { NextRequest, NextResponse } from "next/server"; +// Chunk text into smaller pieces for faster TTS +function chunkText(text: string, maxChars: number = 2000): string[] { + const chunks: string[] = []; + const sentences = text.split(/(?<=[.!?])\s+/); + let currentChunk = ""; + + for (const sentence of sentences) { + if (currentChunk.length + sentence.length > maxChars && currentChunk) { + chunks.push(currentChunk.trim()); + currentChunk = sentence; + } else { + currentChunk += (currentChunk ? " " : "") + sentence; + } + } + + if (currentChunk.trim()) { + chunks.push(currentChunk.trim()); + } + + return chunks.length > 0 ? chunks : [text]; +} + // POST /api/tts - Proxy TTS requests to avoid CORS issues export async function POST(request: NextRequest) { try { const body = await request.json(); - const { engine, url, text, voice, speed } = body; + const { engine, text, voice, speed } = body; if (!text) { return NextResponse.json({ error: "Text is required" }, { status: 400 }); } + // Only process first chunk for faster response + const chunks = chunkText(text, 3000); + const firstChunk = chunks[0]; + let ttsUrl: string; let ttsBody: Record; if (engine === "edge") { - // Use Docker container name for internal networking, fallback to provided URL - const edgeHost = process.env.EDGE_TTS_URL || url || "http://edge-tts:5050"; + const edgeHost = process.env.EDGE_TTS_URL || "http://edge-tts:5050"; ttsUrl = `${edgeHost}/v1/audio/speech`; ttsBody = { model: "tts-1", - input: text, + input: firstChunk, voice: voice || "en-US-AvaNeural", response_format: "mp3", speed: speed || 1.0, }; } else if (engine === "kokoro") { - // Use Docker container name for internal networking, fallback to provided URL - const kokoroHost = process.env.KOKORO_TTS_URL || url || "http://kokoro-tts:8880"; + const kokoroHost = process.env.KOKORO_TTS_URL || "http://kokoro-tts:8880"; ttsUrl = `${kokoroHost}/v1/audio/speech`; ttsBody = { model: "kokoro", - input: text, + input: firstChunk, voice: voice || "af_bella", response_format: "mp3", speed: speed || 1.0, @@ -56,12 +80,19 @@ export async function POST(request: NextRequest) { ); } - const audioBuffer = await response.arrayBuffer(); + // Stream the response + if (response.body) { + return new NextResponse(response.body, { + headers: { + "Content-Type": "audio/mpeg", + }, + }); + } + const audioBuffer = await response.arrayBuffer(); return new NextResponse(audioBuffer, { headers: { "Content-Type": "audio/mpeg", - "Content-Length": audioBuffer.byteLength.toString(), }, }); } catch (error) {