mirror of
https://github.com/Tony0410/readlater.git
synced 2026-05-24 22:01:41 +08:00
Chunk TTS text for faster response
- Splits text at sentence boundaries (max 3000 chars).
- Plays the first chunk immediately for better UX.
- Streams the response instead of buffering it.

Full-article TTS was taking 45-50s; it now starts in ~2s.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,36 +1,60 @@
|
||||
import { NextRequest, NextResponse } from "next/server";
|
||||
|
||||
/**
 * Breaks one over-long segment into pieces of at most `limit` characters.
 *
 * Prefers to break at the last whitespace character at or before the limit so
 * words stay intact; falls back to a hard cut when the segment contains no
 * whitespace in that window.
 */
function splitLongSegment(segment: string, limit: number): string[] {
  const whitespace = /\s/;
  const pieces: string[] = [];
  let rest = segment;

  while (rest.length > limit) {
    // Look backwards from the limit for a whitespace break point.
    let cut = -1;
    for (let i = limit; i > 0; i--) {
      if (whitespace.test(rest.charAt(i))) {
        cut = i;
        break;
      }
    }

    if (cut <= 0) {
      // No usable whitespace: hard cut, but avoid splitting a surrogate pair.
      cut = limit;
      const prev = rest.charCodeAt(cut - 1);
      if (cut > 1 && prev >= 0xd800 && prev <= 0xdbff) {
        cut -= 1;
      }
    }

    pieces.push(rest.slice(0, cut).trimEnd());
    rest = rest.slice(cut).trimStart();
  }

  if (rest) {
    pieces.push(rest);
  }
  return pieces;
}

/**
 * Splits text into smaller pieces so TTS can start on a short first chunk.
 *
 * The text is split after sentence-ending punctuation (`.`, `!`, `?`) followed
 * by whitespace, and consecutive sentences are packed into chunks joined by a
 * single space. The joining space is counted against the limit, and a sentence
 * that is itself longer than the limit is broken up (at whitespace when
 * possible), so no returned chunk exceeds `maxChars`.
 *
 * @param text - The text to split.
 * @param maxChars - Maximum chunk length in characters. Values below 1 are
 *   clamped to 1; `NaN` is treated as "no limit".
 * @returns The trimmed, non-empty chunks in order. If the text yields no
 *   chunks (empty or whitespace-only), returns `[text]` unchanged.
 */
function chunkText(text: string, maxChars: number = 2000): string[] {
  const limit = Number.isNaN(maxChars)
    ? Infinity
    : Math.max(1, Math.floor(maxChars));

  const chunks: string[] = [];
  let currentChunk = "";

  for (const rawSentence of text.split(/(?<=[.!?])\s+/)) {
    const sentence = rawSentence.trim();
    if (!sentence) {
      continue;
    }

    for (const piece of splitLongSegment(sentence, limit)) {
      if (!currentChunk) {
        currentChunk = piece;
      } else if (currentChunk.length + 1 + piece.length > limit) {
        // "+ 1" accounts for the space that would join the two parts.
        chunks.push(currentChunk);
        currentChunk = piece;
      } else {
        currentChunk += " " + piece;
      }
    }
  }

  if (currentChunk) {
    chunks.push(currentChunk);
  }

  return chunks.length > 0 ? chunks : [text];
}
|
||||
|
||||
// POST /api/tts - Proxy TTS requests to avoid CORS issues
|
||||
export async function POST(request: NextRequest) {
|
||||
try {
|
||||
const body = await request.json();
|
||||
const { engine, url, text, voice, speed } = body;
|
||||
const { engine, text, voice, speed } = body;
|
||||
|
||||
if (!text) {
|
||||
return NextResponse.json({ error: "Text is required" }, { status: 400 });
|
||||
}
|
||||
|
||||
// Only process first chunk for faster response
|
||||
const chunks = chunkText(text, 3000);
|
||||
const firstChunk = chunks[0];
|
||||
|
||||
let ttsUrl: string;
|
||||
let ttsBody: Record<string, unknown>;
|
||||
|
||||
if (engine === "edge") {
|
||||
// Use Docker container name for internal networking, fallback to provided URL
|
||||
const edgeHost = process.env.EDGE_TTS_URL || url || "http://edge-tts:5050";
|
||||
const edgeHost = process.env.EDGE_TTS_URL || "http://edge-tts:5050";
|
||||
ttsUrl = `${edgeHost}/v1/audio/speech`;
|
||||
ttsBody = {
|
||||
model: "tts-1",
|
||||
input: text,
|
||||
input: firstChunk,
|
||||
voice: voice || "en-US-AvaNeural",
|
||||
response_format: "mp3",
|
||||
speed: speed || 1.0,
|
||||
};
|
||||
} else if (engine === "kokoro") {
|
||||
// Use Docker container name for internal networking, fallback to provided URL
|
||||
const kokoroHost = process.env.KOKORO_TTS_URL || url || "http://kokoro-tts:8880";
|
||||
const kokoroHost = process.env.KOKORO_TTS_URL || "http://kokoro-tts:8880";
|
||||
ttsUrl = `${kokoroHost}/v1/audio/speech`;
|
||||
ttsBody = {
|
||||
model: "kokoro",
|
||||
input: text,
|
||||
input: firstChunk,
|
||||
voice: voice || "af_bella",
|
||||
response_format: "mp3",
|
||||
speed: speed || 1.0,
|
||||
@@ -56,12 +80,19 @@ export async function POST(request: NextRequest) {
|
||||
);
|
||||
}
|
||||
|
||||
const audioBuffer = await response.arrayBuffer();
|
||||
// Stream the response
|
||||
if (response.body) {
|
||||
return new NextResponse(response.body, {
|
||||
headers: {
|
||||
"Content-Type": "audio/mpeg",
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
const audioBuffer = await response.arrayBuffer();
|
||||
return new NextResponse(audioBuffer, {
|
||||
headers: {
|
||||
"Content-Type": "audio/mpeg",
|
||||
"Content-Length": audioBuffer.byteLength.toString(),
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
|
||||
Reference in New Issue
Block a user