mirror of
https://github.com/Tony0410/readlater.git
synced 2026-05-24 22:01:41 +08:00
Chunk TTS text for faster response
- Splits text at sentence boundaries (max 3000 chars)
- Plays first chunk immediately for better UX
- Streams response instead of buffering

Full article TTS was taking 45-50s, now starts in ~2s.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,36 +1,60 @@
|
|||||||
import { NextRequest, NextResponse } from "next/server";
|
import { NextRequest, NextResponse } from "next/server";
|
||||||
|
|
||||||
|
// Chunk text into smaller pieces for faster TTS
|
||||||
|
/**
 * Splits text into chunks of at most `maxChars` characters so each chunk can
 * be sent to a TTS engine separately.
 *
 * The text is first split into sentences (after `.`, `!` or `?` followed by
 * whitespace). Sentences are then packed greedily into chunks, joined by a
 * single space; the joining space counts towards the limit. A sentence that
 * is longer than the limit on its own is broken at the last whitespace that
 * fits, or hard-cut when it contains no whitespace at all.
 *
 * @param text - The text to split.
 * @param maxChars - Maximum chunk length in UTF-16 code units. Values below 1
 *   (and NaN) are treated as 1; fractional values are rounded down.
 * @returns The chunks in reading order, each trimmed. If the text produces no
 *   non-empty chunk (empty or whitespace-only input), returns `[text]` so the
 *   result always has a first element.
 */
function chunkText(text: string, maxChars: number = 2000): string[] {
  // Clamp the limit so the splitting loop below always makes progress.
  const limit = maxChars >= 1 ? Math.floor(maxChars) : 1;
  const whitespace = /\s/;

  // Breaks one over-long sentence into parts that each fit within `limit`.
  const splitOversized = (sentence: string): string[] => {
    const parts: string[] = [];
    let rest = sentence;

    while (rest.length > limit) {
      // Prefer the last whitespace at or before the limit as the cut point.
      let cut = limit;
      while (cut > 0 && !whitespace.test(rest.charAt(cut))) {
        cut--;
      }

      if (cut === 0) {
        // No whitespace available: hard cut, but never between the two
        // halves of a surrogate pair (that would corrupt the character).
        cut = limit;
        const lastUnit = rest.charCodeAt(cut - 1);
        if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
          // With a limit of 1 the pair is kept whole, exceeding the limit by one unit.
          cut = cut > 1 ? cut - 1 : cut + 1;
        }
      }

      parts.push(rest.slice(0, cut).trimEnd());
      rest = rest.slice(cut).trimStart();
    }

    if (rest) {
      parts.push(rest);
    }
    return parts;
  };

  const chunks: string[] = [];
  let currentChunk = "";

  for (const rawSentence of text.split(/(?<=[.!?])\s+/)) {
    const sentence = rawSentence.trim();
    if (!sentence) {
      continue;
    }

    const pieces = sentence.length > limit ? splitOversized(sentence) : [sentence];
    for (const piece of pieces) {
      const candidate = currentChunk ? `${currentChunk} ${piece}` : piece;
      if (currentChunk && candidate.length > limit) {
        // Adding this piece (plus its joining space) would overflow the chunk.
        chunks.push(currentChunk);
        currentChunk = piece;
      } else {
        currentChunk = candidate;
      }
    }
  }

  if (currentChunk) {
    chunks.push(currentChunk);
  }

  // Callers read chunks[0], so fall back to the raw text rather than [].
  return chunks.length > 0 ? chunks : [text];
}
|
||||||
|
|
||||||
// POST /api/tts - Proxy TTS requests to avoid CORS issues
|
// POST /api/tts - Proxy TTS requests to avoid CORS issues
|
||||||
export async function POST(request: NextRequest) {
|
export async function POST(request: NextRequest) {
|
||||||
try {
|
try {
|
||||||
const body = await request.json();
|
const body = await request.json();
|
||||||
const { engine, url, text, voice, speed } = body;
|
const { engine, text, voice, speed } = body;
|
||||||
|
|
||||||
if (!text) {
|
if (!text) {
|
||||||
return NextResponse.json({ error: "Text is required" }, { status: 400 });
|
return NextResponse.json({ error: "Text is required" }, { status: 400 });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Only process first chunk for faster response
|
||||||
|
const chunks = chunkText(text, 3000);
|
||||||
|
const firstChunk = chunks[0];
|
||||||
|
|
||||||
let ttsUrl: string;
|
let ttsUrl: string;
|
||||||
let ttsBody: Record<string, unknown>;
|
let ttsBody: Record<string, unknown>;
|
||||||
|
|
||||||
if (engine === "edge") {
|
if (engine === "edge") {
|
||||||
// Use Docker container name for internal networking, fallback to provided URL
|
const edgeHost = process.env.EDGE_TTS_URL || "http://edge-tts:5050";
|
||||||
const edgeHost = process.env.EDGE_TTS_URL || url || "http://edge-tts:5050";
|
|
||||||
ttsUrl = `${edgeHost}/v1/audio/speech`;
|
ttsUrl = `${edgeHost}/v1/audio/speech`;
|
||||||
ttsBody = {
|
ttsBody = {
|
||||||
model: "tts-1",
|
model: "tts-1",
|
||||||
input: text,
|
input: firstChunk,
|
||||||
voice: voice || "en-US-AvaNeural",
|
voice: voice || "en-US-AvaNeural",
|
||||||
response_format: "mp3",
|
response_format: "mp3",
|
||||||
speed: speed || 1.0,
|
speed: speed || 1.0,
|
||||||
};
|
};
|
||||||
} else if (engine === "kokoro") {
|
} else if (engine === "kokoro") {
|
||||||
// Use Docker container name for internal networking, fallback to provided URL
|
const kokoroHost = process.env.KOKORO_TTS_URL || "http://kokoro-tts:8880";
|
||||||
const kokoroHost = process.env.KOKORO_TTS_URL || url || "http://kokoro-tts:8880";
|
|
||||||
ttsUrl = `${kokoroHost}/v1/audio/speech`;
|
ttsUrl = `${kokoroHost}/v1/audio/speech`;
|
||||||
ttsBody = {
|
ttsBody = {
|
||||||
model: "kokoro",
|
model: "kokoro",
|
||||||
input: text,
|
input: firstChunk,
|
||||||
voice: voice || "af_bella",
|
voice: voice || "af_bella",
|
||||||
response_format: "mp3",
|
response_format: "mp3",
|
||||||
speed: speed || 1.0,
|
speed: speed || 1.0,
|
||||||
@@ -56,12 +80,19 @@ export async function POST(request: NextRequest) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
const audioBuffer = await response.arrayBuffer();
|
// Stream the response
|
||||||
|
if (response.body) {
|
||||||
|
return new NextResponse(response.body, {
|
||||||
|
headers: {
|
||||||
|
"Content-Type": "audio/mpeg",
|
||||||
|
},
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
const audioBuffer = await response.arrayBuffer();
|
||||||
return new NextResponse(audioBuffer, {
|
return new NextResponse(audioBuffer, {
|
||||||
headers: {
|
headers: {
|
||||||
"Content-Type": "audio/mpeg",
|
"Content-Type": "audio/mpeg",
|
||||||
"Content-Length": audioBuffer.byteLength.toString(),
|
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
|||||||
Reference in New Issue
Block a user