import { v4 as uuidv4 } from 'uuid';
import { AudioSegment } from '../types';

/** Paragraphs shorter than this many characters are emitted as a single segment. */
const MAX_PARAGRAPH_LENGTH = 500;

/** Upper bound, in characters, for a chunk assembled from several sentences. */
const MAX_CHUNK_LENGTH = 400;

/** Paragraph separator: two newlines with optional whitespace in between. */
const PARAGRAPH_BREAK = /\n\s*\n/;

/**
 * One sentence: a run of non-terminator characters, its terminating
 * punctuation and an optional closing quote. The `.+` alternative picks up
 * text that has no terminator (e.g. a trailing fragment).
 */
const SENTENCE_PATTERN = /[^.!?]+[.!?]+["']?|.+/g;

/** Wraps a piece of text in a fresh segment with a new id and cleared status flags. */
const createSegment = (text: string): AudioSegment => ({
  id: uuidv4(),
  text,
  isLoading: false,
  hasError: false,
});

/**
 * Greedily packs the sentences of a long paragraph into chunks of at most
 * `MAX_CHUNK_LENGTH` characters. Sentences are never split, so a single
 * sentence longer than the limit becomes its own (over-long) chunk.
 */
const chunkBySentence = (paragraph: string): string[] => {
  const sentences = paragraph.match(SENTENCE_PATTERN) || [paragraph];
  const chunks: string[] = [];
  let current = '';

  for (const rawSentence of sentences) {
    // Matches keep the whitespace that followed the previous terminator;
    // trim it so chunks are joined with exactly one space.
    const sentence = rawSentence.trim();
    if (!sentence) continue;

    if (!current) {
      // Nothing buffered yet: start a chunk without flushing, otherwise an
      // over-long first sentence would emit an empty chunk.
      current = sentence;
    } else if (current.length + 1 + sentence.length > MAX_CHUNK_LENGTH) {
      chunks.push(current);
      current = sentence;
    } else {
      current += ' ' + sentence;
    }
  }

  if (current) chunks.push(current);
  return chunks;
};

/**
 * Splits a long text into segments suitable for audio generation.
 *
 * The text is first split into paragraphs on blank lines. A paragraph shorter
 * than `MAX_PARAGRAPH_LENGTH` characters becomes one segment; a longer one is
 * broken at sentence boundaries into chunks of at most `MAX_CHUNK_LENGTH`
 * characters. Segments are never empty.
 *
 * @param fullText - The text to split; an empty string yields no segments.
 * @returns The segments in reading order, each with a fresh id and with
 *   `isLoading` and `hasError` set to `false`.
 */
export const segmentText = (fullText: string): AudioSegment[] => {
  if (!fullText) return [];

  const segments: AudioSegment[] = [];

  for (const rawParagraph of fullText.split(PARAGRAPH_BREAK)) {
    const paragraph = rawParagraph.trim();
    if (!paragraph) continue;

    if (paragraph.length < MAX_PARAGRAPH_LENGTH) {
      segments.push(createSegment(paragraph));
      continue;
    }

    // Long paragraph: split by sentences to keep each request small.
    for (const chunk of chunkBySentence(paragraph)) {
      segments.push(createSegment(chunk));
    }
  }

  return segments;
};