import { v4 as uuidv4 } from 'uuid';
import { AudioSegment } from '../types';

// Progressive chunking constants (sizes are in characters)
const CHUNK_SIZE_START = 150; // Target for the very first segment (fast generation)
const CHUNK_SIZE_RAMP = 400; // Target for the next RAMP_UP_COUNT segments
const CHUNK_SIZE_FULL = 1000; // Target for every segment after the ramp
const RAMP_UP_COUNT = 2; // How many medium segments follow the first one

// Characters that may terminate a sentence.
const SENTENCE_TERMINATORS = '.!?';
// Closing quotes/brackets that stay attached to the sentence they close.
const SENTENCE_CLOSERS = '"\'\u201D\u2019)]';
// Line terminators used to break up text that has no sentence punctuation.
const LINE_BREAKS = /[\r\n\u2028\u2029]+/;
const WHITESPACE = /\s/;

/**
 * Returns the target character budget for the segment at `segmentIndex`
 * (0-based): small for the first, medium for the ramp, full afterwards.
 */
const targetSizeFor = (segmentIndex: number): number => {
  if (segmentIndex === 0) return CHUNK_SIZE_START;
  if (segmentIndex <= RAMP_UP_COUNT) return CHUNK_SIZE_RAMP;
  return CHUNK_SIZE_FULL;
};

/**
 * Splits text into rough sentences in a single linear pass.
 *
 * A run of terminators (plus any closing quotes/brackets) only counts as a
 * sentence boundary when it is followed by whitespace or the end of the text.
 * This keeps tokens such as "3.14", "e.g.," or "example.com" intact, so they
 * are not torn apart and later re-joined with a stray space.
 *
 * Text after the last boundary is split per line so that unpunctuated content
 * (headings, lists) can still be chunked. Pieces are returned untrimmed and
 * may be blank; the caller is expected to trim and skip empties.
 */
const splitIntoSentences = (text: string): string[] => {
  const pieces: string[] = [];
  const length = text.length;
  let start = 0;
  let i = 0;

  while (i < length) {
    if (!SENTENCE_TERMINATORS.includes(text.charAt(i))) {
      i++;
      continue;
    }

    // Consume the whole terminator run ("...", "?!") and trailing closers.
    let end = i + 1;
    while (end < length && SENTENCE_TERMINATORS.includes(text.charAt(end))) end++;
    while (end < length && SENTENCE_CLOSERS.includes(text.charAt(end))) end++;

    if (end === length || WHITESPACE.test(text.charAt(end))) {
      pieces.push(text.slice(start, end));
      start = end;
    }
    i = end;
  }

  if (start < length) {
    for (const line of text.slice(start).split(LINE_BREAKS)) {
      pieces.push(line);
    }
  }

  return pieces;
};

/**
 * Splits text into segments that start small and get larger.
 * This allows playback to begin almost immediately (streaming feel)
 * while maintaining efficiency for the rest of the article.
 *
 * Sentences are never broken apart: whole sentences are packed into a segment
 * until adding the next one (including the joining space) would exceed the
 * current target size. A single sentence longer than the target is therefore
 * emitted as its own oversized segment.
 *
 * @param fullText - The text to segment.
 * @returns The segments in reading order; empty when `fullText` is empty or
 *   contains only whitespace.
 */
export const segmentText = (fullText: string): AudioSegment[] => {
  if (!fullText) return [];

  const segments: AudioSegment[] = [];
  let currentBuffer = '';

  for (const sentence of splitIntoSentences(fullText)) {
    const cleanSentence = sentence.trim();
    if (!cleanSentence) continue;

    // Nothing buffered yet: always accept the sentence, whatever its size.
    if (currentBuffer.length === 0) {
      currentBuffer = cleanSentence;
      continue;
    }

    // +1 accounts for the space inserted between buffered sentences.
    const predictedSize = currentBuffer.length + 1 + cleanSentence.length;

    // The target depends on how many segments have been emitted so far.
    if (predictedSize > targetSizeFor(segments.length)) {
      segments.push(createSegment(currentBuffer));
      currentBuffer = cleanSentence;
    } else {
      currentBuffer += ' ' + cleanSentence;
    }
  }

  // Flush remaining
  if (currentBuffer) {
    segments.push(createSegment(currentBuffer));
  }

  return segments;
};

/** Wraps trimmed text in a fresh segment with a unique id and idle status flags. */
const createSegment = (text: string): AudioSegment => ({
  id: uuidv4(),
  text: text.trim(),
  isLoading: false,
  hasError: false
});