// News-reader-pro/services/textUtils.ts


import { v4 as uuidv4 } from 'uuid';
import { AudioSegment } from '../types';

/** Paragraphs shorter than this many characters are emitted as one segment. */
const MAX_PARAGRAPH_LENGTH = 500;

/**
 * Target upper bound (in characters) for a chunk assembled from the sentences
 * of an oversized paragraph.
 */
const MAX_CHUNK_LENGTH = 400;

/**
 * Matches one sentence: a run of non-terminator characters followed by one or
 * more of `.`, `!`, `?` and an optional closing quote. The `.+` alternative
 * picks up trailing text that has no terminating punctuation.
 */
const SENTENCE_PATTERN = /[^.!?]+[.!?]+["']?|.+/g;

/** Builds a segment for `text` with a fresh id and both status flags cleared. */
const createSegment = (text: string): AudioSegment => ({
  id: uuidv4(),
  text,
  isLoading: false,
  hasError: false
});

/**
 * Splits a long text string into manageable segments for audio generation.
 *
 * The text is first split into paragraphs on blank lines. A paragraph shorter
 * than {@link MAX_PARAGRAPH_LENGTH} characters becomes a single segment.
 * A longer paragraph is split into sentences, which are greedily packed into
 * chunks of at most {@link MAX_CHUNK_LENGTH} characters, joined by a single
 * space.
 *
 * A single sentence that is itself longer than {@link MAX_CHUNK_LENGTH} is not
 * split further; it is emitted as its own segment.
 *
 * @param fullText - The complete text to segment.
 * @returns The segments in reading order. Never contains a segment with empty
 *   text; returns an empty array for empty or whitespace-only input.
 */
export const segmentText = (fullText: string): AudioSegment[] => {
  if (!fullText) return [];

  const segments: AudioSegment[] = [];

  // Paragraphs are separated by a blank line (possibly containing whitespace).
  for (const rawParagraph of fullText.split(/\n\s*\n/)) {
    const paragraph = rawParagraph.trim();
    if (!paragraph) continue;

    if (paragraph.length < MAX_PARAGRAPH_LENGTH) {
      segments.push(createSegment(paragraph));
      continue;
    }

    // Oversized paragraph: pack whole sentences into bounded chunks.
    const sentences = paragraph.match(SENTENCE_PATTERN) ?? [paragraph];
    let chunk = '';

    for (const rawSentence of sentences) {
      // The pattern keeps the whitespace that preceded each sentence; drop it
      // so chunks are joined by exactly one space.
      const sentence = rawSentence.trim();
      if (!sentence) continue;

      if (!chunk) {
        // Nothing accumulated yet: start the chunk instead of flushing an
        // empty one, even when this sentence alone exceeds the limit.
        chunk = sentence;
      } else if (chunk.length + 1 + sentence.length > MAX_CHUNK_LENGTH) {
        segments.push(createSegment(chunk));
        chunk = sentence;
      } else {
        chunk += ' ' + sentence;
      }
    }

    if (chunk) {
      segments.push(createSegment(chunk));
    }
  }

  return segments;
};