feat: Segment article text for improved playback

Splits article content into smaller audio segments. This allows more granular control over playback, faster processing, and a better user experience by enabling auto-scrolling to the segment currently being read. Updates `types.ts` to add the `AudioSegment` interface and to modify `Article` to hold `segments`, `currentSegmentIndex`, and `audioUrl` per segment. Introduces the `segmentText` utility in `services/textUtils.ts` for robust text segmentation. Modifies `App.tsx` to use the new segmentation approach when fetching and processing audio. Enhances `components/ReaderView.tsx` to display and auto-scroll through the segmented text, highlighting the current segment during playback.
2026-05-24 21:31:44 +08:00 · 2025-11-19 19:57:43 +08:00
parent 0775104b69
commit 78f1e0e93c
4 changed files with 345 additions and 153 deletions
--- a/services/textUtils.ts
+++ b/services/textUtils.ts
@@ -0,0 +1,60 @@
+
+import { v4 as uuidv4 } from 'uuid';
+import { AudioSegment } from '../types';
+
+/**
+ * Splits article text into segments small enough for one audio request.
+ *
+ * Paragraphs (separated by blank lines) shorter than 500 characters become one
+ * segment each. Longer paragraphs are cut at sentence boundaries and packed
+ * greedily into chunks of at most 400 characters; a single sentence longer than
+ * that is kept whole rather than cut mid-sentence.
+ *
+ * @param fullText - Raw article text; empty input yields an empty array.
+ * @returns Segments in reading order, each with a new id and cleared flags.
+ */
+export const segmentText = (fullText: string): AudioSegment[] => {
+  if (!fullText) return [];
+
+  const maxParagraphLength = 500;
+  const maxChunkLength = 400;
+  // A sentence ends at terminal punctuation (plus any closing quotes/brackets)
+  // followed by whitespace or end of text, so "3.14" and "U.S." stay intact.
+  const sentencePattern = /[\s\S]+?(?:[.!?]+["'\u2019\u201D)\]]*(?=\s|$)|$)/g;
+
+  const segments: AudioSegment[] = [];
+  const pushSegment = (text: string): void => {
+    const clean = text.trim();
+    // Never emit an empty segment: there is nothing to read aloud or display.
+    if (!clean) return;
+    segments.push({
+      id: uuidv4(),
+      text: clean,
+      isLoading: false,
+      hasError: false
+    });
+  };
+
+  for (const rawPara of fullText.split(/\n\s*\n/)) {
+    const cleanPara = rawPara.trim();
+    if (cleanPara.length < maxParagraphLength) {
+      pushSegment(cleanPara);
+      continue;
+    }
+
+    // Oversized paragraph: pack whole sentences into bounded chunks.
+    let currentChunk = "";
+    for (const sentence of cleanPara.match(sentencePattern) || [cleanPara]) {
+      if (currentChunk && currentChunk.length + sentence.length > maxChunkLength) {
+        pushSegment(currentChunk);
+        currentChunk = sentence;
+      } else {
+        // Matches keep their leading whitespace; concatenating preserves the text.
+        currentChunk += sentence;
+      }
+    }
+    pushSegment(currentChunk);
+  }
+
+  return segments;
+};