feat: Segment article text for improved playback

Splits article content into smaller audio segments. This allows for more granular control over playback, faster processing, and improved user experience by enabling auto-scrolling to the currently read segment.

Updates `types.ts` to include the `AudioSegment` interface and modifies `Article` to hold `segments`, `currentSegmentIndex`, and `audioUrl` per segment.

Introduces `segmentText` utility in `services/textUtils.ts` for robust text segmentation logic.

Modifies `App.tsx` to utilize the new segmentation approach for fetching and processing audio.

Enhances `components/ReaderView.tsx` to display and auto-scroll through segmented text, highlighting the current segment during playback.
This commit is contained in:
Anthony
2025-11-19 19:57:43 +08:00
parent 0775104b69
commit 78f1e0e93c
4 changed files with 345 additions and 153 deletions

60
services/textUtils.ts Normal file
View File

@@ -0,0 +1,60 @@
import { v4 as uuidv4 } from 'uuid';
import { AudioSegment } from '../types';
/** Paragraphs shorter than this many characters are emitted as a single segment. */
const MAX_PARAGRAPH_LENGTH = 500;

/** Soft upper bound (in characters) for chunks built from an oversized paragraph. */
const MAX_CHUNK_LENGTH = 400;

/**
 * Matches one sentence: a run of non-terminator characters followed by one or
 * more of `.`, `!`, `?` and an optional closing quote. The `.+` alternative
 * picks up trailing text that has no terminator.
 */
const SENTENCE_PATTERN = /[^.!?]+[.!?]+["']?|.+/g;

/** Builds a fresh, idle segment (not loading, no error) with a unique id. */
const createSegment = (text: string): AudioSegment => ({
  id: uuidv4(),
  text,
  isLoading: false,
  hasError: false,
});

/**
 * Splits a long text string into manageable segments for audio generation.
 *
 * The text is first split on blank lines (paragraphs). A paragraph shorter
 * than {@link MAX_PARAGRAPH_LENGTH} becomes one segment. A longer paragraph is
 * broken into sentences, which are packed greedily into chunks of at most
 * {@link MAX_CHUNK_LENGTH} characters, joined by a single space.
 *
 * A single sentence that is itself longer than {@link MAX_CHUNK_LENGTH} is
 * kept whole in its own segment; it is never cut mid-sentence.
 *
 * @param fullText - The complete article text. Empty input yields `[]`.
 * @returns Segments in reading order. No segment has empty text.
 */
export const segmentText = (fullText: string): AudioSegment[] => {
  if (!fullText) return [];

  const segments: AudioSegment[] = [];

  // 1. Split on blank lines (paragraph boundaries).
  for (const rawParagraph of fullText.split(/\n\s*\n/)) {
    const paragraph = rawParagraph.trim();
    if (!paragraph) continue;

    // Reasonably sized paragraphs are kept intact.
    if (paragraph.length < MAX_PARAGRAPH_LENGTH) {
      segments.push(createSegment(paragraph));
      continue;
    }

    // 2. Oversized paragraph: pack whole sentences into bounded chunks.
    const sentences = paragraph.match(SENTENCE_PATTERN) ?? [paragraph];
    let chunk = '';

    for (const rawSentence of sentences) {
      // Matches keep the whitespace that followed the previous terminator;
      // trim so chunks are joined with exactly one space.
      const sentence = rawSentence.trim();
      if (!sentence) continue;

      // Flush only when there is something to flush. Without the `chunk`
      // guard, an over-long first sentence would emit an empty segment.
      if (chunk && chunk.length + 1 + sentence.length > MAX_CHUNK_LENGTH) {
        segments.push(createSegment(chunk));
        chunk = sentence;
      } else {
        chunk = chunk ? `${chunk} ${sentence}` : sentence;
      }
    }

    if (chunk) {
      segments.push(createSegment(chunk));
    }
  }

  return segments;
};