Files
news-reader-actions-test/services/textUtils.ts
Anthony 78f1e0e93c feat: Segment article text for improved playback
Splits article content into smaller audio segments. This allows for more granular control over playback, faster processing, and improved user experience by enabling auto-scrolling to the currently read segment.

Updates `types.ts` to include `AudioSegment` interface and modify `Article` to hold `segments`, `currentSegmentIndex`, and `audioUrl` per segment.

Introduces `segmentText` utility in `services/textUtils.ts` for robust text segmentation logic.

Modifies `App.tsx` to utilize the new segmentation approach for fetching and processing audio.

Enhances `components/ReaderView.tsx` to display and auto-scroll through segmented text, highlighting the current segment during playback.
2025-11-19 19:57:43 +08:00

61 lines
1.6 KiB
TypeScript

import { v4 as uuidv4 } from 'uuid';
import { AudioSegment } from '../types';
/** Paragraphs shorter than this many characters are emitted as a single segment. */
const MAX_PARAGRAPH_LENGTH = 500;

/** Target upper bound, in characters, for chunks built out of an oversized paragraph. */
const MAX_CHUNK_LENGTH = 400;

/**
 * Matches one sentence at a time: a run of non-terminator characters, one or
 * more terminators (ASCII `.!?` or full-width `。！？`) and an optional closing
 * straight quote. The `[\s\S]+` fallback consumes whatever is left when no
 * terminator follows, including line breaks, so the matches always tile the
 * whole paragraph without dropping characters.
 */
const SENTENCE_PATTERN = /[^.!?\u3002\uFF01\uFF1F]+[.!?\u3002\uFF01\uFF1F]+["']?|[\s\S]+/g;

/** Wraps a piece of text in a fresh, idle `AudioSegment` with a unique id. */
const createSegment = (text: string): AudioSegment => ({
  id: uuidv4(),
  text,
  isLoading: false,
  hasError: false,
});

/**
 * Splits a long text string into manageable segments for audio generation.
 *
 * Paragraphs (separated by blank lines) are the primary unit. A paragraph
 * shorter than `MAX_PARAGRAPH_LENGTH` becomes one segment. A longer paragraph
 * is cut at sentence boundaries and the sentences are packed greedily into
 * chunks of at most `MAX_CHUNK_LENGTH` characters.
 *
 * Guarantees:
 * - no segment has empty text;
 * - each chunk is a contiguous slice of its paragraph, so the original spacing
 *   between sentences is preserved rather than doubled.
 *
 * Known limitation: a single sentence longer than `MAX_CHUNK_LENGTH` is not
 * split any further and is emitted as one oversized segment.
 *
 * @param fullText - The complete article text; empty input yields no segments.
 * @returns The segments in reading order, each with a freshly generated id.
 */
export const segmentText = (fullText: string): AudioSegment[] => {
  if (!fullText) return [];

  const segments: AudioSegment[] = [];

  // Blank lines (optionally containing whitespace) separate paragraphs.
  for (const rawParagraph of fullText.split(/\n\s*\n/)) {
    const paragraph = rawParagraph.trim();
    if (!paragraph) continue;

    if (paragraph.length < MAX_PARAGRAPH_LENGTH) {
      segments.push(createSegment(paragraph));
      continue;
    }

    // Oversized paragraph: pack whole sentences into bounded chunks.
    const sentences = paragraph.match(SENTENCE_PATTERN) ?? [paragraph];
    let chunk = '';

    for (const sentence of sentences) {
      // Flush only when something has been accumulated; otherwise an
      // overlong first sentence would produce an empty segment.
      if (chunk && chunk.length + sentence.length > MAX_CHUNK_LENGTH) {
        segments.push(createSegment(chunk.trimEnd()));
        chunk = '';
      }
      // Whitespace between sentences is kept as-is, but never at a chunk start.
      chunk += chunk ? sentence : sentence.trimStart();
    }

    const remainder = chunk.trimEnd();
    if (remainder) {
      segments.push(createSegment(remainder));
    }
  }

  return segments;
};