mirror of
https://github.com/Tony0410/News-reader-pro.git
synced 2026-05-24 21:31:44 +08:00
Splits article content into smaller audio segments. This allows for more granular control over playback, faster processing, and improved user experience by enabling auto-scrolling to the currently read segment. Updates `types.ts` to include `AudioSegment` interface and modify `Article` to hold `segments`, `currentSegmentIndex`, and `audioUrl` per segment. Introduces `segmentText` utility in `services/textUtils.ts` for robust text segmentation logic. Modifies `App.tsx` to utilize the new segmentation approach for fetching and processing audio. Enhances `components/ReaderView.tsx` to display and auto-scroll through segmented text, highlighting the current segment during playback.
61 lines
1.6 KiB
TypeScript
61 lines
1.6 KiB
TypeScript
|
|
import { v4 as uuidv4 } from 'uuid';
|
|
import { AudioSegment } from '../types';
|
|
|
|
/** Paragraphs shorter than this many characters are emitted as a single segment. */
const MAX_PARAGRAPH_CHARS = 500;

/** Target upper bound, in characters, for a chunk built from several sentences. */
const MAX_CHUNK_CHARS = 400;

/**
 * Matches one sentence at a time, keeping its leading whitespace.
 *
 * A sentence ends at a run of `.`, `!` or `?` (optionally followed by closing
 * quotes/brackets) that is followed by whitespace or the end of the text, so
 * decimals ("3.14") and dotted abbreviations ("U.S.") are not cut in the middle.
 * The second alternative captures trailing text that has no terminator.
 * Together the alternatives cover every character, so concatenating all matches
 * reproduces the input exactly.
 */
const SENTENCE_PATTERN = /[\s\S]+?[.!?]+["'\u201D\u2019)\]]*(?=\s|$)|[\s\S]+/g;

/**
 * Splits a long text string into manageable segments for audio generation.
 *
 * - Paragraphs are separated by blank lines; empty paragraphs are skipped.
 * - A paragraph shorter than 500 characters becomes one segment.
 * - A longer paragraph is split at sentence boundaries and the sentences are
 *   packed greedily into chunks of at most 400 characters. A single sentence
 *   longer than that limit is kept intact as its own segment.
 *
 * Every returned segment has non-empty, trimmed text, a fresh UUID, and
 * `isLoading` / `hasError` set to `false`.
 *
 * @param fullText - The complete article text.
 * @returns The segments in reading order; an empty array for empty input.
 */
export const segmentText = (fullText: string): AudioSegment[] => {
  if (!fullText) return [];

  const segments: AudioSegment[] = [];

  // Central place to create a segment; silently ignores whitespace-only text
  // so callers never receive an empty segment.
  const pushSegment = (text: string): void => {
    const trimmed = text.trim();
    if (!trimmed) return;
    segments.push({
      id: uuidv4(),
      text: trimmed,
      isLoading: false,
      hasError: false
    });
  };

  // 1. Split by blank lines (paragraphs).
  for (const rawPara of fullText.split(/\n\s*\n/)) {
    const cleanPara = rawPara.trim();
    if (!cleanPara) continue;

    // 2. Reasonably sized paragraphs are kept whole.
    if (cleanPara.length < MAX_PARAGRAPH_CHARS) {
      pushSegment(cleanPara);
      continue;
    }

    // 3. Huge paragraphs are split by sentences to avoid timeouts.
    const sentences = cleanPara.match(SENTENCE_PATTERN) ?? [cleanPara];
    let currentChunk = '';

    for (const sentence of sentences) {
      const wouldOverflow =
        currentChunk.length + sentence.length > MAX_CHUNK_CHARS;

      // Only flush when there is something to flush: an over-long first
      // sentence must start a chunk rather than emit an empty one.
      if (currentChunk && wouldOverflow) {
        pushSegment(currentChunk);
        currentChunk = sentence;
      } else {
        // Each match already carries its own leading whitespace, so no
        // separator is added here.
        currentChunk += sentence;
      }
    }

    pushSegment(currentChunk);
  }

  return segments;
};
|