feat: Segment article text for improved playback
Splits article content into smaller audio segments. This allows for more granular control over playback, faster processing, and an improved user experience by enabling auto-scrolling to the segment currently being read. Updates `types.ts` to add the `AudioSegment` interface and modifies `Article` to hold `segments`, `currentSegmentIndex`, and `audioUrl` per segment. Introduces a `segmentText` utility in `services/textUtils.ts` for robust text segmentation logic. Modifies `App.tsx` to use the new segmentation approach for fetching and processing audio. Enhances `components/ReaderView.tsx` to display and auto-scroll through the segmented text, highlighting the current segment during playback.
This commit is contained in:
60
services/textUtils.ts
Normal file
60
services/textUtils.ts
Normal file
@@ -0,0 +1,60 @@
|
||||
|
||||
import { v4 as uuidv4 } from 'uuid';
|
||||
import { AudioSegment } from '../types';
|
||||
|
||||
/** Paragraphs shorter than this many characters are emitted as a single segment. */
const MAX_PARAGRAPH_LENGTH = 500;

/** Target upper bound, in characters, for a chunk built from several sentences. */
const MAX_CHUNK_LENGTH = 400;

/** A paragraph break: two newlines with optional whitespace between them. */
const PARAGRAPH_BREAK = /\n\s*\n/;

/**
 * A sentence: text up to and including its terminating punctuation (plus an
 * optional closing quote), or whatever trailing text has no terminator.
 */
const SENTENCE_PATTERN = /[^.!?]+[.!?]+["']?|.+/g;

/** Builds a segment for `text` with a fresh id and both status flags cleared. */
const createSegment = (text: string): AudioSegment => ({
  id: uuidv4(),
  text,
  isLoading: false,
  hasError: false,
});

/**
 * Splits a long text string into manageable segments for audio generation.
 *
 * Paragraphs (runs of text separated by blank lines) are the primary unit:
 * a paragraph shorter than {@link MAX_PARAGRAPH_LENGTH} characters becomes one
 * segment. A longer paragraph is split into sentences, which are packed
 * greedily, in order, into chunks of at most {@link MAX_CHUNK_LENGTH}
 * characters, joined by single spaces.
 *
 * A single sentence that is itself longer than the chunk limit is never cut;
 * it is emitted as its own (oversized) segment.
 *
 * @param fullText - The complete text to segment.
 * @returns The segments in reading order. Empty when `fullText` is empty or
 *   contains only whitespace. No returned segment has empty text.
 */
export const segmentText = (fullText: string): AudioSegment[] => {
  if (!fullText) return [];

  const segments: AudioSegment[] = [];

  for (const rawParagraph of fullText.split(PARAGRAPH_BREAK)) {
    const paragraph = rawParagraph.trim();
    if (!paragraph) continue;

    // Reasonably sized paragraphs are kept whole.
    if (paragraph.length < MAX_PARAGRAPH_LENGTH) {
      segments.push(createSegment(paragraph));
      continue;
    }

    // Oversized paragraph: fall back to sentence-level packing.
    const sentences = paragraph.match(SENTENCE_PATTERN) ?? [paragraph];
    let currentChunk = '';

    for (const rawSentence of sentences) {
      // Matches keep the whitespace that preceded them; trim so that joins
      // use exactly one space and lengths reflect the emitted text.
      const sentence = rawSentence.trim();
      if (!sentence) continue;

      // Nothing accumulated yet: start the chunk rather than flushing an
      // empty one (which would emit a segment with no text).
      if (!currentChunk) {
        currentChunk = sentence;
        continue;
      }

      // +1 accounts for the joining space.
      if (currentChunk.length + 1 + sentence.length > MAX_CHUNK_LENGTH) {
        segments.push(createSegment(currentChunk));
        currentChunk = sentence;
      } else {
        currentChunk = `${currentChunk} ${sentence}`;
      }
    }

    if (currentChunk) {
      segments.push(createSegment(currentChunk));
    }
  }

  return segments;
};
|
||||
Reference in New Issue
Block a user