Files
news-reader-actions-test/services/textUtils.ts
Anthony 417d48ffdf feat: Improve text segmentation for faster playback
Implement a progressive text segmentation strategy. The first few segments are intentionally kept very short to allow playback to start almost immediately, creating a more responsive feel. As more segments are processed, their length gradually increases to optimize audio generation efficiency for the remainder of the article. Additionally, the title is now prepended as the very first segment. The buffer ahead is also increased to 5 segments to ensure content is ready.

Further refinements include:
- Enhanced voice descriptions in constants.
- Improved segment styling in ReaderView for better visual active state indication.
2025-11-19 20:15:39 +08:00

70 lines
2.2 KiB
TypeScript

import { v4 as uuidv4 } from 'uuid';
import { AudioSegment } from '../types';
// Progressive chunking constants.
// Sizes are character counts: segmentText compares them against string lengths.
// They are soft targets — a single sentence longer than the target is still
// emitted whole rather than being cut mid-sentence.
const CHUNK_SIZE_START = 150; // Target for the very first segment only: kept tiny so audio generation finishes fast
const CHUNK_SIZE_RAMP = 400; // Medium target for the segments right after the first one
const CHUNK_SIZE_FULL = 1000; // Target for every later segment: full size for efficiency
const RAMP_UP_COUNT = 2; // How many medium (CHUNK_SIZE_RAMP) segments follow the first one before switching to CHUNK_SIZE_FULL
/**
 * Splits text into segments that start small and get larger.
 * This allows playback to begin almost immediately (streaming feel)
 * while maintaining efficiency for the rest of the article.
 *
 * Sentences are packed greedily: the first segment targets CHUNK_SIZE_START
 * characters, the next RAMP_UP_COUNT segments target CHUNK_SIZE_RAMP, and all
 * later ones target CHUNK_SIZE_FULL. Sentences inside one segment are joined
 * with a single space.
 *
 * The targets are soft limits: a segment is only ever cut between sentences,
 * so one sentence that is longer than the target becomes an oversized segment
 * on its own.
 *
 * @param fullText - The text to split. Empty or whitespace-only input yields
 *   an empty array.
 * @returns The segments in reading order.
 */
export const segmentText = (fullText: string): AudioSegment[] => {
  if (!fullText) return [];

  // 1. Split by rough sentence structure first to avoid breaking mid-sentence.
  //    A run of terminators (plus any closing quotes/brackets, which stay
  //    attached to the sentence) only counts as a boundary when whitespace or
  //    the end of the text follows. Without that check, tokens such as "3.5",
  //    "example.com" or "U.S.A." were split and then re-joined with a space,
  //    corrupting the text ("3. 5"). The `.+` alternative picks up trailing
  //    text that has no terminator, one line at a time.
  const rawSentences =
    fullText.match(/[\s\S]*?[.!?]+["'\u201D\u2019)\]]*(?=\s|$)|.+/g) || [fullText];

  const segments: AudioSegment[] = [];
  let currentBuffer = "";

  for (const sentence of rawSentences) {
    const cleanSentence = sentence.trim();
    if (!cleanSentence) continue;

    // An empty buffer always accepts the sentence, whatever its length.
    if (currentBuffer.length === 0) {
      currentBuffer = cleanSentence;
      continue;
    }

    // The number of segments already emitted is the index of the one being
    // built, which decides how large it is allowed to grow.
    const segmentIndex = segments.length;
    let targetSize = CHUNK_SIZE_FULL;
    if (segmentIndex === 0) targetSize = CHUNK_SIZE_START; // First segment: super fast
    else if (segmentIndex <= RAMP_UP_COUNT) targetSize = CHUNK_SIZE_RAMP; // Next few: medium

    // Predicted size if we append this sentence, including the joining space.
    const predictedSize = currentBuffer.length + 1 + cleanSentence.length;

    if (predictedSize > targetSize) {
      // Too big: flush what we have and start a new segment with this sentence.
      segments.push(createSegment(currentBuffer));
      currentBuffer = cleanSentence;
    } else {
      currentBuffer += " " + cleanSentence;
    }
  }

  // Flush remaining
  if (currentBuffer) {
    segments.push(createSegment(currentBuffer));
  }

  return segments;
};
/**
 * Builds a new AudioSegment for one chunk of text.
 *
 * The segment gets a freshly generated UUID as its id, the text with
 * surrounding whitespace removed, and both status flags (isLoading, hasError)
 * set to false.
 *
 * @param text - The chunk's text; leading and trailing whitespace is trimmed.
 * @returns The newly created segment.
 */
const createSegment = (text: string): AudioSegment => {
  const segment: AudioSegment = {
    id: uuidv4(),
    text: text.trim(),
    isLoading: false,
    hasError: false,
  };
  return segment;
};