feat: Segment article text for improved playback

Splits article content into smaller audio segments. This allows for more granular control over playback, faster processing, and improved user experience by enabling auto-scrolling to the currently read segment.

Updates `types.ts` to add the `AudioSegment` interface and changes `Article` to hold `segments` and `currentSegmentIndex`, with `audioUrl` now stored per segment.

Introduces `segmentText` utility in `services/textUtils.ts` for robust text segmentation logic.

Modifies `App.tsx` to utilize the new segmentation approach for fetching and processing audio.

Enhances `components/ReaderView.tsx` to display and auto-scroll through segmented text, highlighting the current segment during playback.
This commit is contained in:
Anthony
2025-11-19 19:57:43 +08:00
parent 0775104b69
commit 78f1e0e93c
4 changed files with 345 additions and 153 deletions

354
App.tsx
View File

@@ -1,10 +1,12 @@
import React, { useState, useRef, useEffect, useCallback } from 'react';
import { v4 as uuidv4 } from 'uuid';
import { Plus, Play, Pause, SkipForward, SkipBack, Volume2, Gauge, Layout } from 'lucide-react';
import { Article, PlaybackStatus, PlayerState, VoiceName } from './types';
import { Article, PlaybackStatus, PlayerState, VoiceName, AudioSegment } from './types';
import { AVAILABLE_VOICES, MIN_SPEED, MAX_SPEED, SPEED_STEP } from './constants';
import { extractArticleContent, generateSpeechFromText } from './services/geminiService';
import { base64ToUint8Array, createWavBlob } from './services/audioUtils';
import { segmentText } from './services/textUtils';
import { QueueItem } from './components/QueueItem';
import { VoiceSelector } from './components/VoiceSelector';
import { ReaderView } from './components/ReaderView';
@@ -13,7 +15,7 @@ export default function App() {
// -- State --
const [inputUrl, setInputUrl] = useState('');
const [queue, setQueue] = useState<Article[]>([]);
// Selected article for reading (defaults to playing article)
// Selected article for viewing text (separate from playing)
const [viewId, setViewId] = useState<string | null>(null);
const [playerState, setPlayerState] = useState<PlayerState>({
@@ -25,141 +27,248 @@ export default function App() {
// -- Refs --
const audioRef = useRef<HTMLAudioElement>(new Audio());
const audioSrcRef = useRef<string | null>(null);
// Track active processing to prevent duplicate fetch calls
const processingRef = useRef<Set<string>>(new Set());
// -- Helpers --
const getCurrentArticle = () => queue.find(a => a.id === playerState.currentArticleId);
const getViewingArticle = () => {
  // Preference order: the article the user explicitly selected, then the one
  // currently playing, then the head of the queue, otherwise nothing.
  const targetId = viewId || playerState.currentArticleId;
  if (targetId) {
    return queue.find(entry => entry.id === targetId);
  }
  return queue.length > 0 ? queue[0] : null;
};
const updateArticleStatus = (id: string, status: PlaybackStatus, errorMessage?: string, audioUrl?: string, title?: string, text?: string) => {
setQueue(prev => prev.map(item => {
if (item.id !== id) return item;
return {
...item,
status,
errorMessage,
audioUrl: audioUrl || item.audioUrl,
title: title || item.title,
text: text || item.text
};
// -- State Updaters --
// Merges `updates` into the queued article whose id matches; other articles are left as they are.
const updateArticle = (id: string, updates: Partial<Article>) => {
  setQueue(prev =>
    prev.map(item => {
      if (item.id !== id) return item;
      return { ...item, ...updates };
    })
  );
};
// Merges `updates` into a single segment of a single article, rebuilding only that article's segment list.
const updateSegment = (articleId: string, segmentId: string, updates: Partial<AudioSegment>) => {
  const patchSegment = (seg: AudioSegment): AudioSegment =>
    seg.id === segmentId ? { ...seg, ...updates } : seg;

  setQueue(prev =>
    prev.map(article =>
      article.id === articleId
        ? { ...article, segments: article.segments.map(seg => patchSegment(seg)) }
        : article
    )
  );
};
// -- Audio Generation Pipeline --
/**
 * Generates speech for one segment and stores the resulting blob URL on it.
 * A request for a segment that is already in flight is ignored.
 */
const processSegmentAudio = useCallback(async (articleId: string, segmentId: string, text: string, voice: VoiceName) => {
  const requestKey = `${articleId}-${segmentId}`;
  if (processingRef.current.has(requestKey)) {
    return;
  }
  processingRef.current.add(requestKey);
  updateSegment(articleId, segmentId, { isLoading: true });

  try {
    // base64 PCM from the speech service -> raw bytes -> WAV blob -> object URL
    const encoded = await generateSpeechFromText(text, voice);
    const wav = createWavBlob(base64ToUint8Array(encoded));
    updateSegment(articleId, segmentId, { audioUrl: URL.createObjectURL(wav), isLoading: false });
  } catch (error) {
    console.error("Segment generation failed", error);
    updateSegment(articleId, segmentId, { isLoading: false, hasError: true });
  } finally {
    // Always release the key so the guard above does not block later calls.
    processingRef.current.delete(requestKey);
  }
}, []);
/**
 * Keeps playback buffered: requests audio for the current segment and the two
 * after it, skipping any that already have audio, are loading, or have failed.
 */
const manageBuffer = useCallback(async (article: Article) => {
  const start = article.currentSegmentIndex;
  const pending = article.segments
    .slice(start, start + 3)
    .filter(seg => !(seg.audioUrl || seg.isLoading || seg.hasError));

  // Deliberately not awaited: each request runs in the background, in parallel.
  pending.forEach(seg => {
    processSegmentAudio(article.id, seg.id, seg.text, playerState.selectedVoice);
  });
}, [playerState.selectedVoice, processSegmentAudio]);
// -- Handlers --
// 1. Add URL to Queue
const handleAddUrl = async () => {
if (!inputUrl.trim()) return;
const id = uuidv4();
const newArticle: Article = {
id,
url: inputUrl,
title: 'Fetching info...',
text: '',
segments: [],
currentSegmentIndex: 0,
status: PlaybackStatus.LOADING_TEXT
};
setQueue(prev => [...prev, newArticle]);
setInputUrl('');
// Auto view the new article while loading
if (!playerState.isPlaying) {
setViewId(id);
}
if (!playerState.isPlaying) setViewId(id);
// Start fetching text immediately
try {
const { title, text } = await extractArticleContent(newArticle.url);
updateArticleStatus(id, PlaybackStatus.IDLE, undefined, undefined, title, text);
} catch (error: any) {
updateArticleStatus(id, PlaybackStatus.ERROR, error.message || "Failed to load article");
}
};
// 2. Generate Audio for an article
const prepareAudio = async (articleId: string): Promise<string | null> => {
const article = queue.find(a => a.id === articleId);
if (!article) return null;
// 1. Split text into segments immediately
const segments = segmentText(text);
// If already has audio return it
if (article.audioUrl) return article.audioUrl;
updateArticle(id, {
title,
text,
segments,
status: PlaybackStatus.LOADING_AUDIO
});
updateArticleStatus(articleId, PlaybackStatus.LOADING_AUDIO);
// 2. Trigger audio for the first segment immediately
if (segments.length > 0) {
// We manually call the processor for the first one to ensure fast start
const firstSeg = segments[0];
await processSegmentAudio(id, firstSeg.id, firstSeg.text, playerState.selectedVoice);
try {
if (!article.text || article.text.length < 10) {
throw new Error("No text available to read.");
// Once first segment is ready, we are effectively ready to play
updateArticle(id, { status: PlaybackStatus.READY });
// If nothing else is playing, auto-play this
setQueue(prev => {
const current = prev.find(a => a.id === id);
if (current && current.segments[0].audioUrl && !playerState.isPlaying) {
playArticle(id);
}
return prev;
});
} else {
updateArticle(id, { status: PlaybackStatus.ERROR, errorMessage: "No readable text found." });
}
const base64Audio = await generateSpeechFromText(article.text, playerState.selectedVoice);
const pcmData = base64ToUint8Array(base64Audio);
const wavBlob = createWavBlob(pcmData);
const audioUrl = URL.createObjectURL(wavBlob);
updateArticleStatus(articleId, PlaybackStatus.READY, undefined, audioUrl);
return audioUrl;
} catch (error: any) {
updateArticleStatus(articleId, PlaybackStatus.ERROR, error.message || "Failed to generate speech");
return null;
updateArticle(id, {
status: PlaybackStatus.ERROR,
errorMessage: error.message || "Failed to load article"
});
}
};
// 3. Play Logic
// -- Playback Control --
const playArticle = useCallback(async (id: string) => {
const article = queue.find(a => a.id === id);
if (!article) return;
// If currently playing a different one, pause it.
if (playerState.currentArticleId && playerState.currentArticleId !== id) {
audioRef.current.pause();
}
setPlayerState(prev => ({ ...prev, currentArticleId: id, isPlaying: true }));
// Also switch view to the playing article
setViewId(id);
updateArticle(id, { status: PlaybackStatus.PLAYING });
let src = article.audioUrl;
// Check if we need to generate audio
if (!src) {
src = await prepareAudio(id);
}
if (src) {
// Only update src if it's different to avoid reload
if (audioSrcRef.current !== src) {
audioRef.current.src = src;
audioSrcRef.current = src;
// Apply current speed
audioRef.current.playbackRate = playerState.playbackRate;
}
try {
await audioRef.current.play();
updateArticleStatus(id, PlaybackStatus.PLAYING);
} catch (e) {
console.error("Play error", e);
setPlayerState(prev => ({ ...prev, isPlaying: false }));
}
}
}, [queue, playerState.currentArticleId, playerState.playbackRate, playerState.selectedVoice]);
// The actual audio switching is handled by the useEffect monitoring currentArticleId + currentSegmentIndex
}, []);
const pausePlayback = useCallback(() => {
audioRef.current.pause();
setPlayerState(prev => ({ ...prev, isPlaying: false }));
if (playerState.currentArticleId) {
updateArticleStatus(playerState.currentArticleId, PlaybackStatus.PAUSED);
updateArticle(playerState.currentArticleId, { status: PlaybackStatus.PAUSED });
}
}, [playerState.currentArticleId]);
// -- Effects --
// 1. Audio Player Loop
// This effect reacts to changes in article/segment index and loads the correct audio source
// It does nothing unless an article is selected and `isPlaying` is true. Each run it either
// marks the article completed, starts/resumes the current segment's audio, or requests that
// audio, and then asks manageBuffer to prefetch upcoming segments.
useEffect(() => {
const article = queue.find(a => a.id === playerState.currentArticleId);
if (!article || !playerState.isPlaying) return;
// Indexing past the end of `segments` yields undefined, which is treated as "finished" below.
const currentSegment = article.segments[article.currentSegmentIndex];
// If finished all segments
if (!currentSegment) {
updateArticle(article.id, { status: PlaybackStatus.COMPLETED });
setPlayerState(prev => ({ ...prev, isPlaying: false }));
return;
}
// Check if audio is ready
if (currentSegment.audioUrl) {
// Only switch src if we aren't already playing it
// We use a custom attribute on the audio element to track current url to avoid state race conditions
const audioEl = audioRef.current;
const currentSrc = audioEl.getAttribute('data-current-src');
if (currentSrc !== currentSegment.audioUrl) {
audioEl.src = currentSegment.audioUrl;
audioEl.setAttribute('data-current-src', currentSegment.audioUrl);
audioEl.playbackRate = playerState.playbackRate;
audioEl.play().catch(e => console.warn("Playback interrupted", e));
} else if (audioEl.paused) {
// Same source already loaded but paused: resume rather than reload.
audioEl.play().catch(e => console.warn("Resume failed", e));
}
} else {
// Buffer stalled?
// If it's loading, show loading state. If not loading and no url, trigger load.
// NOTE(review): a segment with hasError set and no audioUrl is neither retried nor
// skipped by this effect, so playback appears to stay on it — confirm this is intended.
if (!currentSegment.isLoading && !currentSegment.hasError) {
processSegmentAudio(article.id, currentSegment.id, currentSegment.text, playerState.selectedVoice);
}
}
// Always try to buffer ahead
manageBuffer(article);
}, [queue, playerState.currentArticleId, playerState.isPlaying, playerState.playbackRate, playerState.selectedVoice, manageBuffer, processSegmentAudio]);
// 2. Handle 'Ended' event to advance segment
// Registers an 'ended' listener on the shared audio element. When a segment finishes it
// advances currentSegmentIndex; after the last segment it marks the article COMPLETED and
// either schedules the next queued article or clears isPlaying.
useEffect(() => {
const audio = audioRef.current;
const handleEnded = () => {
const currentId = playerState.currentArticleId;
// We need to get the *latest* queue state to find the current index
// Since we can't easily access 'queue' inside this event listener without recreating the listener constantly,
// we rely on the state setters functional update or a ref.
// However, for simplicity in this specific React structure, let's use the state update pattern.
// NOTE(review): the updater below also calls setTimeout/playArticle and setPlayerState.
// React documents updater functions as needing to be pure — confirm this behaves
// correctly if the updater is invoked more than once (e.g. Strict Mode).
setQueue(prevQueue => {
const article = prevQueue.find(a => a.id === currentId);
if (!article) return prevQueue;
const nextIndex = article.currentSegmentIndex + 1;
// If we have a next segment, advance index
if (nextIndex < article.segments.length) {
return prevQueue.map(a => a.id === currentId ? { ...a, currentSegmentIndex: nextIndex } : a);
} else {
// Article finished
// Try to play next article in queue
const artIndex = prevQueue.findIndex(a => a.id === currentId);
if (artIndex !== -1 && artIndex < prevQueue.length - 1) {
// Queue next article
setTimeout(() => playArticle(prevQueue[artIndex + 1].id), 100); // Small delay to let state settle
return prevQueue.map(a => a.id === currentId ? { ...a, status: PlaybackStatus.COMPLETED } : a);
} else {
// End of queue
setPlayerState(ps => ({ ...ps, isPlaying: false }));
return prevQueue.map(a => a.id === currentId ? { ...a, status: PlaybackStatus.COMPLETED } : a);
}
}
});
};
audio.addEventListener('ended', handleEnded);
// Detach on cleanup so only one 'ended' handler is attached at a time.
return () => audio.removeEventListener('ended', handleEnded);
}, [playerState.currentArticleId, playArticle]); // removed 'queue' from dependency to avoid re-attaching listener on every segment update
// 3. Handle Speed Change
const handleSpeedChange = (newSpeed: number) => {
// Clamp
const speed = Math.max(MIN_SPEED, Math.min(MAX_SPEED, newSpeed));
setPlayerState(prev => ({ ...prev, playbackRate: speed }));
if (audioRef.current) {
@@ -167,32 +276,6 @@ export default function App() {
}
};
// Auto-Advance Logic
useEffect(() => {
const audio = audioRef.current;
const handleEnded = () => {
const currentId = playerState.currentArticleId;
if (currentId) {
updateArticleStatus(currentId, PlaybackStatus.COMPLETED);
// Find next
const currentIndex = queue.findIndex(a => a.id === currentId);
if (currentIndex !== -1 && currentIndex < queue.length - 1) {
const nextId = queue[currentIndex + 1].id;
playArticle(nextId);
} else {
setPlayerState(prev => ({ ...prev, isPlaying: false }));
}
}
};
audio.addEventListener('ended', handleEnded);
return () => {
audio.removeEventListener('ended', handleEnded);
};
}, [playerState.currentArticleId, queue, playArticle]);
// -- Render --
@@ -219,12 +302,12 @@ export default function App() {
</div>
</header>
{/* Main Content - Split Layout */}
{/* Main Content */}
<main className="flex-grow px-4 py-6 max-w-7xl mx-auto w-full grid grid-cols-1 lg:grid-cols-12 gap-8">
{/* Left Column: Controls & Queue (5 cols) */}
{/* Left Column: Controls & Queue */}
<div className="lg:col-span-5 space-y-6">
{/* Input Section */}
{/* Input */}
<div className="bg-white p-1 rounded-2xl shadow-sm border border-slate-200 flex gap-2 items-center pl-4">
<input
type="url"
@@ -267,8 +350,8 @@ export default function App() {
onPause={pausePlayback}
onRemove={() => {
if (playerState.currentArticleId === article.id) {
pausePlayback();
setPlayerState(prev => ({ ...prev, currentArticleId: null }));
pausePlayback();
setPlayerState(prev => ({ ...prev, currentArticleId: null }));
}
setQueue(prev => prev.filter(a => a.id !== article.id));
if (viewId === article.id) setViewId(null);
@@ -281,12 +364,11 @@ export default function App() {
</div>
</div>
{/* Right Column: Reader View (7 cols) */}
{/* Right Column: Reader View */}
<div className="lg:col-span-7 h-full hidden lg:block">
<ReaderView article={viewingArticle} />
</div>
{/* Mobile: Reader View appears below if selected */}
<div className="lg:hidden block">
{viewingArticle && (
<div className="mt-8">
@@ -299,26 +381,31 @@ export default function App() {
</div>
</main>
{/* Sticky Player */}
{/* Player Bar */}
<div className="fixed bottom-0 left-0 right-0 bg-white/90 backdrop-blur-lg border-t border-slate-200 p-4 pb-6 shadow-[0_-4px_20px_rgba(0,0,0,0.05)] z-30">
<div className="max-w-7xl mx-auto flex flex-col sm:flex-row items-center gap-4 sm:gap-8">
{/* Current Track Info */}
<div className="flex-grow w-full sm:w-auto min-w-0 text-center sm:text-left">
{currentArticle ? (
<div>
<h4 className="font-bold text-slate-900 truncate">{currentArticle.title}</h4>
<p className="text-xs text-slate-500 truncate">Playing from queue</p>
{/* Progress Bar for current segment */}
<div className="w-full h-1 bg-slate-200 rounded-full mt-2 overflow-hidden">
<div
className="h-full bg-blue-500 transition-all duration-300"
style={{ width: `${((currentArticle.currentSegmentIndex + 1) / Math.max(1, currentArticle.segments.length)) * 100}%`}}
/>
</div>
<p className="text-xs text-slate-500 truncate mt-1">
Playing segment {currentArticle.currentSegmentIndex + 1} of {currentArticle.segments.length}
</p>
</div>
) : (
<div className="text-slate-400 text-sm font-medium">Ready to play</div>
)}
</div>
{/* Controls */}
<div className="flex items-center gap-6">
{/* Speed Control */}
<div className="hidden sm:flex items-center gap-2 group relative">
<Gauge className="w-4 h-4 text-slate-400" />
<div className="flex items-center gap-2 bg-slate-100 rounded-lg p-1">
@@ -334,15 +421,19 @@ export default function App() {
</div>
</div>
{/* Main Transport */}
<div className="flex items-center gap-4">
<button
className="p-2 text-slate-400 hover:text-slate-600 transition-colors"
onClick={() => {
const idx = queue.findIndex(a => a.id === playerState.currentArticleId);
if (idx > 0) playArticle(queue[idx - 1].id);
if (currentArticle && currentArticle.currentSegmentIndex > 0) {
// Go back one segment
setQueue(prev => prev.map(a => a.id === currentArticle.id ? { ...a, currentSegmentIndex: a.currentSegmentIndex - 1 } : a));
} else {
// Prev article
const idx = queue.findIndex(a => a.id === playerState.currentArticleId);
if (idx > 0) playArticle(queue[idx - 1].id);
}
}}
disabled={!playerState.currentArticleId || queue.findIndex(a => a.id === playerState.currentArticleId) <= 0}
>
<SkipBack className="w-5 h-5" />
</button>
@@ -362,10 +453,15 @@ export default function App() {
<button
className="p-2 text-slate-400 hover:text-slate-600 transition-colors"
onClick={() => {
const idx = queue.findIndex(a => a.id === playerState.currentArticleId);
if (idx !== -1 && idx < queue.length - 1) playArticle(queue[idx + 1].id);
if (currentArticle && currentArticle.currentSegmentIndex < currentArticle.segments.length - 1) {
// Next segment
setQueue(prev => prev.map(a => a.id === currentArticle.id ? { ...a, currentSegmentIndex: a.currentSegmentIndex + 1 } : a));
} else {
// Next article
const idx = queue.findIndex(a => a.id === playerState.currentArticleId);
if (idx !== -1 && idx < queue.length - 1) playArticle(queue[idx + 1].id);
}
}}
disabled={!playerState.currentArticleId || queue.findIndex(a => a.id === playerState.currentArticleId) >= queue.length - 1}
>
<SkipForward className="w-5 h-5" />
</button>

View File

@@ -1,4 +1,5 @@
import React from 'react';
import React, { useEffect, useRef } from 'react';
import { Article } from '../types';
import { FileText } from 'lucide-react';
@@ -7,6 +8,18 @@ interface ReaderViewProps {
}
export const ReaderView: React.FC<ReaderViewProps> = ({ article }) => {
const scrollRef = useRef<HTMLDivElement>(null);
// Keep the segment currently being read centred in view while the article is playing.
useEffect(() => {
  if (!article) return;
  if (article.status !== 'PLAYING') return;

  const target = document.getElementById(`segment-${article.currentSegmentIndex}`);
  const container = scrollRef.current;
  if (!target || !container) return;

  target.scrollIntoView({ behavior: 'smooth', block: 'center' });
}, [article?.currentSegmentIndex, article?.status]);
if (!article) {
return (
<div className="h-full flex flex-col items-center justify-center text-slate-400 p-12 border-2 border-dashed border-slate-200 rounded-2xl bg-slate-50/50">
@@ -17,11 +30,6 @@ export const ReaderView: React.FC<ReaderViewProps> = ({ article }) => {
);
}
// Split text by newlines to create paragraphs
const paragraphs = article.text
? article.text.split('\n').filter(p => p.trim().length > 0)
: [];
return (
<div className="bg-white rounded-2xl border border-slate-200 shadow-sm overflow-hidden h-[calc(100vh-12rem)] flex flex-col">
<div className="p-6 border-b border-slate-100 bg-white sticky top-0 z-10">
@@ -38,18 +46,34 @@ export const ReaderView: React.FC<ReaderViewProps> = ({ article }) => {
</a>
</div>
<div className="flex-grow overflow-y-auto p-6 sm:p-8 space-y-6 custom-scrollbar bg-white">
{paragraphs.length > 0 ? (
paragraphs.map((paragraph, idx) => (
<p key={idx} className="text-lg text-slate-700 leading-relaxed font-serif">
{paragraph}
</p>
))
<div ref={scrollRef} className="flex-grow overflow-y-auto p-6 sm:p-8 space-y-6 custom-scrollbar bg-white">
{article.segments.length > 0 ? (
article.segments.map((segment, idx) => {
const isActive = article.currentSegmentIndex === idx;
return (
<p
key={segment.id}
id={`segment-${idx}`}
className={`text-lg leading-relaxed font-serif transition-colors duration-300 ${
isActive
? 'text-slate-900 bg-blue-50 p-2 rounded-lg -mx-2 border-l-4 border-blue-500'
: 'text-slate-700'
}`}
>
{segment.text}
</p>
);
})
) : (
// Loading State skeleton
<div className="space-y-4 animate-pulse">
<div className="h-4 bg-slate-100 rounded w-3/4"></div>
<div className="h-4 bg-slate-100 rounded w-full"></div>
<div className="h-4 bg-slate-100 rounded w-5/6"></div>
{[1,2,3,4].map(i => (
<div key={i} className="space-y-2">
<div className="h-4 bg-slate-100 rounded w-full"></div>
<div className="h-4 bg-slate-100 rounded w-full"></div>
<div className="h-4 bg-slate-100 rounded w-3/4"></div>
</div>
))}
<p className="text-slate-400 italic mt-4">Extracting article content...</p>
</div>
)}

60
services/textUtils.ts Normal file
View File

@@ -0,0 +1,60 @@
import { v4 as uuidv4 } from 'uuid';
import { AudioSegment } from '../types';
/**
 * Splits article text into segments sized for individual audio generation requests.
 *
 * Paragraphs (separated by blank lines) shorter than 500 characters become one
 * segment each. Longer paragraphs are broken at sentence boundaries and the
 * sentences are packed greedily into chunks of roughly 400 characters. A single
 * sentence longer than that limit is kept whole as its own segment.
 *
 * @param fullText - Raw article text; empty input yields an empty array.
 * @returns Segments in reading order, each with a fresh id, non-empty trimmed
 *   text, and `isLoading` / `hasError` both `false`.
 */
export const segmentText = (fullText: string): AudioSegment[] => {
  if (!fullText) return [];

  const MAX_PARAGRAPH_LENGTH = 500;
  const MAX_CHUNK_LENGTH = 400;
  // A sentence is a run of text ending in '.', '!' or '?' (plus an optional closing
  // quote); the `.+` alternative catches a trailing fragment with no terminator.
  const SENTENCE_PATTERN = /[^.!?]+[.!?]+["']?|.+/g;

  const segments: AudioSegment[] = [];
  const pushSegment = (text: string): void => {
    const trimmed = text.trim();
    // Never emit an empty segment: there is nothing to synthesize or display for it.
    if (!trimmed) return;
    segments.push({ id: uuidv4(), text: trimmed, isLoading: false, hasError: false });
  };

  // 1. Split by blank lines (paragraphs)
  for (const rawPara of fullText.split(/\n\s*\n/)) {
    const cleanPara = rawPara.trim();
    if (!cleanPara) continue;

    // Reasonably sized paragraphs are kept as a single segment.
    if (cleanPara.length < MAX_PARAGRAPH_LENGTH) {
      pushSegment(cleanPara);
      continue;
    }

    // 2. Oversized paragraph: pack whole sentences into chunks.
    const sentences = cleanPara.match(SENTENCE_PATTERN) ?? [cleanPara];
    let currentChunk = '';
    for (const rawSentence of sentences) {
      // Matches keep the whitespace that followed the previous sentence; drop it
      // so chunks are joined with exactly one space.
      const sentence = rawSentence.trim();
      if (!sentence) continue;

      // Only flush when there is something to flush. Without the `currentChunk`
      // guard, an over-long first sentence would emit an empty segment.
      if (currentChunk && currentChunk.length + sentence.length > MAX_CHUNK_LENGTH) {
        pushSegment(currentChunk);
        currentChunk = sentence;
      } else {
        currentChunk = currentChunk ? `${currentChunk} ${sentence}` : sentence;
      }
    }
    pushSegment(currentChunk);
  }

  return segments;
};

View File

@@ -1,3 +1,4 @@
export enum VoiceName {
Puck = 'Puck',
Charon = 'Charon',
@@ -17,12 +18,23 @@ export enum PlaybackStatus {
COMPLETED = 'COMPLETED'
}
/** One independently synthesized and played chunk of an article's text. */
export interface AudioSegment {
/** Unique identifier for this segment. */
id: string;
/** The text of this segment, used both for speech generation and for display. */
text: string;
/** Set once audio has been generated for this segment; absent until then. */
audioUrl?: string; // Blob URL for this specific segment
/** True while an audio generation request for this segment is in progress. */
isLoading: boolean;
/** True when audio generation for this segment failed. */
hasError: boolean;
}
export interface Article {
id: string;
url: string;
title: string;
// We keep the full text for display/reference
text: string;
audioUrl?: string; // Blob URL for the WAV file
// We split content into segments for faster playback
segments: AudioSegment[];
currentSegmentIndex: number;
status: PlaybackStatus;
errorMessage?: string;
}