feat: Enhance article segment navigation

Implement segment selection in ReaderView for user-driven playback control. This change allows users to click a specific segment within an article to jump to that segment and start playing from it directly.

The Gemini service's HTML parsing has also been simplified by removing the redundant class/id heuristic and layout-tag selectors and keeping only essential technical-tag removal, for more efficient text extraction.
This commit is contained in:
Anthony
2025-11-19 20:28:14 +08:00
parent 8e902fd9c1
commit dadebf8cd0
3 changed files with 35 additions and 47 deletions

14
App.tsx
View File

@@ -198,6 +198,18 @@ export default function App() {
});
}, [playerState.currentArticleId]);
// Jump playback to a chosen segment: make the given article the current one,
// mark the player as playing, and move that article's segment cursor to `index`.
const handleSegmentSelect = useCallback((articleId: string, index: number) => {
  // Point the player at the selected article and switch it to the playing state.
  setPlayerState(prev => {
    return { ...prev, currentArticleId: articleId, isPlaying: true };
  });

  // Record the new position and playback status on the article itself.
  updateArticle(articleId, {
    currentSegmentIndex: index,
    status: PlaybackStatus.PLAYING,
  });
}, []);
// -- Keyboard Shortcuts --
useEffect(() => {
const handleKeyDown = (e: KeyboardEvent) => {
@@ -478,6 +490,7 @@ export default function App() {
article={viewingArticle}
settings={settings}
onToggleAutoScroll={() => setSettings(s => ({...s, autoScroll: !s.autoScroll}))}
onSegmentSelect={(index) => viewingArticle && handleSegmentSelect(viewingArticle.id, index)}
/>
</div>
@@ -490,6 +503,7 @@ export default function App() {
article={viewingArticle}
settings={settings}
onToggleAutoScroll={() => setSettings(s => ({...s, autoScroll: !s.autoScroll}))}
onSegmentSelect={(index) => viewingArticle && handleSegmentSelect(viewingArticle.id, index)}
/>
</div>
</div>

View File

@@ -7,9 +7,10 @@ interface ReaderViewProps {
article?: Article | null;
settings?: ReaderSettings;
onToggleAutoScroll?: () => void;
onSegmentSelect?: (index: number) => void;
}
export const ReaderView: React.FC<ReaderViewProps> = ({ article, settings, onToggleAutoScroll }) => {
export const ReaderView: React.FC<ReaderViewProps> = ({ article, settings, onToggleAutoScroll, onSegmentSelect }) => {
const scrollRef = useRef<HTMLDivElement>(null);
// Auto-scroll to active segment
@@ -100,7 +101,7 @@ export const ReaderView: React.FC<ReaderViewProps> = ({ article, settings, onTog
<div
ref={scrollRef}
className={`flex-grow overflow-y-auto p-6 sm:p-8 space-y-6 custom-scrollbar bg-white dark:bg-slate-900 transition-colors duration-300 ${getFontClass()} ${getSizeClass()}`}
className={`flex-grow overflow-y-auto p-6 sm:p-8 space-y-1 custom-scrollbar bg-white dark:bg-slate-900 transition-colors duration-300 ${getFontClass()} ${getSizeClass()}`}
>
{article.segments.length > 0 ? (
article.segments.map((segment, idx) => {
@@ -109,11 +110,16 @@ export const ReaderView: React.FC<ReaderViewProps> = ({ article, settings, onTog
<div
key={segment.id}
id={`segment-${idx}`}
className={`transition-all duration-300 whitespace-pre-wrap ${getLeadingClass()} ${
isActive
? 'text-slate-900 dark:text-white bg-blue-50 dark:bg-blue-900/20 p-4 rounded-lg -mx-4 border-l-4 border-blue-500 shadow-sm'
: 'text-slate-700 dark:text-slate-300'
}`}
onClick={() => onSegmentSelect?.(idx)}
title="Click to play from here"
className={`
transition-all duration-200 whitespace-pre-wrap rounded-xl p-3 sm:p-4 -mx-2 sm:-mx-4 border-l-4 mb-2
${getLeadingClass()}
${isActive
? 'text-slate-900 dark:text-white bg-blue-50 dark:bg-blue-900/20 border-blue-500 shadow-sm'
: 'text-slate-700 dark:text-slate-300 border-transparent hover:bg-slate-100 dark:hover:bg-slate-800/50 cursor-pointer hover:border-slate-300 dark:hover:border-slate-600'
}
`}
>
{segment.text}
</div>

View File

@@ -49,52 +49,20 @@ function cleanAndMinifyHtml(rawHtml: string): string {
const doc = parser.parseFromString(rawHtml, 'text/html');
// 1. Remove heavy technical tags
// We remove these because they consume tokens and provide no semantic value for text extraction.
const technicalTags = ['script', 'style', 'noscript', 'iframe', 'svg', 'link', 'meta', 'button', 'input', 'form', 'img', 'picture', 'video'];
technicalTags.forEach(tag => {
const elements = doc.querySelectorAll(tag);
elements.forEach(el => el.remove());
});
// 2. Remove semantic layout tags that are usually clutter
const layoutTags = ['nav', 'footer', 'aside', 'header'];
layoutTags.forEach(tag => {
const elements = doc.querySelectorAll(tag);
elements.forEach(el => el.remove());
});
// NOTE: We intentionally DO NOT remove semantic tags like <nav>, <footer>, or use class-based heuristics.
// Previous versions tried to identify <article> or remove .ad-container, but this often caused
// the "Content appears to be empty" error on sites with unique structures.
// Gemini Flash has a large enough context window to ingest the entire <body> and intelligently extract the article.
// 3. Remove common ad/social/cookie containers by class/id heuristics
const junkSelectors = [
'[class*="ad-"]', '[id*="ad-"]',
'[class*="cookie"]', '[id*="cookie"]',
'[class*="newsletter"]', '[id*="newsletter"]',
'[class*="social"]', '[class*="share"]',
'[class*="comment"]', '[id*="comment"]',
'[class*="recommended"]', '[class*="related"]'
];
junkSelectors.forEach(selector => {
try {
const elements = doc.querySelectorAll(selector);
elements.forEach(el => el.remove());
} catch (e) {
// Ignore invalid selector errors
}
});
// 4. Return the cleanest possible content
// If there is a specific article tag, it's usually the best bet.
const article = doc.querySelector('article');
if (article && article.textContent && article.textContent.length > 200) {
return article.innerHTML;
}
const main = doc.querySelector('main');
if (main && main.textContent && main.textContent.length > 200) {
return main.innerHTML;
}
// Fallback: Return the cleaned body
return doc.body.innerHTML;
// Return the body. Trust Gemini to find the needle in the haystack.
return doc.body ? doc.body.innerHTML : rawHtml;
} catch (e) {
console.warn("HTML cleaning failed, using raw string", e);
return rawHtml;
@@ -115,7 +83,7 @@ async function fetchRawHtml(inputUrl: string): Promise<string> {
console.log(`Fetching via proxy: ${proxyUrl}`);
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), 10000); // 10s timeout per proxy
const timeoutId = setTimeout(() => controller.abort(), 15000); // 15s timeout per proxy
// We purposely do NOT add complex headers here.
// Adding headers like 'X-Requested-With' often triggers a CORS Preflight (OPTIONS) request,