mirror of
https://github.com/Tony0410/News-reader-pro.git
synced 2026-05-24 21:31:44 +08:00
feat: Enhance article segment navigation
Implement segment selection in ReaderView for user-driven playback control. This change allows users to click on specific segments within an article to jump to and play that segment directly. The Gemini service's HTML parsing has also been simplified by removing redundant selectors and focusing on essential tag removal for more efficient text extraction.
This commit is contained in:
14
App.tsx
14
App.tsx
@@ -198,6 +198,18 @@ export default function App() {
|
|||||||
});
|
});
|
||||||
}, [playerState.currentArticleId]);
|
}, [playerState.currentArticleId]);
|
||||||
|
|
||||||
|
// Starts playback from a specific segment chosen by the user.
// - articleId: id of the article that becomes the current one in player state.
// - index: segment position written to the article as currentSegmentIndex.
// Player state is updated first (currentArticleId + isPlaying: true), then the
// article record is updated with the new segment index and PLAYING status.
const handleSegmentSelect = useCallback((articleId: string, index: number) => {
|
||||||
|
setPlayerState(prev => ({
|
||||||
|
...prev,
|
||||||
|
currentArticleId: articleId,
|
||||||
|
isPlaying: true
|
||||||
|
}));
|
||||||
|
updateArticle(articleId, {
|
||||||
|
currentSegmentIndex: index,
|
||||||
|
status: PlaybackStatus.PLAYING
|
||||||
|
});
|
||||||
|
// NOTE(review): the dependency array is empty although updateArticle is referenced
|
||||||
|
// above; confirm updateArticle is referentially stable, otherwise this callback may
|
||||||
|
// keep calling the version captured on the first render.
}, []);
|
||||||
|
|
||||||
// -- Keyboard Shortcuts --
|
// -- Keyboard Shortcuts --
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
const handleKeyDown = (e: KeyboardEvent) => {
|
const handleKeyDown = (e: KeyboardEvent) => {
|
||||||
@@ -478,6 +490,7 @@ export default function App() {
|
|||||||
article={viewingArticle}
|
article={viewingArticle}
|
||||||
settings={settings}
|
settings={settings}
|
||||||
onToggleAutoScroll={() => setSettings(s => ({...s, autoScroll: !s.autoScroll}))}
|
onToggleAutoScroll={() => setSettings(s => ({...s, autoScroll: !s.autoScroll}))}
|
||||||
|
onSegmentSelect={(index) => viewingArticle && handleSegmentSelect(viewingArticle.id, index)}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -490,6 +503,7 @@ export default function App() {
|
|||||||
article={viewingArticle}
|
article={viewingArticle}
|
||||||
settings={settings}
|
settings={settings}
|
||||||
onToggleAutoScroll={() => setSettings(s => ({...s, autoScroll: !s.autoScroll}))}
|
onToggleAutoScroll={() => setSettings(s => ({...s, autoScroll: !s.autoScroll}))}
|
||||||
|
onSegmentSelect={(index) => viewingArticle && handleSegmentSelect(viewingArticle.id, index)}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -7,9 +7,10 @@ interface ReaderViewProps {
|
|||||||
article?: Article | null;
|
article?: Article | null;
|
||||||
settings?: ReaderSettings;
|
settings?: ReaderSettings;
|
||||||
onToggleAutoScroll?: () => void;
|
onToggleAutoScroll?: () => void;
|
||||||
|
onSegmentSelect?: (index: number) => void;
|
||||||
}
|
}
|
||||||
|
|
||||||
export const ReaderView: React.FC<ReaderViewProps> = ({ article, settings, onToggleAutoScroll }) => {
|
export const ReaderView: React.FC<ReaderViewProps> = ({ article, settings, onToggleAutoScroll, onSegmentSelect }) => {
|
||||||
const scrollRef = useRef<HTMLDivElement>(null);
|
const scrollRef = useRef<HTMLDivElement>(null);
|
||||||
|
|
||||||
// Auto-scroll to active segment
|
// Auto-scroll to active segment
|
||||||
@@ -100,7 +101,7 @@ export const ReaderView: React.FC<ReaderViewProps> = ({ article, settings, onTog
|
|||||||
|
|
||||||
<div
|
<div
|
||||||
ref={scrollRef}
|
ref={scrollRef}
|
||||||
className={`flex-grow overflow-y-auto p-6 sm:p-8 space-y-6 custom-scrollbar bg-white dark:bg-slate-900 transition-colors duration-300 ${getFontClass()} ${getSizeClass()}`}
|
className={`flex-grow overflow-y-auto p-6 sm:p-8 space-y-1 custom-scrollbar bg-white dark:bg-slate-900 transition-colors duration-300 ${getFontClass()} ${getSizeClass()}`}
|
||||||
>
|
>
|
||||||
{article.segments.length > 0 ? (
|
{article.segments.length > 0 ? (
|
||||||
article.segments.map((segment, idx) => {
|
article.segments.map((segment, idx) => {
|
||||||
@@ -109,11 +110,16 @@ export const ReaderView: React.FC<ReaderViewProps> = ({ article, settings, onTog
|
|||||||
<div
|
<div
|
||||||
key={segment.id}
|
key={segment.id}
|
||||||
id={`segment-${idx}`}
|
id={`segment-${idx}`}
|
||||||
className={`transition-all duration-300 whitespace-pre-wrap ${getLeadingClass()} ${
|
onClick={() => onSegmentSelect?.(idx)}
|
||||||
isActive
|
title="Click to play from here"
|
||||||
? 'text-slate-900 dark:text-white bg-blue-50 dark:bg-blue-900/20 p-4 rounded-lg -mx-4 border-l-4 border-blue-500 shadow-sm'
|
className={`
|
||||||
: 'text-slate-700 dark:text-slate-300'
|
transition-all duration-200 whitespace-pre-wrap rounded-xl p-3 sm:p-4 -mx-2 sm:-mx-4 border-l-4 mb-2
|
||||||
}`}
|
${getLeadingClass()}
|
||||||
|
${isActive
|
||||||
|
? 'text-slate-900 dark:text-white bg-blue-50 dark:bg-blue-900/20 border-blue-500 shadow-sm'
|
||||||
|
: 'text-slate-700 dark:text-slate-300 border-transparent hover:bg-slate-100 dark:hover:bg-slate-800/50 cursor-pointer hover:border-slate-300 dark:hover:border-slate-600'
|
||||||
|
}
|
||||||
|
`}
|
||||||
>
|
>
|
||||||
{segment.text}
|
{segment.text}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -49,52 +49,20 @@ function cleanAndMinifyHtml(rawHtml: string): string {
|
|||||||
const doc = parser.parseFromString(rawHtml, 'text/html');
|
const doc = parser.parseFromString(rawHtml, 'text/html');
|
||||||
|
|
||||||
// 1. Remove heavy technical tags
|
// 1. Remove heavy technical tags
|
||||||
|
// We remove these because they consume tokens and provide no semantic value for text extraction.
|
||||||
const technicalTags = ['script', 'style', 'noscript', 'iframe', 'svg', 'link', 'meta', 'button', 'input', 'form', 'img', 'picture', 'video'];
|
const technicalTags = ['script', 'style', 'noscript', 'iframe', 'svg', 'link', 'meta', 'button', 'input', 'form', 'img', 'picture', 'video'];
|
||||||
technicalTags.forEach(tag => {
|
technicalTags.forEach(tag => {
|
||||||
const elements = doc.querySelectorAll(tag);
|
const elements = doc.querySelectorAll(tag);
|
||||||
elements.forEach(el => el.remove());
|
elements.forEach(el => el.remove());
|
||||||
});
|
});
|
||||||
|
|
||||||
// 2. Remove semantic layout tags that are usually clutter
|
// NOTE: We intentionally DO NOT remove semantic tags like <nav>, <footer>, or use class-based heuristics.
|
||||||
const layoutTags = ['nav', 'footer', 'aside', 'header'];
|
// Previous versions tried to identify <article> or remove .ad-container, but this often caused
|
||||||
layoutTags.forEach(tag => {
|
// the "Content appears to be empty" error on sites with unique structures.
|
||||||
const elements = doc.querySelectorAll(tag);
|
// Gemini Flash has a large enough context window to ingest the entire <body> and intelligently extract the article.
|
||||||
elements.forEach(el => el.remove());
|
|
||||||
});
|
|
||||||
|
|
||||||
// 3. Remove common ad/social/cookie containers by class/id heuristics
|
// Return the body. Trust Gemini to find the needle in the haystack.
|
||||||
const junkSelectors = [
|
return doc.body ? doc.body.innerHTML : rawHtml;
|
||||||
'[class*="ad-"]', '[id*="ad-"]',
|
|
||||||
'[class*="cookie"]', '[id*="cookie"]',
|
|
||||||
'[class*="newsletter"]', '[id*="newsletter"]',
|
|
||||||
'[class*="social"]', '[class*="share"]',
|
|
||||||
'[class*="comment"]', '[id*="comment"]',
|
|
||||||
'[class*="recommended"]', '[class*="related"]'
|
|
||||||
];
|
|
||||||
|
|
||||||
junkSelectors.forEach(selector => {
|
|
||||||
try {
|
|
||||||
const elements = doc.querySelectorAll(selector);
|
|
||||||
elements.forEach(el => el.remove());
|
|
||||||
} catch (e) {
|
|
||||||
// Ignore invalid selector errors
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// 4. Return the cleanest possible content
|
|
||||||
// If there is a specific article tag, it's usually the best bet.
|
|
||||||
const article = doc.querySelector('article');
|
|
||||||
if (article && article.textContent && article.textContent.length > 200) {
|
|
||||||
return article.innerHTML;
|
|
||||||
}
|
|
||||||
|
|
||||||
const main = doc.querySelector('main');
|
|
||||||
if (main && main.textContent && main.textContent.length > 200) {
|
|
||||||
return main.innerHTML;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback: Return the cleaned body
|
|
||||||
return doc.body.innerHTML;
|
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.warn("HTML cleaning failed, using raw string", e);
|
console.warn("HTML cleaning failed, using raw string", e);
|
||||||
return rawHtml;
|
return rawHtml;
|
||||||
@@ -115,7 +83,7 @@ async function fetchRawHtml(inputUrl: string): Promise<string> {
|
|||||||
console.log(`Fetching via proxy: ${proxyUrl}`);
|
console.log(`Fetching via proxy: ${proxyUrl}`);
|
||||||
|
|
||||||
const controller = new AbortController();
|
const controller = new AbortController();
|
||||||
const timeoutId = setTimeout(() => controller.abort(), 10000); // 10s timeout per proxy
|
const timeoutId = setTimeout(() => controller.abort(), 15000); // 15s timeout per proxy
|
||||||
|
|
||||||
// We purposely do NOT add complex headers here.
|
// We purposely do NOT add complex headers here.
|
||||||
// Adding headers like 'X-Requested-With' often triggers a CORS Preflight (OPTIONS) request,
|
// Adding headers like 'X-Requested-With' often triggers a CORS Preflight (OPTIONS) request,
|
||||||
|
|||||||
Reference in New Issue
Block a user