mirror of
https://github.com/Tony0410/readlater.git
synced 2026-05-24 22:01:41 +08:00
Fix JSDOM CSS parsing errors (border-width issue)
JSDOM crashes on modern CSS with variables like var(--border-width,1px). Fix by stripping all <style> tags and inline style attributes before parsing - Readability only needs DOM structure, not CSS. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,28 @@
|
|||||||
import { Readability } from "@mozilla/readability";
|
import { Readability } from "@mozilla/readability";
|
||||||
import { JSDOM } from "jsdom";
|
import { JSDOM, VirtualConsole } from "jsdom";
|
||||||
|
|
||||||
|
/**
 * Build a fresh JSDOM `VirtualConsole` whose "error" and "warn" events are
 * handled by a listener that does nothing, so those messages are discarded.
 *
 * The intent (per the original author) is to keep CSS parsing noise from
 * modern stylesheets out of the logs; Readability does not depend on CSS.
 * NOTE(review): this silences every "error"/"warn" event on the console,
 * not only CSS-related ones — confirm that is acceptable.
 *
 * @returns A new virtual console with the no-op listeners attached.
 */
function createVirtualConsole() {
  const silentConsole = new VirtualConsole();
  // Intentionally empty: the message is dropped.
  const ignore = (): void => {};

  for (const channel of ["error", "warn"] as const) {
    silentConsole.on(channel, ignore);
  }

  return silentConsole;
}
|
||||||
|
|
||||||
|
/**
 * Strip CSS from an HTML string: `<style>` elements (with their contents)
 * and quoted inline `style="..."` / `style='...'` attributes.
 *
 * This is a regex-based pre-pass applied to raw markup before parsing, so
 * that the parser never sees stylesheet text. Only DOM structure is needed
 * downstream, so removing the CSS loses nothing relevant.
 *
 * Limitations: unquoted attribute values (`style=color:red`) are left
 * untouched, and text that merely looks like a quoted style attribute
 * outside of a tag is removed as well.
 *
 * @param html - Raw HTML markup.
 * @returns The markup with style elements and quoted style attributes removed.
 */
function stripStyles(html: string): string {
  // The tag name must be exactly "style": it has to be followed by whitespace
  // (attributes) or ">" so that elements such as <style-guide> are not
  // mistaken for a stylesheet. The closing tag may contain trailing spaces.
  const withoutStyleElements = html.replace(
    /<style(?:\s[^>]*)?>[\s\S]*?<\/style\s*>/gi,
    ""
  );

  // Match double- and single-quoted values separately so that a value
  // containing the *other* quote character (e.g. font-family: 'Arial' inside
  // a double-quoted attribute) is consumed in full instead of being cut at
  // the first inner quote, which would corrupt the tag.
  return withoutStyleElements.replace(
    /\s+style\s*=\s*(?:"[^"]*"|'[^']*')/gi,
    ""
  );
}
|
||||||
|
|
||||||
export interface ExtractedArticle {
|
export interface ExtractedArticle {
|
||||||
title: string;
|
title: string;
|
||||||
@@ -41,7 +64,11 @@ export async function extractArticle(url: string): Promise<ExtractedArticle> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const html = await response.text();
|
const html = await response.text();
|
||||||
const dom = new JSDOM(html, { url });
|
const cleanedHtml = stripStyles(html);
|
||||||
|
const dom = new JSDOM(cleanedHtml, {
|
||||||
|
url,
|
||||||
|
virtualConsole: createVirtualConsole(),
|
||||||
|
});
|
||||||
const document = dom.window.document;
|
const document = dom.window.document;
|
||||||
|
|
||||||
// Extract using Readability
|
// Extract using Readability
|
||||||
@@ -83,7 +110,11 @@ export async function extractFromHtml(
|
|||||||
url: string,
|
url: string,
|
||||||
fallbackTitle?: string
|
fallbackTitle?: string
|
||||||
): Promise<ExtractedArticle> {
|
): Promise<ExtractedArticle> {
|
||||||
const dom = new JSDOM(html, { url });
|
const cleanedHtml = stripStyles(html);
|
||||||
|
const dom = new JSDOM(cleanedHtml, {
|
||||||
|
url,
|
||||||
|
virtualConsole: createVirtualConsole(),
|
||||||
|
});
|
||||||
const document = dom.window.document;
|
const document = dom.window.document;
|
||||||
|
|
||||||
// Extract using Readability
|
// Extract using Readability
|
||||||
|
|||||||
Reference in New Issue
Block a user