mirror of
https://github.com/Tony0410/readlater.git
synced 2026-05-24 22:01:41 +08:00
Fix JSDOM CSS parsing errors (border-width issue)
JSDOM crashes on modern CSS with variables like var(--border-width,1px). Fix by stripping all <style> tags and inline style attributes before parsing - Readability only needs DOM structure, not CSS. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,28 @@
|
||||
import { Readability } from "@mozilla/readability";
|
||||
import { JSDOM } from "jsdom";
|
||||
import { JSDOM, VirtualConsole } from "jsdom";
|
||||
|
||||
// Create a virtual console that suppresses CSS parsing errors
|
||||
// JSDOM has issues with modern CSS (variables, etc.) that don't affect Readability
|
||||
/**
 * Builds a jsdom VirtualConsole whose "error" and "warn" events are handled
 * by a listener that does nothing, so output on those two channels is dropped.
 *
 * NOTE(review): the intent is to silence jsdom's CSS parsing noise — confirm
 * which event jsdom actually reports those errors on.
 *
 * @returns A fresh VirtualConsole with the no-op listeners attached.
 */
function createVirtualConsole() {
  // Shared do-nothing handler: anything emitted on these channels is discarded.
  const discard = (): void => {};

  // `on` returns the console itself, so the registrations can be chained.
  return new VirtualConsole().on("error", discard).on("warn", discard);
}
|
||||
|
||||
// Strip style tags and inline styles from HTML to prevent JSDOM CSS parsing errors
|
||||
// Readability doesn't need CSS - it only needs the DOM structure
|
||||
/**
 * Removes CSS from an HTML string: every `<style>` element (with its
 * contents) and every `style` attribute is deleted, the rest of the markup
 * is returned untouched.
 *
 * This is a regex pass over the raw text, not an HTML parser, so text that
 * merely looks like ` style="..."` outside of a tag is removed as well.
 *
 * @param html - Raw HTML source.
 * @returns The HTML with style elements and style attributes removed.
 */
function stripStyles(html: string): string {
  // Remove <style> elements and their contents. The lookahead requires the
  // tag name to end right after "style", so custom elements such as
  // <styled-box> or <style-guide> are not mistaken for a style element.
  // The closing tag may carry whitespace before ">".
  const withoutStyleElements = html.replace(
    /<style(?=[\s/>])[^>]*>[\s\S]*?<\/style\s*>/gi,
    ""
  );

  // Remove style attributes but keep the rest of the tag. Each quoting form
  // is matched separately so a double-quoted value may contain single quotes
  // (e.g. font-family: 'Arial') and vice versa; unquoted values run up to
  // the next whitespace or tag delimiter.
  return withoutStyleElements.replace(
    /\s+style\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'=<>`]+)/gi,
    ""
  );
}
|
||||
|
||||
export interface ExtractedArticle {
|
||||
title: string;
|
||||
@@ -41,7 +64,11 @@ export async function extractArticle(url: string): Promise<ExtractedArticle> {
|
||||
}
|
||||
|
||||
const html = await response.text();
|
||||
const dom = new JSDOM(html, { url });
|
||||
const cleanedHtml = stripStyles(html);
|
||||
const dom = new JSDOM(cleanedHtml, {
|
||||
url,
|
||||
virtualConsole: createVirtualConsole(),
|
||||
});
|
||||
const document = dom.window.document;
|
||||
|
||||
// Extract using Readability
|
||||
@@ -83,7 +110,11 @@ export async function extractFromHtml(
|
||||
url: string,
|
||||
fallbackTitle?: string
|
||||
): Promise<ExtractedArticle> {
|
||||
const dom = new JSDOM(html, { url });
|
||||
const cleanedHtml = stripStyles(html);
|
||||
const dom = new JSDOM(cleanedHtml, {
|
||||
url,
|
||||
virtualConsole: createVirtualConsole(),
|
||||
});
|
||||
const document = dom.window.document;
|
||||
|
||||
// Extract using Readability
|
||||
|
||||
Reference in New Issue
Block a user