Fix JSDOM CSS parsing errors (border-width issue)

JSDOM crashes on modern CSS with variables like var(--border-width,1px).
Fix by stripping all <style> tags and inline style attributes before
parsing - Readability only needs DOM structure, not CSS.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Gemini Agent
2026-01-23 20:20:03 +00:00
parent 61e1ac4d81
commit 8151705b17

View File

@@ -1,5 +1,28 @@
import { Readability } from "@mozilla/readability";
import { JSDOM } from "jsdom";
import { JSDOM, VirtualConsole } from "jsdom";
/**
 * Builds a JSDOM `VirtualConsole` whose "error" and "warn" events are
 * received by a do-nothing listener, so that output is discarded.
 *
 * The intent is to silence noise JSDOM produces for modern CSS (custom
 * properties and the like), which is irrelevant to Readability extraction.
 */
function createVirtualConsole() {
  const silentConsole = new VirtualConsole();
  // One shared no-op handler: whatever arrives on these channels is dropped.
  const ignore = (): void => {};
  for (const channel of ["error", "warn"] as const) {
    silentConsole.on(channel, ignore);
  }
  return silentConsole;
}
/**
 * Removes CSS from raw HTML before it is handed to JSDOM, to avoid JSDOM CSS
 * parsing errors. Readability only needs the DOM structure, not the styling.
 *
 * Two things are stripped:
 *  - `<style>` elements together with their contents;
 *  - quoted `style="..."` / `style='...'` attributes (the rest of the tag is kept).
 *
 * NOTE(review): this is a regex heuristic, not an HTML parser. The attribute
 * pattern does not verify it is inside a tag, so literal text that looks like
 * ` style="..."` is removed as well.
 *
 * @param html - Raw HTML markup.
 * @returns The markup with style elements and quoted style attributes removed.
 */
function stripStyles(html: string): string {
  // `\b` stops look-alike tags such as <styled-box> from opening a match, and
  // `\s*` before the final `>` accepts a closing tag written as `</style >`.
  const withoutStyleTags = html.replace(
    /<style\b[^>]*>[\s\S]*?<\/style\s*>/gi,
    ""
  );
  // Match each quote kind separately so a value may contain the *other* quote
  // character, e.g. style="font-family: 'Arial'". A single shared class
  // [^"'] would stop at the inner quote and leave half the value in the tag.
  return withoutStyleTags.replace(
    /\s+style\s*=\s*(?:"[^"]*"|'[^']*')/gi,
    ""
  );
}
export interface ExtractedArticle {
title: string;
@@ -41,7 +64,11 @@ export async function extractArticle(url: string): Promise<ExtractedArticle> {
}
const html = await response.text();
const dom = new JSDOM(html, { url });
const cleanedHtml = stripStyles(html);
const dom = new JSDOM(cleanedHtml, {
url,
virtualConsole: createVirtualConsole(),
});
const document = dom.window.document;
// Extract using Readability
@@ -83,7 +110,11 @@ export async function extractFromHtml(
url: string,
fallbackTitle?: string
): Promise<ExtractedArticle> {
const dom = new JSDOM(html, { url });
const cleanedHtml = stripStyles(html);
const dom = new JSDOM(cleanedHtml, {
url,
virtualConsole: createVirtualConsole(),
});
const document = dom.window.document;
// Extract using Readability