From c6a400a04da7a36b1625ffb026c4f80c6ae08f26 Mon Sep 17 00:00:00 2001
From: Gemini Agent <gemini-agent@homelab.local>
Date: Sun, 25 Jan 2026 01:07:40 +0000
Subject: [PATCH] Gracefully handle 401/403 blocked sites with a minimal article

Instead of failing completely on 403/401, save a placeholder article
with the URL so users can still access it via the 'Open original' link.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/lib/utils/extract.ts | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/src/lib/utils/extract.ts b/src/lib/utils/extract.ts
index d6e3ec3..b60188f 100644
--- a/src/lib/utils/extract.ts
+++ b/src/lib/utils/extract.ts
@@ -58,8 +58,20 @@ export async function extractArticle(url: string): Promise<ExtractedArticle> {
   });
 
   if (!response.ok) {
-    if (response.status === 403) {
-      throw new Error(`This site blocks automated access (403 Forbidden). Try using the bookmarklet from the article page instead - it can capture content your browser can see.`);
+    // On 401/403 (site blocked the request), return a minimal article with just URL info
+    if (response.status === 403 || response.status === 401) {
+      const hostname = new URL(url).hostname.replace(/^www\./, "");
+      return {
+        title: `Article from ${hostname}`,
+        author: null,
+        siteName: hostname,
+        excerpt: "This site blocked automated access. Use 'Open original' to read, or the Content Capture bookmarklet to save the full article.",
+        content: `<p>This site blocked automated access. <a href="${url}" target="_blank">Open original article</a> to read.</p><p>Tip: Use the Content Capture bookmarklet from the article page to save the full content.</p>`,
+        textContent: "This site blocked automated access. Open original article to read.",
+        leadImage: null,
+        wordCount: 0,
+        publishedAt: null,
+      };
     }
     throw new Error(`Failed to fetch: ${response.status} ${response.statusText}`);
   }