diff --git a/src/app/api/save/route.ts b/src/app/api/save/route.ts index f698a9a..374693a 100644 --- a/src/app/api/save/route.ts +++ b/src/app/api/save/route.ts @@ -1,6 +1,6 @@ import { NextRequest, NextResponse } from "next/server"; import { db, schema } from "@/lib/db"; -import { extractArticle } from "@/lib/utils/extract"; +import { extractArticle, extractFromHtml } from "@/lib/utils/extract"; import { v4 as uuidv4 } from "uuid"; import { eq } from "drizzle-orm"; @@ -138,3 +138,139 @@ export async function GET(request: NextRequest) { ); } }

// POST /api/save - Save article with HTML content from bookmarklet

/** Outcome reported on the confirmation page shown in the bookmarklet's tab. */
type SaveStatus = "success" | "error" | "exists";

/** Accent colour, heading and icon glyph for each outcome. */
const SAVE_STATUS_VIEW: Record<SaveStatus, { color: string; heading: string; icon: string }> = {
  success: { color: "#22c55e", heading: "Saved!", icon: "\u2713" },
  exists: { color: "#eab308", heading: "Already Saved", icon: "!" },
  error: { color: "#ef4444", heading: "Error", icon: "\u2715" },
};

/** Escape text for safe interpolation into HTML element content or attribute values. */
function escapeHtml(text: string): string {
  return text
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}

/**
 * Read a form field as non-empty text.
 * `FormData.get` returns `string | File | null`; anything that is not a
 * non-blank string is reported as `null` instead of being cast blindly.
 */
function readTextField(formData: FormData, name: string): string | null {
  const value = formData.get(name);
  if (typeof value !== "string") {
    return null;
  }
  const trimmed = value.trim();
  return trimmed === "" ? null : trimmed;
}

/** True when `value` parses as an absolute http: or https: URL. */
function isHttpUrl(value: string): boolean {
  try {
    const { protocol } = new URL(value);
    return protocol === "http:" || protocol === "https:";
  } catch {
    return false;
  }
}

/**
 * Build the small HTML confirmation page returned to the bookmarklet's tab.
 *
 * `message` may contain text taken from the saved page (its title) or from a
 * thrown error, so it is HTML-escaped before interpolation.
 *
 * NOTE(review): the original page markup (styles, icons, any script) was lost
 * when this source was extracted; this is a minimal reconstruction that keeps
 * the original colours, headings and page title. Restore the original markup
 * around the escaped message if it is available.
 */
function renderSaveResult(status: SaveStatus, message: string): NextResponse {
  const view = SAVE_STATUS_VIEW[status];
  const page = `<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>ReadLater</title>
  <style>
    body { margin: 0; min-height: 100vh; display: flex; align-items: center; justify-content: center; font-family: system-ui, sans-serif; background: #f9fafb; color: #111827; }
    .card { text-align: center; padding: 2rem; max-width: 22rem; }
    .icon { width: 3rem; height: 3rem; margin: 0 auto 1rem; border-radius: 50%; background: ${view.color}; color: #fff; font-size: 1.5rem; line-height: 3rem; }
    h1 { margin: 0 0 0.5rem; font-size: 1.25rem; }
    p { margin: 0; color: #4b5563; overflow-wrap: anywhere; }
  </style>
</head>
<body>
  <div class="card">
    <div class="icon">${view.icon}</div>
    <h1>${view.heading}</h1>
    <p>${escapeHtml(message)}</p>
  </div>
</body>
</html>`;

  return new NextResponse(page, {
    headers: { "Content-Type": "text/html; charset=utf-8" },
  });
}

/**
 * Save an article from page content captured by the bookmarklet.
 *
 * Expects form fields `url`, `html` and (optionally) `title`. Always answers
 * with an HTML page describing the outcome: saved, already saved, or error.
 *
 * @param request - Incoming form POST from the bookmarklet.
 * @returns An HTML response; failures are reported in the page, not thrown.
 */
export async function POST(request: NextRequest) {
  try {
    // Parse form data from bookmarklet
    const formData = await request.formData();
    const url = readTextField(formData, "url");
    const html = readTextField(formData, "html");
    const title = readTextField(formData, "title");

    if (url === null) {
      return renderSaveResult("error", "No URL provided");
    }
    // Reject javascript:, data:, file: etc. before the URL is stored or parsed.
    if (!isHttpUrl(url)) {
      return renderSaveResult("error", "Only http(s) URLs can be saved");
    }
    if (html === null) {
      return renderSaveResult("error", "No page content provided");
    }

    // Check if article already exists
    const [duplicate] = await db
      .select()
      .from(schema.articles)
      .where(eq(schema.articles.url, url))
      .limit(1);

    if (duplicate) {
      return renderSaveResult("exists", `"${duplicate.title}" is already in your reading list`);
    }

    // Extract article from provided HTML content
    const extracted = await extractFromHtml(html, url, title ?? undefined);

    const newArticle: schema.NewArticle = {
      id: uuidv4(),
      url,
      title: extracted.title,
      author: extracted.author,
      siteName: extracted.siteName,
      excerpt: extracted.excerpt,
      content: extracted.content,
      textContent: extracted.textContent,
      leadImage: extracted.leadImage,
      wordCount: extracted.wordCount,
    };

    await db.insert(schema.articles).values(newArticle);

    return renderSaveResult("success", `"${extracted.title}" has been added to your reading list`);
  } catch (error) {
    console.error("Error saving article from HTML:", error);
    return renderSaveResult(
      "error",
      error instanceof Error ? error.message : "Failed to save article"
    );
  }
}

diff --git a/src/app/bookmarklet/page.tsx b/src/app/bookmarklet/page.tsx index 58133dc..6e2b9de 100644 --- a/src/app/bookmarklet/page.tsx +++ b/src/app/bookmarklet/page.tsx @@ -1,23 +1,34 @@ "use client"; import { useState, useEffect } from "react"; -import { BookOpen, Copy, Check } from "lucide-react"; +import { BookOpen, Copy, Check, Zap, Link2 } from "lucide-react"; import Link from "next/link"; export default function BookmarkletPage() { const [baseUrl, setBaseUrl] = useState(""); - const [copied, setCopied] = useState(false); + const [copiedSimple, setCopiedSimple] = useState(false); + const [copiedAdvanced, setCopiedAdvanced] = useState(false); useEffect(() => { setBaseUrl(window.location.origin); }, []); - const bookmarkletCode = `javascript:(function(){var url=encodeURIComponent(window.location.href);window.open('${baseUrl}/api/save?url='+url,'_blank','width=400,height=300');})();`; + // Simple bookmarklet - just sends URL (works for most sites) + const simpleBookmarklet = `javascript:(function(){var url=encodeURIComponent(window.location.href);window.open('${baseUrl}/api/save?url='+url,'_blank','width=400,height=300');})();`; - const handleCopy = async () => { - await navigator.clipboard.writeText(bookmarkletCode); - setCopied(true); - setTimeout(() => setCopied(false), 2000); + // Advanced bookmarklet - captures page content directly (works for paywalled/protected sites) + const advancedBookmarklet = `javascript:(function(){var d=document,b=d.body,t=d.title,u=location.href,h=d.documentElement.outerHTML;var f=d.createElement('form');f.method='POST';f.action='${baseUrl}/api/save';f.target='_blank';var addField=function(n,v){var i=d.createElement('input');i.type='hidden';i.name=n;i.value=v;f.appendChild(i);};addField('url',u);addField('title',t);addField('html',h);b.appendChild(f);f.submit();b.removeChild(f);})();`; + + const handleCopySimple = async () => { + await 
navigator.clipboard.writeText(simpleBookmarklet); + setCopiedSimple(true); + setTimeout(() => setCopiedSimple(false), 2000); + }; + + const handleCopyAdvanced = async () => { + await navigator.clipboard.writeText(advancedBookmarklet); + setCopiedAdvanced(true); + setTimeout(() => setCopiedAdvanced(false), 2000); }; return ( @@ -31,31 +42,76 @@ export default function BookmarkletPage() { Back to ReadLater -

Bookmarklet

+

Bookmarklets

-
-

Quick Save Bookmarklet

-

- Drag this button to your bookmarks bar, or right-click and "Add to Bookmarks". - Then click it on any page to save the article to ReadLater. + {/* Advanced Bookmarklet - Recommended */} +

+
+ +

Content Capture (Recommended)

+
+

+ Captures the actual page content from your browser. Works with paywalled sites (Economist, NYT, etc.) + and sites with bot protection - as long as you can see the article, it can save it.

e.preventDefault()} className="inline-flex items-center gap-2 px-6 py-3 bg-[var(--accent)] text-white rounded-lg font-medium cursor-move" title="Drag to bookmarks bar" > - + Save to ReadLater +
+
+ + {/* Simple Bookmarklet */} +
+
+ +

URL Only (Lightweight)

+
+

+ Just sends the URL - our server fetches the content. Smaller bookmarklet, but won't work + for paywalled or bot-protected sites. +

+ +
+ e.preventDefault()} + className="inline-flex items-center gap-2 px-6 py-3 bg-[var(--muted)] text-white rounded-lg font-medium cursor-move" + title="Drag to bookmarks bar" + > + + Save URL + + +
-

Manual Installation

-

- If dragging doesn't work, create a new bookmark and paste this as the URL: -

-
-            {bookmarkletCode}
-          
+

Installation

+
    +
  1. Drag the button above to your bookmarks bar
  2. +
  3. Or right-click → "Add to Bookmarks"
  4. +
  5. Or copy the code and create a bookmark manually
  6. +
-

How it works:

-
    -
  1. Click the bookmarklet on any article page
  2. -
  3. A popup will confirm the article was saved
  4. -
  5. The article appears in your ReadLater list
  6. -
+

Tips:

+
    +
  • Use the Content Capture bookmarklet for paywalled sites you're subscribed to
  • +
  • Make sure you're logged in to see the full article before clicking
  • +
  • The bookmarklet sends the visible page content directly to ReadLater
  • +
diff --git a/src/lib/utils/extract.ts b/src/lib/utils/extract.ts index 06e11c1..5e3b3d8 100644 --- a/src/lib/utils/extract.ts +++ b/src/lib/utils/extract.ts @@ -13,14 +13,30 @@ export interface ExtractedArticle { } export async function extractArticle(url: string): Promise { - // Fetch the page + // Fetch the page with browser-like headers to avoid bot detection const response = await fetch(url, { headers: { - "User-Agent": "Mozilla/5.0 (compatible; ReadLater/1.0)", + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.9", + "Accept-Encoding": "gzip, deflate, br", + "Cache-Control": "no-cache", + "Pragma": "no-cache", + "Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"', + "Sec-Ch-Ua-Mobile": "?0", + "Sec-Ch-Ua-Platform": '"macOS"', + "Sec-Fetch-Dest": "document", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": "none", + "Sec-Fetch-User": "?1", + "Upgrade-Insecure-Requests": "1", }, }); if (!response.ok) { + if (response.status === 403) { + throw new Error(`This site blocks automated access (403 Forbidden). 
Try using the bookmarklet from the article page instead - it can capture content your browser can see.`); + } throw new Error(`Failed to fetch: ${response.status} ${response.statusText}`); } @@ -60,3 +76,45 @@ export async function extractArticle(url: string): Promise { wordCount, }; }

/**
 * Extract an article from HTML that was captured in the user's browser
 * (bookmarklet with content capture) instead of being fetched by the server.
 *
 * @param html - Full page markup as captured by the bookmarklet.
 * @param url - Absolute URL of the page; used as the document base URL and
 *   as the source of the fallback site name.
 * @param fallbackTitle - Title to use when Readability finds none.
 * @returns The extracted article fields.
 * @throws Error when `html` is empty, `url` is not a valid absolute URL, or
 *   Readability cannot find article content.
 */
export async function extractFromHtml(
  html: string,
  url: string,
  fallbackTitle?: string
): Promise<ExtractedArticle> {
  // The HTML comes from an HTTP form field, so guard against missing input
  // even though the parameter is typed as string.
  if (typeof html !== "string" || html.trim() === "") {
    throw new Error("No HTML content provided");
  }

  // Validate up front so callers get a readable message rather than the
  // TypeError jsdom raises for an unparseable `url` option.
  let pageUrl: URL;
  try {
    pageUrl = new URL(url);
  } catch {
    throw new Error(`Invalid article URL: ${url}`);
  }

  const dom = new JSDOM(html, { url: pageUrl.href });
  const document = dom.window.document;

  // Try to find lead image. Read it before Readability runs: parse() works
  // by modifying the document it is given.
  let leadImage: string | null = null;
  const ogImage = document
    .querySelector('meta[property="og:image"]')
    ?.getAttribute("content")
    ?.trim();
  if (ogImage) {
    try {
      // Resolve relative paths against the article URL so the image still
      // loads when rendered from a different origin.
      leadImage = new URL(ogImage, pageUrl).href;
    } catch {
      leadImage = ogImage;
    }
  }

  // Extract using Readability
  const article = new Readability(document).parse();

  if (!article) {
    throw new Error("Could not extract article content from provided HTML");
  }

  const textContent = article.textContent || "";
  const content = article.content || "";

  // Calculate word count
  const wordCount = textContent.split(/\s+/).filter(Boolean).length;

  return {
    // `||` (not `??`) on purpose: an empty-string title should fall through.
    title: article.title || fallbackTitle || "Untitled",
    author: article.byline || null,
    siteName: article.siteName || pageUrl.hostname,
    excerpt: article.excerpt || null,
    content,
    textContent,
    leadImage,
    wordCount,
  };
}