mirror of
https://github.com/Tony0410/readlater.git
synced 2026-05-24 22:01:41 +08:00
Add content capture bookmarklet for paywalled sites
- New "Content Capture" bookmarklet sends page HTML directly - Works for paywalled sites (Economist, NYT, etc.) when logged in - Works for Cloudflare-protected sites - Added POST handler to /api/save for HTML content - Added extractFromHtml() for processing captured content - Improved 403 error message with bookmarklet suggestion - Updated bookmarklet page with both options Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
import { NextRequest, NextResponse } from "next/server";
|
||||
import { db, schema } from "@/lib/db";
|
||||
import { extractArticle } from "@/lib/utils/extract";
|
||||
import { extractArticle, extractFromHtml } from "@/lib/utils/extract";
|
||||
import { v4 as uuidv4 } from "uuid";
|
||||
import { eq } from "drizzle-orm";
|
||||
|
||||
@@ -138,3 +138,139 @@ export async function GET(request: NextRequest) {
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * POST /api/save - Save an article from page HTML captured by the bookmarklet.
 *
 * Reads the form fields `url`, `html` and `title` from the request body.
 * If an article with the same URL is already stored, nothing is inserted;
 * otherwise the article is extracted with `extractFromHtml` and inserted
 * into `schema.articles`.
 *
 * @param request - Incoming request carrying the bookmarklet's form data.
 * @returns An HTML status page ("Saved!", "Already Saved" or "Error") that
 *   tries to close its own window after three seconds. Failures are reported
 *   through this page rather than thrown.
 */
export async function POST(request: NextRequest) {
  type SaveStatus = "success" | "error" | "exists";

  // Per-status presentation: badge colour, heading text and SVG icon.
  const presentation: Record<SaveStatus, { color: string; heading: string; icon: string }> = {
    success: {
      color: "#22c55e",
      heading: "Saved!",
      icon: '<svg viewBox="0 0 24 24"><path d="M9 16.17L4.83 12l-1.42 1.41L9 19 21 7l-1.41-1.41z"/></svg>',
    },
    exists: {
      color: "#eab308",
      heading: "Already Saved",
      icon: '<svg viewBox="0 0 24 24"><path d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm-2 15l-5-5 1.41-1.41L10 14.17l7.59-7.59L19 8l-9 9z"/></svg>',
    },
    error: {
      color: "#ef4444",
      heading: "Error",
      icon: '<svg viewBox="0 0 24 24"><path d="M19 6.41L17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>',
    },
  };

  // The message embeds article titles and exception text, both of which can
  // originate from the captured third-party page. Escape it so it is rendered
  // as text and can never inject markup or script into the status page.
  const escapeHtml = (text: string): string =>
    text
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;");

  // Builds the small status page shown in the window the bookmarklet opened.
  const htmlResponse = (status: SaveStatus, message: string) => {
    const { color, heading, icon } = presentation[status];
    const html = `
<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>ReadLater</title>
  <style>
    * { margin: 0; padding: 0; box-sizing: border-box; }
    body {
      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
      background: #000;
      color: #fff;
      min-height: 100vh;
      display: flex;
      align-items: center;
      justify-content: center;
      padding: 20px;
    }
    .container {
      text-align: center;
      max-width: 300px;
    }
    .icon {
      width: 60px;
      height: 60px;
      border-radius: 50%;
      background: ${color};
      display: flex;
      align-items: center;
      justify-content: center;
      margin: 0 auto 20px;
    }
    .icon svg {
      width: 30px;
      height: 30px;
      fill: white;
    }
    h1 {
      font-size: 18px;
      margin-bottom: 10px;
    }
    p {
      color: #888;
      font-size: 14px;
      line-height: 1.5;
    }
    .close {
      margin-top: 20px;
      padding: 10px 20px;
      background: #333;
      border: none;
      border-radius: 8px;
      color: #fff;
      cursor: pointer;
      font-size: 14px;
    }
    .close:hover {
      background: #444;
    }
  </style>
</head>
<body>
  <div class="container">
    <div class="icon">
      ${icon}
    </div>
    <h1>${heading}</h1>
    <p>${escapeHtml(message)}</p>
    <button class="close" onclick="window.close()">Close</button>
  </div>
  <script>
    setTimeout(() => window.close(), 3000);
  </script>
</body>
</html>`;

    return new NextResponse(html, {
      headers: { "Content-Type": "text/html; charset=utf-8" },
    });
  };

  try {
    // Parse form data from bookmarklet. FormData.get() yields a string, a
    // File, or null, so each field is checked instead of being cast.
    const formData = await request.formData();
    const url = formData.get("url");
    const html = formData.get("html");
    const title = formData.get("title");

    if (typeof url !== "string" || !url) {
      return htmlResponse("error", "No URL provided");
    }

    if (typeof html !== "string" || html.trim() === "") {
      return htmlResponse("error", "No page content provided");
    }

    // Check if article already exists
    const [duplicate] = await db
      .select()
      .from(schema.articles)
      .where(eq(schema.articles.url, url))
      .limit(1);

    if (duplicate) {
      return htmlResponse("exists", `"${duplicate.title}" is already in your reading list`);
    }

    // Extract article from provided HTML content; the page title sent by the
    // bookmarklet is only a fallback for when extraction finds none.
    const fallbackTitle = typeof title === "string" && title ? title : undefined;
    const extracted = await extractFromHtml(html, url, fallbackTitle);

    const newArticle: schema.NewArticle = {
      id: uuidv4(),
      url,
      title: extracted.title,
      author: extracted.author,
      siteName: extracted.siteName,
      excerpt: extracted.excerpt,
      content: extracted.content,
      textContent: extracted.textContent,
      leadImage: extracted.leadImage,
      wordCount: extracted.wordCount,
    };

    await db.insert(schema.articles).values(newArticle);

    return htmlResponse("success", `"${extracted.title}" has been added to your reading list`);
  } catch (error) {
    console.error("Error saving article from HTML:", error);
    return htmlResponse(
      "error",
      error instanceof Error ? error.message : "Failed to save article"
    );
  }
}
|
||||
|
||||
@@ -1,23 +1,34 @@
|
||||
"use client";
|
||||
|
||||
import { useState, useEffect } from "react";
|
||||
import { BookOpen, Copy, Check } from "lucide-react";
|
||||
import { BookOpen, Copy, Check, Zap, Link2 } from "lucide-react";
|
||||
import Link from "next/link";
|
||||
|
||||
export default function BookmarkletPage() {
|
||||
const [baseUrl, setBaseUrl] = useState("");
|
||||
const [copied, setCopied] = useState(false);
|
||||
const [copiedSimple, setCopiedSimple] = useState(false);
|
||||
const [copiedAdvanced, setCopiedAdvanced] = useState(false);
|
||||
|
||||
useEffect(() => {
|
||||
setBaseUrl(window.location.origin);
|
||||
}, []);
|
||||
|
||||
const bookmarkletCode = `javascript:(function(){var url=encodeURIComponent(window.location.href);window.open('${baseUrl}/api/save?url='+url,'_blank','width=400,height=300');})();`;
|
||||
// Simple bookmarklet - just sends URL (works for most sites)
|
||||
const simpleBookmarklet = `javascript:(function(){var url=encodeURIComponent(window.location.href);window.open('${baseUrl}/api/save?url='+url,'_blank','width=400,height=300');})();`;
|
||||
|
||||
const handleCopy = async () => {
|
||||
await navigator.clipboard.writeText(bookmarkletCode);
|
||||
setCopied(true);
|
||||
setTimeout(() => setCopied(false), 2000);
|
||||
// Advanced bookmarklet - captures page content directly (works for paywalled/protected sites)
|
||||
const advancedBookmarklet = `javascript:(function(){var d=document,b=d.body,t=d.title,u=location.href,h=d.documentElement.outerHTML;var f=d.createElement('form');f.method='POST';f.action='${baseUrl}/api/save';f.target='_blank';var addField=function(n,v){var i=d.createElement('input');i.type='hidden';i.name=n;i.value=v;f.appendChild(i);};addField('url',u);addField('title',t);addField('html',h);b.appendChild(f);f.submit();b.removeChild(f);})();`;
|
||||
|
||||
const handleCopySimple = async () => {
|
||||
await navigator.clipboard.writeText(simpleBookmarklet);
|
||||
setCopiedSimple(true);
|
||||
setTimeout(() => setCopiedSimple(false), 2000);
|
||||
};
|
||||
|
||||
const handleCopyAdvanced = async () => {
|
||||
await navigator.clipboard.writeText(advancedBookmarklet);
|
||||
setCopiedAdvanced(true);
|
||||
setTimeout(() => setCopiedAdvanced(false), 2000);
|
||||
};
|
||||
|
||||
return (
|
||||
@@ -31,31 +42,76 @@ export default function BookmarkletPage() {
|
||||
Back to ReadLater
|
||||
</Link>
|
||||
|
||||
<h1 className="text-3xl font-bold mb-6">Bookmarklet</h1>
|
||||
<h1 className="text-3xl font-bold mb-6">Bookmarklets</h1>
|
||||
|
||||
<div className="bg-[var(--surface)] rounded-lg p-6 mb-8">
|
||||
<h2 className="text-xl font-semibold mb-4">Quick Save Bookmarklet</h2>
|
||||
<p className="text-[var(--muted)] mb-6">
|
||||
Drag this button to your bookmarks bar, or right-click and "Add to Bookmarks".
|
||||
Then click it on any page to save the article to ReadLater.
|
||||
{/* Advanced Bookmarklet - Recommended */}
|
||||
<div className="bg-[var(--surface)] rounded-lg p-6 mb-6 border-2 border-[var(--accent)]">
|
||||
<div className="flex items-center gap-2 mb-4">
|
||||
<Zap className="w-5 h-5 text-[var(--accent)]" />
|
||||
<h2 className="text-xl font-semibold">Content Capture (Recommended)</h2>
|
||||
</div>
|
||||
<p className="text-[var(--muted)] mb-4">
|
||||
Captures the actual page content from your browser. <strong>Works with paywalled sites</strong> (Economist, NYT, etc.)
|
||||
and sites with bot protection - as long as you can see the article, it can save it.
|
||||
</p>
|
||||
|
||||
<div className="flex flex-col sm:flex-row gap-4 items-start">
|
||||
<a
|
||||
href={bookmarkletCode}
|
||||
href={advancedBookmarklet}
|
||||
onClick={(e) => e.preventDefault()}
|
||||
className="inline-flex items-center gap-2 px-6 py-3 bg-[var(--accent)] text-white rounded-lg font-medium cursor-move"
|
||||
title="Drag to bookmarks bar"
|
||||
>
|
||||
<BookOpen className="w-5 h-5" />
|
||||
<Zap className="w-5 h-5" />
|
||||
Save to ReadLater
|
||||
</a>
|
||||
|
||||
<button
|
||||
onClick={handleCopy}
|
||||
className="inline-flex items-center gap-2 px-4 py-3 border border-[var(--border)] rounded-lg hover:bg-[var(--surface)] transition-colors"
|
||||
onClick={handleCopyAdvanced}
|
||||
className="inline-flex items-center gap-2 px-4 py-3 border border-[var(--border)] rounded-lg hover:bg-[var(--background)] transition-colors"
|
||||
>
|
||||
{copied ? (
|
||||
{copiedAdvanced ? (
|
||||
<>
|
||||
<Check className="w-5 h-5 text-green-500" />
|
||||
Copied!
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<Copy className="w-5 h-5" />
|
||||
Copy code
|
||||
</>
|
||||
)}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Simple Bookmarklet */}
|
||||
<div className="bg-[var(--surface)] rounded-lg p-6 mb-8">
|
||||
<div className="flex items-center gap-2 mb-4">
|
||||
<Link2 className="w-5 h-5" />
|
||||
<h2 className="text-xl font-semibold">URL Only (Lightweight)</h2>
|
||||
</div>
|
||||
<p className="text-[var(--muted)] mb-4">
|
||||
Just sends the URL - our server fetches the content. Smaller bookmarklet, but won't work
|
||||
for paywalled or bot-protected sites.
|
||||
</p>
|
||||
|
||||
<div className="flex flex-col sm:flex-row gap-4 items-start">
|
||||
<a
|
||||
href={simpleBookmarklet}
|
||||
onClick={(e) => e.preventDefault()}
|
||||
className="inline-flex items-center gap-2 px-6 py-3 bg-[var(--muted)] text-white rounded-lg font-medium cursor-move"
|
||||
title="Drag to bookmarks bar"
|
||||
>
|
||||
<Link2 className="w-5 h-5" />
|
||||
Save URL
|
||||
</a>
|
||||
|
||||
<button
|
||||
onClick={handleCopySimple}
|
||||
className="inline-flex items-center gap-2 px-4 py-3 border border-[var(--border)] rounded-lg hover:bg-[var(--background)] transition-colors"
|
||||
>
|
||||
{copiedSimple ? (
|
||||
<>
|
||||
<Check className="w-5 h-5 text-green-500" />
|
||||
Copied!
|
||||
@@ -71,22 +127,21 @@ export default function BookmarkletPage() {
|
||||
</div>
|
||||
|
||||
<div className="bg-[var(--surface)] rounded-lg p-6">
|
||||
<h2 className="text-xl font-semibold mb-4">Manual Installation</h2>
|
||||
<p className="text-[var(--muted)] mb-4">
|
||||
If dragging doesn't work, create a new bookmark and paste this as the URL:
|
||||
</p>
|
||||
<pre className="bg-[var(--background)] p-4 rounded-lg overflow-x-auto text-sm">
|
||||
<code className="text-[var(--muted)]">{bookmarkletCode}</code>
|
||||
</pre>
|
||||
<h2 className="text-xl font-semibold mb-4">Installation</h2>
|
||||
<ol className="list-decimal list-inside space-y-2 text-[var(--muted)]">
|
||||
<li><strong>Drag</strong> the button above to your bookmarks bar</li>
|
||||
<li>Or <strong>right-click</strong> → "Add to Bookmarks"</li>
|
||||
<li>Or <strong>copy the code</strong> and create a bookmark manually</li>
|
||||
</ol>
|
||||
</div>
|
||||
|
||||
<div className="mt-8 text-[var(--muted)] text-sm">
|
||||
<h3 className="font-semibold mb-2">How it works:</h3>
|
||||
<ol className="list-decimal list-inside space-y-1">
|
||||
<li>Click the bookmarklet on any article page</li>
|
||||
<li>A popup will confirm the article was saved</li>
|
||||
<li>The article appears in your ReadLater list</li>
|
||||
</ol>
|
||||
<h3 className="font-semibold mb-2">Tips:</h3>
|
||||
<ul className="list-disc list-inside space-y-1">
|
||||
<li>Use the <strong>Content Capture</strong> bookmarklet for paywalled sites you're subscribed to</li>
|
||||
<li>Make sure you're logged in to see the full article before clicking</li>
|
||||
<li>The bookmarklet sends the visible page content directly to ReadLater</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -13,14 +13,30 @@ export interface ExtractedArticle {
|
||||
}
|
||||
|
||||
export async function extractArticle(url: string): Promise<ExtractedArticle> {
|
||||
// Fetch the page
|
||||
// Fetch the page with browser-like headers to avoid bot detection
|
||||
const response = await fetch(url, {
|
||||
headers: {
|
||||
"User-Agent": "Mozilla/5.0 (compatible; ReadLater/1.0)",
|
||||
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.9",
|
||||
"Accept-Encoding": "gzip, deflate, br",
|
||||
"Cache-Control": "no-cache",
|
||||
"Pragma": "no-cache",
|
||||
"Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
|
||||
"Sec-Ch-Ua-Mobile": "?0",
|
||||
"Sec-Ch-Ua-Platform": '"macOS"',
|
||||
"Sec-Fetch-Dest": "document",
|
||||
"Sec-Fetch-Mode": "navigate",
|
||||
"Sec-Fetch-Site": "none",
|
||||
"Sec-Fetch-User": "?1",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
},
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
if (response.status === 403) {
|
||||
throw new Error(`This site blocks automated access (403 Forbidden). Try using the bookmarklet from the article page instead - it can capture content your browser can see.`);
|
||||
}
|
||||
throw new Error(`Failed to fetch: ${response.status} ${response.statusText}`);
|
||||
}
|
||||
|
||||
@@ -60,3 +76,45 @@ export async function extractArticle(url: string): Promise<ExtractedArticle> {
|
||||
wordCount,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Extract a readable article from HTML that was captured in the browser
 * (content-capture bookmarklet) rather than fetched by the server.
 *
 * @param html - Full page markup to parse.
 * @param url - URL of the captured page; used as the document URL for
 *   parsing, as the base for resolving the lead image, and as the hostname
 *   fallback for `siteName`.
 * @param fallbackTitle - Title to use when Readability finds none.
 * @returns The extracted article fields plus a whitespace-based word count.
 * @throws Error when `html` is empty or Readability cannot extract an article.
 */
export async function extractFromHtml(
  html: string,
  url: string,
  fallbackTitle?: string
): Promise<ExtractedArticle> {
  // Callers read this value from form data, so guard at runtime as well.
  if (typeof html !== "string" || html.trim() === "") {
    throw new Error("No HTML content provided");
  }

  const dom = new JSDOM(html, { url });

  try {
    const document = dom.window.document;

    // Try to find lead image. Read it before Readability runs, because
    // parse() modifies the document it is given.
    let leadImage: string | null = null;
    const ogImage = document
      .querySelector('meta[property="og:image"]')
      ?.getAttribute("content")
      ?.trim();
    if (ogImage) {
      try {
        // Resolve relative or protocol-relative values against the page URL
        // so the image still loads when the article is shown elsewhere.
        leadImage = new URL(ogImage, url).href;
      } catch {
        leadImage = null;
      }
    }

    // Extract using Readability
    const article = new Readability(document).parse();

    if (!article) {
      throw new Error("Could not extract article content from provided HTML");
    }

    const textContent = article.textContent || "";
    const content = article.content || "";

    // Calculate word count
    const wordCount = textContent.split(/\s+/).filter(Boolean).length;

    return {
      title: article.title || fallbackTitle || "Untitled",
      author: article.byline || null,
      siteName: article.siteName || new URL(url).hostname,
      excerpt: article.excerpt || null,
      content,
      textContent,
      leadImage,
      wordCount,
    };
  } finally {
    // Release the jsdom window once the plain-string results are built.
    dom.window.close();
  }
}
|
||||
|
||||
Reference in New Issue
Block a user