mirror of
https://github.com/Tony0410/readlater.git
synced 2026-05-25 06:11:40 +08:00
Compare commits
4 Commits
61e1ac4d81
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9e7e2cc199 | ||
|
|
c6a400a04d | ||
|
|
96ece66204 | ||
|
|
8151705b17 |
1
drizzle/0002_modern_white_tiger.sql
Normal file
1
drizzle/0002_modern_white_tiger.sql
Normal file
@@ -0,0 +1 @@
|
||||
ALTER TABLE `articles` ADD `published_at` integer;
|
||||
566
drizzle/meta/0002_snapshot.json
Normal file
566
drizzle/meta/0002_snapshot.json
Normal file
@@ -0,0 +1,566 @@
|
||||
{
|
||||
"version": "6",
|
||||
"dialect": "sqlite",
|
||||
"id": "2817f3e4-6ce5-4d64-80e2-fb5fa10c8fa2",
|
||||
"prevId": "d3369a08-d474-468e-a003-df32d5f2c61d",
|
||||
"tables": {
|
||||
"api_keys": {
|
||||
"name": "api_keys",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"name": {
|
||||
"name": "name",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"key": {
|
||||
"name": "key",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"last_used": {
|
||||
"name": "last_used",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"api_keys_key_unique": {
|
||||
"name": "api_keys_key_unique",
|
||||
"columns": [
|
||||
"key"
|
||||
],
|
||||
"isUnique": true
|
||||
}
|
||||
},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"articles": {
|
||||
"name": "articles",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"url": {
|
||||
"name": "url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"title": {
|
||||
"name": "title",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"author": {
|
||||
"name": "author",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"site_name": {
|
||||
"name": "site_name",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"excerpt": {
|
||||
"name": "excerpt",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"content": {
|
||||
"name": "content",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"text_content": {
|
||||
"name": "text_content",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"lead_image": {
|
||||
"name": "lead_image",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"word_count": {
|
||||
"name": "word_count",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"reading_progress": {
|
||||
"name": "reading_progress",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"reading_time_seconds": {
|
||||
"name": "reading_time_seconds",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"is_favorite": {
|
||||
"name": "is_favorite",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": false
|
||||
},
|
||||
"is_archived": {
|
||||
"name": "is_archived",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": false
|
||||
},
|
||||
"folder_id": {
|
||||
"name": "folder_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"tags": {
|
||||
"name": "tags",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": "'[]'"
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"updated_at": {
|
||||
"name": "updated_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"read_at": {
|
||||
"name": "read_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"finished_at": {
|
||||
"name": "finished_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"published_at": {
|
||||
"name": "published_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"email_config": {
|
||||
"name": "email_config",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"inbox_email": {
|
||||
"name": "inbox_email",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"is_active": {
|
||||
"name": "is_active",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": true
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"email_config_inbox_email_unique": {
|
||||
"name": "email_config_inbox_email_unique",
|
||||
"columns": [
|
||||
"inbox_email"
|
||||
],
|
||||
"isUnique": true
|
||||
}
|
||||
},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"folders": {
|
||||
"name": "folders",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"name": {
|
||||
"name": "name",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"color": {
|
||||
"name": "color",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": "'#3b82f6'"
|
||||
},
|
||||
"icon": {
|
||||
"name": "icon",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": "'folder'"
|
||||
},
|
||||
"parent_id": {
|
||||
"name": "parent_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"sort_order": {
|
||||
"name": "sort_order",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"highlights": {
|
||||
"name": "highlights",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"article_id": {
|
||||
"name": "article_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"text": {
|
||||
"name": "text",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"note": {
|
||||
"name": "note",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"color": {
|
||||
"name": "color",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": "'#fbbf24'"
|
||||
},
|
||||
"start_offset": {
|
||||
"name": "start_offset",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"end_offset": {
|
||||
"name": "end_offset",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"reading_goals": {
|
||||
"name": "reading_goals",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"type": {
|
||||
"name": "type",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"metric": {
|
||||
"name": "metric",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"target": {
|
||||
"name": "target",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"is_active": {
|
||||
"name": "is_active",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": true
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"reading_stats": {
|
||||
"name": "reading_stats",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"date": {
|
||||
"name": "date",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"articles_read": {
|
||||
"name": "articles_read",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"articles_added": {
|
||||
"name": "articles_added",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"words_read": {
|
||||
"name": "words_read",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"time_spent_seconds": {
|
||||
"name": "time_spent_seconds",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"streak": {
|
||||
"name": "streak",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"reading_stats_date_unique": {
|
||||
"name": "reading_stats_date_unique",
|
||||
"columns": [
|
||||
"date"
|
||||
],
|
||||
"isUnique": true
|
||||
}
|
||||
},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"settings": {
|
||||
"name": "settings",
|
||||
"columns": {
|
||||
"key": {
|
||||
"name": "key",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"value": {
|
||||
"name": "value",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"updated_at": {
|
||||
"name": "updated_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
}
|
||||
},
|
||||
"views": {},
|
||||
"enums": {},
|
||||
"_meta": {
|
||||
"schemas": {},
|
||||
"tables": {},
|
||||
"columns": {}
|
||||
},
|
||||
"internal": {
|
||||
"indexes": {}
|
||||
}
|
||||
}
|
||||
@@ -15,6 +15,13 @@
|
||||
"when": 1768638242044,
|
||||
"tag": "0001_watery_the_santerians",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 2,
|
||||
"version": "6",
|
||||
"when": 1769236358306,
|
||||
"tag": "0002_modern_white_tiger",
|
||||
"breakpoints": true
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -30,6 +30,7 @@ export async function GET(request: NextRequest) {
|
||||
updatedAt: schema.articles.updatedAt,
|
||||
readAt: schema.articles.readAt,
|
||||
finishedAt: schema.articles.finishedAt,
|
||||
publishedAt: schema.articles.publishedAt,
|
||||
};
|
||||
|
||||
let query = db.select(listFields).from(schema.articles);
|
||||
@@ -137,6 +138,7 @@ export async function POST(request: NextRequest) {
|
||||
textContent: extracted.textContent,
|
||||
leadImage: extracted.leadImage,
|
||||
wordCount: extracted.wordCount,
|
||||
publishedAt: extracted.publishedAt,
|
||||
};
|
||||
|
||||
await db.insert(schema.articles).values(newArticle);
|
||||
|
||||
@@ -125,6 +125,7 @@ export async function GET(request: NextRequest) {
|
||||
textContent: extracted.textContent,
|
||||
leadImage: extracted.leadImage,
|
||||
wordCount: extracted.wordCount,
|
||||
publishedAt: extracted.publishedAt,
|
||||
};
|
||||
|
||||
await db.insert(schema.articles).values(newArticle);
|
||||
@@ -261,6 +262,7 @@ export async function POST(request: NextRequest) {
|
||||
textContent: extracted.textContent,
|
||||
leadImage: extracted.leadImage,
|
||||
wordCount: extracted.wordCount,
|
||||
publishedAt: extracted.publishedAt,
|
||||
};
|
||||
|
||||
await db.insert(schema.articles).values(newArticle);
|
||||
|
||||
@@ -72,6 +72,7 @@ export async function POST(request: NextRequest) {
|
||||
textContent: extracted.textContent,
|
||||
leadImage: extracted.leadImage,
|
||||
wordCount: extracted.wordCount,
|
||||
publishedAt: extracted.publishedAt,
|
||||
tags: tags ? JSON.stringify(tags) : "[]",
|
||||
folderId: folderId || null,
|
||||
};
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
import { Article } from "@/lib/types";
|
||||
import { Star, Archive, Trash2, ExternalLink, Clock, CheckSquare, Square } from "lucide-react";
|
||||
import { formatDistanceToNow } from "@/lib/utils/date";
|
||||
import { formatDistanceToNow, formatDate } from "@/lib/utils/date";
|
||||
|
||||
interface ArticleListProps {
|
||||
articles: Article[];
|
||||
@@ -106,7 +106,10 @@ export function ArticleList({
|
||||
<Clock className="w-3 h-3" />
|
||||
{Math.ceil(article.wordCount / 200)} min read
|
||||
</span>
|
||||
<span>{formatDistanceToNow(article.createdAt)}</span>
|
||||
{article.publishedAt && (
|
||||
<span title="Published date">{formatDate(article.publishedAt)}</span>
|
||||
)}
|
||||
<span title="Added to library">{formatDistanceToNow(article.createdAt)}</span>
|
||||
{article.readingProgress > 0 && article.readingProgress < 100 && (
|
||||
<span>{article.readingProgress}% read</span>
|
||||
)}
|
||||
|
||||
@@ -2,7 +2,8 @@
|
||||
|
||||
import { useState, useEffect } from "react";
|
||||
import { Article, ReaderSettings } from "@/lib/types";
|
||||
import { ArrowLeft, Star, Archive, Trash2, Settings, ExternalLink } from "lucide-react";
|
||||
import { ArrowLeft, Star, Archive, Trash2, Settings, ExternalLink, Calendar } from "lucide-react";
|
||||
import { formatDate } from "@/lib/utils/date";
|
||||
|
||||
interface ReaderProps {
|
||||
article: Article;
|
||||
@@ -141,6 +142,12 @@ export function Reader({
|
||||
<div className="flex flex-wrap items-center gap-x-4 gap-y-2 text-[var(--muted)] text-sm">
|
||||
{article.siteName && <span>{article.siteName}</span>}
|
||||
{article.author && <span>By {article.author}</span>}
|
||||
{article.publishedAt && (
|
||||
<span className="flex items-center gap-1">
|
||||
<Calendar className="w-3.5 h-3.5" />
|
||||
{formatDate(article.publishedAt)}
|
||||
</span>
|
||||
)}
|
||||
<span>{Math.ceil(article.wordCount / 200)} min read</span>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
@@ -42,6 +42,7 @@ export const articles = sqliteTable("articles", {
|
||||
updatedAt: integer("updated_at", { mode: "timestamp" }).$defaultFn(() => new Date()),
|
||||
readAt: integer("read_at", { mode: "timestamp" }),
|
||||
finishedAt: integer("finished_at", { mode: "timestamp" }), // When reading was completed
|
||||
publishedAt: integer("published_at", { mode: "timestamp" }), // Original article publish date
|
||||
});
|
||||
|
||||
// Highlights and notes
|
||||
|
||||
@@ -19,6 +19,7 @@ export interface Article {
|
||||
updatedAt: string;
|
||||
readAt: string | null;
|
||||
finishedAt: string | null;
|
||||
publishedAt: string | null;
|
||||
}
|
||||
|
||||
export interface Folder {
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
/**
 * Formats a date as a short en-US calendar date, e.g. "Jan 15, 2024".
 *
 * @param date - A `Date`, or any string accepted by the `Date` constructor.
 * @returns The formatted date (rendered in the runtime's default time zone),
 *   or an empty string when `date` cannot be parsed, so callers never display
 *   the literal text "Invalid Date".
 */
export function formatDate(date: string | Date): string {
  const d = new Date(date);

  // An unparseable input yields a Date whose time value is NaN.
  if (Number.isNaN(d.getTime())) {
    return "";
  }

  return d.toLocaleDateString("en-US", {
    month: "short",
    day: "numeric",
    year: "numeric",
  });
}
|
||||
|
||||
export function formatDistanceToNow(date: string | Date): string {
|
||||
const d = new Date(date);
|
||||
const now = new Date();
|
||||
|
||||
@@ -1,5 +1,28 @@
|
||||
import { Readability } from "@mozilla/readability";
|
||||
import { JSDOM } from "jsdom";
|
||||
import { JSDOM, VirtualConsole } from "jsdom";
|
||||
|
||||
/**
 * Builds a jsdom VirtualConsole whose "error" and "warn" events are handled
 * by a do-nothing listener, so nothing this function wires up is ever printed.
 *
 * Intended to keep jsdom's complaints about modern CSS (variables, etc.) out
 * of the logs; Readability only needs the DOM structure, not the styles.
 */
function createVirtualConsole() {
  // Shared no-op handler: the events are deliberately swallowed.
  const ignore = () => {};

  const silentConsole = new VirtualConsole();
  silentConsole.on("error", ignore);
  silentConsole.on("warn", ignore);

  return silentConsole;
}
|
||||
|
||||
/**
 * Removes `<style>` elements and inline `style` attributes from an HTML string.
 *
 * This is done before handing the markup to JSDOM to avoid CSS parsing errors;
 * Readability only needs the DOM structure, not the styling.
 *
 * @param html - Raw HTML markup.
 * @returns The markup with `<style>…</style>` blocks and `style=…` attributes removed.
 */
function stripStyles(html: string): string {
  // Drop <style> elements together with their contents. The opening tag must be
  // exactly "style" (optionally followed by attributes), so custom elements such
  // as <styled-box> are left alone.
  const withoutStyleTags = html.replace(/<style(?:\s[^>]*)?>[\s\S]*?<\/style>/gi, "");

  // Drop style attributes while keeping the rest of the tag. Each quoting form is
  // matched separately so a value containing the *other* quote character
  // (e.g. style="font-family: 'Arial'") is removed in full instead of being cut
  // off at the first inner quote. Unquoted values are handled as well.
  return withoutStyleTags.replace(/\s+style\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'>]+)/gi, "");
}
|
||||
|
||||
export interface ExtractedArticle {
|
||||
title: string;
|
||||
@@ -10,11 +33,58 @@ export interface ExtractedArticle {
|
||||
textContent: string;
|
||||
leadImage: string | null;
|
||||
wordCount: number;
|
||||
publishedAt: Date | null;
|
||||
}
|
||||
|
||||
/**
 * Tries to recover the real article URL from the HTML of a Google News
 * redirect page.
 *
 * Strategies are tried in order, and the first hit wins:
 *   1. a `data-n-au` attribute (URI-decoded when possible),
 *   2. a `<link rel="canonical">` that does not point at news.google.com,
 *   3. an `og:url` meta tag that does not point at news.google.com,
 *   4. the first non-Google http(s) URL embedded in a `jsdata` attribute.
 *
 * @param html - HTML source of the Google News page.
 * @returns The extracted article URL, or `null` when none of the strategies match.
 */
function extractGoogleNewsUrl(html: string): string | null {
  // 1. data-n-au attribute (article URL)
  const dataMatch = html.match(/data-n-au="([^"]+)"/);
  if (dataMatch) {
    const rawUrl = dataMatch[1];
    try {
      return decodeURIComponent(rawUrl);
    } catch {
      // decodeURIComponent throws URIError on malformed percent sequences
      // (e.g. a bare "%"); fall back to the attribute value as written
      // rather than aborting the whole extraction.
      return rawUrl;
    }
  }

  // 2. canonical link
  const canonicalMatch = html.match(/<link[^>]+rel=["']canonical["'][^>]+href=["']([^"']+)["']/i);
  if (canonicalMatch && !canonicalMatch[1].includes('news.google.com')) {
    return canonicalMatch[1];
  }

  // 3. og:url that's not Google News
  const ogMatch = html.match(/<meta[^>]+property=["']og:url["'][^>]+content=["']([^"']+)["']/i);
  if (ogMatch && !ogMatch[1].includes('news.google.com')) {
    return ogMatch[1];
  }

  // 4. article link inside jsdata. The URL is captured directly so that a
  // news.google.com URL appearing earlier in the same attribute is never
  // returned in place of (or glued onto) the real one.
  const jsMatch = html.match(/jsdata="[^"]*?(https?:\/\/(?!news\.google\.com)[^"&\s]+)/);
  if (jsMatch) {
    return jsMatch[1];
  }

  return null;
}
|
||||
|
||||
export async function extractArticle(url: string): Promise<ExtractedArticle> {
|
||||
// Resolve shortened/redirect URLs first
|
||||
let resolvedUrl = url;
|
||||
|
||||
// Follow redirects to get final URL
|
||||
try {
|
||||
const headResponse = await fetch(url, {
|
||||
method: 'HEAD',
|
||||
redirect: 'follow',
|
||||
headers: {
|
||||
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||
},
|
||||
});
|
||||
resolvedUrl = headResponse.url;
|
||||
} catch {
|
||||
// If HEAD fails, continue with original URL
|
||||
}
|
||||
|
||||
// Fetch the page with browser-like headers to avoid bot detection
|
||||
const response = await fetch(url, {
|
||||
const response = await fetch(resolvedUrl, {
|
||||
headers: {
|
||||
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
|
||||
@@ -34,14 +104,51 @@ export async function extractArticle(url: string): Promise<ExtractedArticle> {
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
if (response.status === 403) {
|
||||
throw new Error(`This site blocks automated access (403 Forbidden). Try using the bookmarklet from the article page instead - it can capture content your browser can see.`);
|
||||
// On 403/blocked, return minimal article with just URL info
|
||||
if (response.status === 403 || response.status === 401) {
|
||||
const hostname = new URL(url).hostname.replace(/^www\./, "");
|
||||
return {
|
||||
title: `Article from ${hostname}`,
|
||||
author: null,
|
||||
siteName: hostname,
|
||||
excerpt: "This site blocked automated access. Use 'Open original' to read, or the Content Capture bookmarklet to save the full article.",
|
||||
content: `<p>This site blocked automated access. <a href="${url}" target="_blank">Open original article</a> to read.</p><p>Tip: Use the Content Capture bookmarklet from the article page to save the full content.</p>`,
|
||||
textContent: "This site blocked automated access. Open original article to read.",
|
||||
leadImage: null,
|
||||
wordCount: 0,
|
||||
publishedAt: null,
|
||||
};
|
||||
}
|
||||
throw new Error(`Failed to fetch: ${response.status} ${response.statusText}`);
|
||||
}
|
||||
|
||||
const html = await response.text();
|
||||
const dom = new JSDOM(html, { url });
|
||||
let html = await response.text();
|
||||
let finalUrl = resolvedUrl;
|
||||
|
||||
// Check if we landed on Google News - need to extract actual article URL
|
||||
if (resolvedUrl.includes('news.google.com')) {
|
||||
const realUrl = extractGoogleNewsUrl(html);
|
||||
if (realUrl) {
|
||||
// Fetch the actual article
|
||||
const articleResponse = await fetch(realUrl, {
|
||||
headers: {
|
||||
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.9",
|
||||
},
|
||||
});
|
||||
if (articleResponse.ok) {
|
||||
html = await articleResponse.text();
|
||||
finalUrl = realUrl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const cleanedHtml = stripStyles(html);
|
||||
const dom = new JSDOM(cleanedHtml, {
|
||||
url: finalUrl,
|
||||
virtualConsole: createVirtualConsole(),
|
||||
});
|
||||
const document = dom.window.document;
|
||||
|
||||
// Extract using Readability
|
||||
@@ -59,6 +166,34 @@ export async function extractArticle(url: string): Promise<ExtractedArticle> {
|
||||
leadImage = ogImage.getAttribute("content");
|
||||
}
|
||||
|
||||
// Try to find publish date from various meta tags
|
||||
let publishedAt: Date | null = null;
|
||||
const dateSelectors = [
|
||||
'meta[property="article:published_time"]',
|
||||
'meta[name="article:published_time"]',
|
||||
'meta[property="og:published_time"]',
|
||||
'meta[name="pubdate"]',
|
||||
'meta[name="publishdate"]',
|
||||
'meta[name="date"]',
|
||||
'meta[itemprop="datePublished"]',
|
||||
'time[datetime]',
|
||||
'time[pubdate]',
|
||||
];
|
||||
|
||||
for (const selector of dateSelectors) {
|
||||
const el = document.querySelector(selector);
|
||||
if (el) {
|
||||
const dateStr = el.getAttribute("content") || el.getAttribute("datetime");
|
||||
if (dateStr) {
|
||||
const parsed = new Date(dateStr);
|
||||
if (!isNaN(parsed.getTime())) {
|
||||
publishedAt = parsed;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const textContent = article.textContent || "";
|
||||
const content = article.content || "";
|
||||
|
||||
@@ -68,12 +203,13 @@ export async function extractArticle(url: string): Promise<ExtractedArticle> {
|
||||
return {
|
||||
title: article.title || "Untitled",
|
||||
author: article.byline || null,
|
||||
siteName: article.siteName || new URL(url).hostname,
|
||||
siteName: article.siteName || new URL(finalUrl).hostname,
|
||||
excerpt: article.excerpt || null,
|
||||
content,
|
||||
textContent,
|
||||
leadImage,
|
||||
wordCount,
|
||||
publishedAt,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -83,7 +219,11 @@ export async function extractFromHtml(
|
||||
url: string,
|
||||
fallbackTitle?: string
|
||||
): Promise<ExtractedArticle> {
|
||||
const dom = new JSDOM(html, { url });
|
||||
const cleanedHtml = stripStyles(html);
|
||||
const dom = new JSDOM(cleanedHtml, {
|
||||
url,
|
||||
virtualConsole: createVirtualConsole(),
|
||||
});
|
||||
const document = dom.window.document;
|
||||
|
||||
// Extract using Readability
|
||||
@@ -101,6 +241,34 @@ export async function extractFromHtml(
|
||||
leadImage = ogImage.getAttribute("content");
|
||||
}
|
||||
|
||||
// Try to find publish date from various meta tags
|
||||
let publishedAt: Date | null = null;
|
||||
const dateSelectors = [
|
||||
'meta[property="article:published_time"]',
|
||||
'meta[name="article:published_time"]',
|
||||
'meta[property="og:published_time"]',
|
||||
'meta[name="pubdate"]',
|
||||
'meta[name="publishdate"]',
|
||||
'meta[name="date"]',
|
||||
'meta[itemprop="datePublished"]',
|
||||
'time[datetime]',
|
||||
'time[pubdate]',
|
||||
];
|
||||
|
||||
for (const selector of dateSelectors) {
|
||||
const el = document.querySelector(selector);
|
||||
if (el) {
|
||||
const dateStr = el.getAttribute("content") || el.getAttribute("datetime");
|
||||
if (dateStr) {
|
||||
const parsed = new Date(dateStr);
|
||||
if (!isNaN(parsed.getTime())) {
|
||||
publishedAt = parsed;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const textContent = article.textContent || "";
|
||||
const content = article.content || "";
|
||||
|
||||
@@ -116,5 +284,6 @@ export async function extractFromHtml(
|
||||
textContent,
|
||||
leadImage,
|
||||
wordCount,
|
||||
publishedAt,
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user