AI Newsletter Digest improvements: fixed quoted-printable (QP) soft line break decoding, URL extraction, and content cleaning

This commit is contained in:
Krilly
2026-03-04 13:29:22 +00:00
parent 29a98137a7
commit 57dd294675
13706 changed files with 2114953 additions and 237629 deletions

View File

@@ -0,0 +1,28 @@
import fs from 'node:fs';
import path from 'node:path';
import dotenv from 'dotenv';
dotenv.config();
export const ROOT = process.cwd();
export const STATE_DIR = path.join(ROOT, 'state');
export const LOG_DIR = path.join(ROOT, 'logs');
export const SESSION_FILE = path.join(STATE_DIR, 'x-session.json');
if (!fs.existsSync(STATE_DIR)) fs.mkdirSync(STATE_DIR, { recursive: true });
if (!fs.existsSync(LOG_DIR)) fs.mkdirSync(LOG_DIR, { recursive: true });
export const ENABLE_POSTING = String(process.env.ENABLE_POSTING || 'false').toLowerCase() === 'true';
export const X_QUERY = process.env.X_QUERY || '(AI OR OpenClaw) lang:en -is:retweet';
export const MAX_TWEETS = Number(process.env.MAX_TWEETS || 10);
export const USER_AGENT = process.env.USER_AGENT;
export function nowStamp() {
return new Date().toISOString();
}
export function writeJsonLog(name, data) {
const file = path.join(LOG_DIR, `${name}-${Date.now()}.json`);
fs.writeFileSync(file, JSON.stringify(data, null, 2));
return file;
}

View File

@@ -0,0 +1,65 @@
import fs from 'node:fs';
import { chromium } from 'playwright';
import {
SESSION_FILE,
USER_AGENT,
X_QUERY,
MAX_TWEETS,
nowStamp,
writeJsonLog
} from './common.js';
const hasSession = fs.existsSync(SESSION_FILE);
if (!hasSession) {
console.error('No session found. Run: npm run login');
process.exit(1);
}
const browser = await chromium.launch({ headless: true });
const context = await browser.newContext({
storageState: SESSION_FILE,
userAgent: USER_AGENT || undefined,
viewport: { width: 1366, height: 900 }
});
const page = await context.newPage();
const url = `https://x.com/search?q=${encodeURIComponent(X_QUERY)}&src=typed_query&f=live`;
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
await page.waitForTimeout(2500);
const tweets = await page.evaluate((maxTweets) => {
const out = [];
const articles = Array.from(document.querySelectorAll('article'));
for (const a of articles) {
if (out.length >= maxTweets) break;
const textNode = a.querySelector('[data-testid="tweetText"]');
const userNode = a.querySelector('a[role="link"][href*="/"]');
const timeNode = a.querySelector('time');
const linkNode = a.querySelector('a[href*="/status/"]');
const text = textNode?.innerText?.trim();
const user = userNode?.getAttribute('href') || null;
const when = timeNode?.getAttribute('datetime') || null;
const link = linkNode ? `https://x.com${linkNode.getAttribute('href')}` : null;
if (text) out.push({ text, user, when, link });
}
return out;
}, MAX_TWEETS);
const payload = {
ts: nowStamp(),
query: X_QUERY,
count: tweets.length,
tweets
};
const file = writeJsonLog('x-fetch', payload);
console.log(JSON.stringify(payload, null, 2));
console.log(`\nSaved log: ${file}`);
await browser.close();

View File

@@ -0,0 +1,26 @@
import fs from 'node:fs';
import { chromium } from 'playwright';
import { SESSION_FILE, USER_AGENT } from './common.js';
console.log('Starting X login capture...');
console.log('A browser will open. Log in manually, then press ENTER here to save session.');
const browser = await chromium.launch({ headless: false });
const context = await browser.newContext({
userAgent: USER_AGENT || undefined,
viewport: { width: 1366, height: 900 }
});
const page = await context.newPage();
await page.goto('https://x.com/i/flow/login', { waitUntil: 'domcontentloaded' });
process.stdin.resume();
await new Promise((resolve) => {
process.stdout.write('\nPress ENTER after login completes in browser...\n');
process.stdin.once('data', () => resolve());
});
await context.storageState({ path: SESSION_FILE });
console.log(`Saved session to ${SESSION_FILE}`);
await browser.close();
process.exit(0);

View File

@@ -0,0 +1,45 @@
import fs from 'node:fs';
import { chromium } from 'playwright';
import { ENABLE_POSTING, SESSION_FILE, USER_AGENT } from './common.js';
const text = process.argv.slice(2).join(' ').trim();
if (!text) {
console.error('Usage: npm run post -- "your post text"');
process.exit(1);
}
if (!ENABLE_POSTING) {
console.error('Posting is disabled. Set ENABLE_POSTING=true in .env to enable.');
process.exit(1);
}
if (!fs.existsSync(SESSION_FILE)) {
console.error('No session found. Run: npm run login');
process.exit(1);
}
const browser = await chromium.launch({ headless: true });
const context = await browser.newContext({
storageState: SESSION_FILE,
userAgent: USER_AGENT || undefined,
viewport: { width: 1366, height: 900 }
});
const page = await context.newPage();
await page.goto('https://x.com/compose/post', { waitUntil: 'domcontentloaded', timeout: 60000 });
await page.waitForSelector('[data-testid="tweetTextarea_0"]', { timeout: 20000 });
await page.fill('[data-testid="tweetTextarea_0"]', text);
// Safety: we require explicit --confirm to actually click Post
const confirmed = process.argv.includes('--confirm');
if (!confirmed) {
console.log('Draft prepared, not posted. Re-run with --confirm to publish.');
await browser.close();
process.exit(0);
}
await page.click('[data-testid="tweetButtonInline"]');
await page.waitForTimeout(2000);
console.log('Post submitted.');
await browser.close();