AI Newsletter Digest improvements: fixed QP soft line break decoding, URL extraction, and content cleaning

This commit is contained in:
Krilly
2026-03-04 13:29:22 +00:00
parent 29a98137a7
commit 57dd294675
13706 changed files with 2114953 additions and 237629 deletions

View File

@@ -0,0 +1,214 @@
#!/usr/bin/env node
/**
* Check anthonymau@gmail.com for new AI-related emails
* Fetches full email content for summarization
*/
const Imap = require('imap');
/**
 * IMAP connection settings for the newsletter mailbox.
 *
 * Credentials come from the environment so they are never committed:
 *   AI_DIGEST_IMAP_USER - mailbox login (falls back to the original address)
 *   AI_DIGEST_IMAP_PASS - app password; deliberately has no default
 *
 * SECURITY: an app password used to be hard-coded here and is therefore in
 * the repository history. It must be revoked and regenerated.
 */
const config = {
  user: process.env.AI_DIGEST_IMAP_USER || 'anthonymau@gmail.com',
  password: process.env.AI_DIGEST_IMAP_PASS,
  host: 'imap.gmail.com',
  port: 993,
  tls: true,
  // Verify the server certificate. Turning this off lets anyone on the
  // network path impersonate the server and capture the login.
  tlsOptions: { rejectUnauthorized: true }
};
// Substrings that identify an AI newsletter when found (case-insensitively)
// anywhere in the sender or subject line.
const AI_PATTERNS = [
  'ai valley',
  'the rundown',
  'ai secret',
  'byte-sized',
  'bytesized',
  'myclaw',
  'openai global',
  'deep view',
  'khoj',
  'synthetic',
  'openhands',
  'artificial intelligence',
  'ai newsletter',
  'benevolent',
  'tldr',
  'benedict'
];

/**
 * Report whether an email looks like one of the tracked AI newsletters.
 *
 * The sender and subject are joined with a single space and lower-cased, then
 * checked for any entry of AI_PATTERNS as a plain substring.
 *
 * @param {string} from - Value of the From header.
 * @param {string} subject - Value of the Subject header.
 * @returns {boolean} true when at least one pattern occurs in the joined text.
 */
function isAINewsletter(from, subject) {
  const haystack = (from + ' ' + subject).toLowerCase();
  for (const needle of AI_PATTERNS) {
    if (haystack.includes(needle.toLowerCase())) {
      return true;
    }
  }
  return false;
}
// Only the newest SCAN_LIMIT messages are inspected (covers ~2 days for most inboxes).
const SCAN_LIMIT = 200;
// Newsletters older than this many hours are ignored.
const MAX_AGE_HOURS = 36;
// Upper bound on the cleaned body text printed per email.
const MAX_CONTENT_CHARS = 3000;

/**
 * Print a failure in the script's line-based stdout format, close the
 * connection and terminate with exit code 1.
 *
 * @param {string} stage - Short tag placed after "ERROR:" (e.g. "search").
 * @param {Error} err - The error being reported.
 * @param {boolean} [printStatus=true] - Also print "STATUS:error" first; this
 *   is skipped once "STATUS:connected" has already been printed.
 */
function exitWithError(stage, err, printStatus = true) {
  if (printStatus) {
    console.log('STATUS:error');
  }
  console.log('ERROR:' + stage + ':' + err.message);
  imap.end();
  process.exit(1);
}

/**
 * Extract From, Subject and Date from a raw header block.
 *
 * Uses the imap library's header parser instead of ad-hoc regexes. An
 * unanchored /Date: (.+)/i also matches inside a subject such as
 * "Weekly Update: ...", which yielded a bogus date and silently dropped
 * the email.
 *
 * @param {string} rawHeaders - Raw header text as returned by the fetch.
 * @returns {{from: string, subject: string, date: string}} Trimmed first value
 *   of each field, or '' when the field is absent.
 */
function parseSummaryHeaders(rawHeaders) {
  const parsed = Imap.parseHeader(rawHeaders);
  const first = (name) => {
    const values = parsed[name];
    return values && values.length > 0 ? String(values[0]).trim() : '';
  };
  return { from: first('from'), subject: first('subject'), date: first('date') };
}

/**
 * Decide whether a Date header falls inside the reporting window.
 *
 * A missing or unparseable date is treated as recent, so a malformed header
 * never hides a newsletter.
 *
 * @param {string} dateStr - Value of the Date header ('' when absent).
 * @returns {boolean} true when the email should be considered.
 */
function isRecentEnough(dateStr) {
  if (!dateStr) {
    return true;
  }
  const sentAt = Date.parse(dateStr);
  if (Number.isNaN(sentAt)) {
    return true;
  }
  return sentAt > Date.now() - MAX_AGE_HOURS * 60 * 60 * 1000;
}

/**
 * Decode quoted-printable text.
 *
 * Soft line breaks ("=" at end of line) are removed first. Each run of =XX
 * escapes is then decoded as a byte sequence, so multi-byte UTF-8 characters
 * are reassembled instead of being turned into one character per byte. If the
 * bytes are not valid UTF-8 the run is decoded as Latin-1.
 *
 * @param {string} raw - Quoted-printable encoded text.
 * @returns {string} Decoded text.
 */
function decodeQuotedPrintable(raw) {
  const unfolded = raw.replace(/=\r?\n/g, '');
  return unfolded.replace(/(?:=[0-9A-F]{2})+/gi, (run) => {
    const bytes = Buffer.from(run.replace(/=/g, ''), 'hex');
    const text = bytes.toString('utf8');
    return text.includes('\uFFFD') ? bytes.toString('latin1') : text;
  });
}

/**
 * Turn a raw body part into a single line of plain text.
 *
 * NOTE(review): quoted-printable decoding is applied unconditionally because
 * the part's Content-Transfer-Encoding is not fetched; base64 parts will not
 * be readable.
 *
 * @param {string} raw - Raw text of the fetched body part.
 * @returns {string} Text with QP decoded, markup removed, whitespace collapsed.
 */
function cleanBody(raw) {
  return decodeQuotedPrintable(raw)
    // Drop embedded stylesheets/scripts entirely, not just their tags.
    .replace(/<(style|script)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, ' ')
    .replace(/<[^>]+>/g, ' ')
    .replace(/\s+/g, ' ')
    .trim();
}

const imap = new Imap(config);

// Main flow: open the inbox, scan the headers of the newest messages, then
// fetch and print the body of every recent AI newsletter.
imap.once('ready', () => {
  imap.openBox('INBOX', false, (openErr) => {
    if (openErr) {
      return exitWithError('open_box', openErr);
    }
    imap.search(['ALL'], (searchErr, results) => {
      if (searchErr) {
        return exitWithError('search', searchErr);
      }
      const recent = results.slice(-SCAN_LIMIT);
      const lastUid = recent.length > 0 ? recent[recent.length - 1] : 0;
      console.log('STATUS:connected');
      console.log('TOTAL:' + results.length);
      console.log('LAST_UID:' + lastUid);
      console.log('RECENT:' + recent.join(','));

      if (recent.length === 0) {
        console.log('AI_COUNT:0');
        imap.end();
        process.exit(0);
      }

      // Pass 1: headers only, to pick out the newsletters.
      const headerFetch = imap.fetch(recent, {
        bodies: 'HEADER.FIELDS (FROM SUBJECT DATE)',
        struct: false,
        uid: true
      });
      const aiEmails = [];
      headerFetch.on('message', (msg) => {
        // Keep raw chunks and decode once, so a multi-byte character split
        // across two chunks is not corrupted.
        const chunks = [];
        let uid = null;
        msg.on('body', (stream) => {
          stream.on('data', (chunk) => chunks.push(Buffer.from(chunk)));
        });
        msg.once('attributes', (attrs) => {
          uid = attrs.uid;
        });
        msg.once('end', () => {
          const rawHeaders = Buffer.concat(chunks).toString('utf8');
          const { from, subject, date } = parseSummaryHeaders(rawHeaders);
          if (isRecentEnough(date) && isAINewsletter(from, subject)) {
            aiEmails.push({ uid, from, subject });
          }
        });
      });
      headerFetch.once('error', (err) => exitWithError('header_fetch', err, false));
      headerFetch.once('end', () => {
        console.log('AI_COUNT:' + aiEmails.length);

        // Index metadata by UID so each body is paired with its own headers.
        const metaByUid = new Map();
        for (const email of aiEmails) {
          if (email.uid) {
            metaByUid.set(email.uid, email);
          }
        }
        if (metaByUid.size === 0) {
          imap.end();
          process.exit(0);
        }

        // Pass 2: first body part of each newsletter.
        const bodyFetch = imap.fetch([...metaByUid.keys()], {
          bodies: ['1'],
          struct: false,
          uid: true
        });
        bodyFetch.on('message', (msg) => {
          const chunks = [];
          let uid = null;
          msg.on('body', (stream) => {
            stream.on('data', (chunk) => chunks.push(Buffer.from(chunk)));
          });
          msg.once('attributes', (attrs) => {
            uid = attrs.uid;
          });
          msg.once('end', () => {
            const meta = metaByUid.get(uid);
            if (!meta) {
              // A body that cannot be matched to headers is skipped rather
              // than attributed to the wrong email.
              return;
            }
            const raw = Buffer.concat(chunks).toString('utf8');
            let content = raw;
            try {
              content = cleanBody(raw);
            } catch (cleanErr) {
              // Best effort: fall back to the raw text, but say so on stderr.
              console.error('Content cleaning failed:', cleanErr.message);
            }
            const truncated = content.substring(0, MAX_CONTENT_CHARS).trim();
            console.log(`AI_EMAIL:${meta.from} | ${meta.subject}`);
            console.log(`AI_CONTENT:${truncated}`);
          });
        });
        bodyFetch.once('error', (err) => exitWithError('body_fetch', err, false));
        // Finish when the fetch itself completes; counting messages would
        // hang forever if the server omitted one.
        bodyFetch.once('end', () => {
          imap.end();
          process.exit(0);
        });
      });
    });
  });
});
// Connection-level failure: report it in the stdout protocol and exit non-zero.
function handleConnectionError(connectionError) {
  console.log('STATUS:error');
  console.log('ERROR:connect:' + connectionError.message);
  process.exit(1);
}

// The session has ended, so there is nothing left to do.
function handleConnectionEnd() {
  process.exit(0);
}

imap.once('error', handleConnectionError);
imap.once('end', handleConnectionEnd);
imap.connect();

View File

@@ -0,0 +1,214 @@
#!/usr/bin/env node
/**
* Check anthonymau@gmail.com for new AI-related emails
* Fetches full email content for summarization
*/
const Imap = require('imap');
/**
 * IMAP connection settings for the newsletter mailbox.
 *
 * Credentials come from the environment so they are never committed:
 *   AI_DIGEST_IMAP_USER - mailbox login (falls back to the original address)
 *   AI_DIGEST_IMAP_PASS - app password; deliberately has no default
 *
 * SECURITY: an app password used to be hard-coded here and is therefore in
 * the repository history. It must be revoked and regenerated.
 */
const config = {
  user: process.env.AI_DIGEST_IMAP_USER || 'anthonymau@gmail.com',
  password: process.env.AI_DIGEST_IMAP_PASS,
  host: 'imap.gmail.com',
  port: 993,
  tls: true,
  // Verify the server certificate. Turning this off lets anyone on the
  // network path impersonate the server and capture the login.
  tlsOptions: { rejectUnauthorized: true }
};
// Substrings that identify an AI newsletter when found (case-insensitively)
// anywhere in the sender or subject line.
const AI_PATTERNS = [
  'ai valley',
  'the rundown',
  'ai secret',
  'byte-sized',
  'bytesized',
  'myclaw',
  'openai global',
  'deep view',
  'khoj',
  'synthetic',
  'openhands',
  'artificial intelligence',
  'ai newsletter',
  'benevolent',
  'tldr',
  'benedict'
];

/**
 * Report whether an email looks like one of the tracked AI newsletters.
 *
 * The sender and subject are joined with a single space and lower-cased, then
 * checked for any entry of AI_PATTERNS as a plain substring.
 *
 * @param {string} from - Value of the From header.
 * @param {string} subject - Value of the Subject header.
 * @returns {boolean} true when at least one pattern occurs in the joined text.
 */
function isAINewsletter(from, subject) {
  const haystack = (from + ' ' + subject).toLowerCase();
  for (const needle of AI_PATTERNS) {
    if (haystack.includes(needle.toLowerCase())) {
      return true;
    }
  }
  return false;
}
// Only the newest SCAN_LIMIT messages are inspected (covers ~2 days for most inboxes).
const SCAN_LIMIT = 200;
// Newsletters older than this many hours are ignored.
const MAX_AGE_HOURS = 36;
// Upper bound on the cleaned body text printed per email.
const MAX_CONTENT_CHARS = 3000;

/**
 * Print a failure in the script's line-based stdout format, close the
 * connection and terminate with exit code 1.
 *
 * @param {string} stage - Short tag placed after "ERROR:" (e.g. "search").
 * @param {Error} err - The error being reported.
 * @param {boolean} [printStatus=true] - Also print "STATUS:error" first; this
 *   is skipped once "STATUS:connected" has already been printed.
 */
function exitWithError(stage, err, printStatus = true) {
  if (printStatus) {
    console.log('STATUS:error');
  }
  console.log('ERROR:' + stage + ':' + err.message);
  imap.end();
  process.exit(1);
}

/**
 * Extract From, Subject and Date from a raw header block.
 *
 * Uses the imap library's header parser instead of ad-hoc regexes. An
 * unanchored /Date: (.+)/i also matches inside a subject such as
 * "Weekly Update: ...", which yielded a bogus date and silently dropped
 * the email.
 *
 * @param {string} rawHeaders - Raw header text as returned by the fetch.
 * @returns {{from: string, subject: string, date: string}} Trimmed first value
 *   of each field, or '' when the field is absent.
 */
function parseSummaryHeaders(rawHeaders) {
  const parsed = Imap.parseHeader(rawHeaders);
  const first = (name) => {
    const values = parsed[name];
    return values && values.length > 0 ? String(values[0]).trim() : '';
  };
  return { from: first('from'), subject: first('subject'), date: first('date') };
}

/**
 * Decide whether a Date header falls inside the reporting window.
 *
 * A missing or unparseable date is treated as recent, so a malformed header
 * never hides a newsletter.
 *
 * @param {string} dateStr - Value of the Date header ('' when absent).
 * @returns {boolean} true when the email should be considered.
 */
function isRecentEnough(dateStr) {
  if (!dateStr) {
    return true;
  }
  const sentAt = Date.parse(dateStr);
  if (Number.isNaN(sentAt)) {
    return true;
  }
  return sentAt > Date.now() - MAX_AGE_HOURS * 60 * 60 * 1000;
}

/**
 * Decode quoted-printable text.
 *
 * Soft line breaks ("=" at end of line) are removed first. Each run of =XX
 * escapes is then decoded as a byte sequence, so multi-byte UTF-8 characters
 * are reassembled instead of being turned into one character per byte. If the
 * bytes are not valid UTF-8 the run is decoded as Latin-1.
 *
 * @param {string} raw - Quoted-printable encoded text.
 * @returns {string} Decoded text.
 */
function decodeQuotedPrintable(raw) {
  const unfolded = raw.replace(/=\r?\n/g, '');
  return unfolded.replace(/(?:=[0-9A-F]{2})+/gi, (run) => {
    const bytes = Buffer.from(run.replace(/=/g, ''), 'hex');
    const text = bytes.toString('utf8');
    return text.includes('\uFFFD') ? bytes.toString('latin1') : text;
  });
}

/**
 * Turn a raw body part into a single line of plain text.
 *
 * NOTE(review): quoted-printable decoding is applied unconditionally because
 * the part's Content-Transfer-Encoding is not fetched; base64 parts will not
 * be readable.
 *
 * @param {string} raw - Raw text of the fetched body part.
 * @returns {string} Text with QP decoded, markup removed, whitespace collapsed.
 */
function cleanBody(raw) {
  return decodeQuotedPrintable(raw)
    // Drop embedded stylesheets/scripts entirely, not just their tags.
    .replace(/<(style|script)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, ' ')
    .replace(/<[^>]+>/g, ' ')
    .replace(/\s+/g, ' ')
    .trim();
}

const imap = new Imap(config);

// Main flow: open the inbox, scan the headers of the newest messages, then
// fetch and print the body of every recent AI newsletter.
imap.once('ready', () => {
  imap.openBox('INBOX', false, (openErr) => {
    if (openErr) {
      return exitWithError('open_box', openErr);
    }
    imap.search(['ALL'], (searchErr, results) => {
      if (searchErr) {
        return exitWithError('search', searchErr);
      }
      const recent = results.slice(-SCAN_LIMIT);
      const lastUid = recent.length > 0 ? recent[recent.length - 1] : 0;
      console.log('STATUS:connected');
      console.log('TOTAL:' + results.length);
      console.log('LAST_UID:' + lastUid);
      console.log('RECENT:' + recent.join(','));

      if (recent.length === 0) {
        console.log('AI_COUNT:0');
        imap.end();
        process.exit(0);
      }

      // Pass 1: headers only, to pick out the newsletters.
      const headerFetch = imap.fetch(recent, {
        bodies: 'HEADER.FIELDS (FROM SUBJECT DATE)',
        struct: false,
        uid: true
      });
      const aiEmails = [];
      headerFetch.on('message', (msg) => {
        // Keep raw chunks and decode once, so a multi-byte character split
        // across two chunks is not corrupted.
        const chunks = [];
        let uid = null;
        msg.on('body', (stream) => {
          stream.on('data', (chunk) => chunks.push(Buffer.from(chunk)));
        });
        msg.once('attributes', (attrs) => {
          uid = attrs.uid;
        });
        msg.once('end', () => {
          const rawHeaders = Buffer.concat(chunks).toString('utf8');
          const { from, subject, date } = parseSummaryHeaders(rawHeaders);
          if (isRecentEnough(date) && isAINewsletter(from, subject)) {
            aiEmails.push({ uid, from, subject });
          }
        });
      });
      headerFetch.once('error', (err) => exitWithError('header_fetch', err, false));
      headerFetch.once('end', () => {
        console.log('AI_COUNT:' + aiEmails.length);

        // Index metadata by UID so each body is paired with its own headers.
        const metaByUid = new Map();
        for (const email of aiEmails) {
          if (email.uid) {
            metaByUid.set(email.uid, email);
          }
        }
        if (metaByUid.size === 0) {
          imap.end();
          process.exit(0);
        }

        // Pass 2: first body part of each newsletter.
        const bodyFetch = imap.fetch([...metaByUid.keys()], {
          bodies: ['1'],
          struct: false,
          uid: true
        });
        bodyFetch.on('message', (msg) => {
          const chunks = [];
          let uid = null;
          msg.on('body', (stream) => {
            stream.on('data', (chunk) => chunks.push(Buffer.from(chunk)));
          });
          msg.once('attributes', (attrs) => {
            uid = attrs.uid;
          });
          msg.once('end', () => {
            const meta = metaByUid.get(uid);
            if (!meta) {
              // A body that cannot be matched to headers is skipped rather
              // than attributed to the wrong email.
              return;
            }
            const raw = Buffer.concat(chunks).toString('utf8');
            let content = raw;
            try {
              content = cleanBody(raw);
            } catch (cleanErr) {
              // Best effort: fall back to the raw text, but say so on stderr.
              console.error('Content cleaning failed:', cleanErr.message);
            }
            const truncated = content.substring(0, MAX_CONTENT_CHARS).trim();
            console.log(`AI_EMAIL:${meta.from} | ${meta.subject}`);
            console.log(`AI_CONTENT:${truncated}`);
          });
        });
        bodyFetch.once('error', (err) => exitWithError('body_fetch', err, false));
        // Finish when the fetch itself completes; counting messages would
        // hang forever if the server omitted one.
        bodyFetch.once('end', () => {
          imap.end();
          process.exit(0);
        });
      });
    });
  });
});
// Connection-level failure: report it in the stdout protocol and exit non-zero.
function handleConnectionError(connectionError) {
  console.log('STATUS:error');
  console.log('ERROR:connect:' + connectionError.message);
  process.exit(1);
}

// The session has ended, so there is nothing left to do.
function handleConnectionEnd() {
  process.exit(0);
}

imap.once('error', handleConnectionError);
imap.once('end', handleConnectionEnd);
imap.connect();

View File

@@ -0,0 +1,219 @@
#!/usr/bin/env node
/**
* IMAP IDLE Monitor for krillyclaw@gmail.com
* Uses IMAP IDLE (RFC 2177) for real-time push notifications
*/
const path = require('path');
const https = require('https');
// Load imap modules (installed in this directory)
const Imap = require('imap');
const { simpleParser } = require('mailparser');
// Load environment variables from the account-specific env file located one
// directory above this script; createImap() reads its IMAP_* settings from
// process.env, so this must run before the first connection is created.
require('dotenv').config({ path: path.resolve(__dirname, '../.env.krillyclaw') });
// JSON file holding { last_uid, last_check }; read by loadState() and
// rewritten by saveState().
const STATE_FILE = path.resolve(__dirname, '../../../workspace/memory/.krillyclaw-imap-state.json');
// HTTP endpoint on the local machine that sendAlert() POSTs notifications to.
const GATEWAY_URL = 'http://127.0.0.1:18789/api/message/send';
// Highest message UID already alerted on; 0 until loadState() restores a
// saved value or a message is processed.
let lastUid = 0;
/**
 * Restore `lastUid` from the state file, if one exists.
 *
 * A missing file leaves `lastUid` untouched. A missing or falsy `last_uid`
 * field resets it to 0. Read and parse errors are logged and otherwise
 * ignored.
 */
function loadState() {
  try {
    const fs = require('fs');
    if (!fs.existsSync(STATE_FILE)) {
      return;
    }
    const rawState = fs.readFileSync(STATE_FILE, 'utf8');
    const savedState = JSON.parse(rawState);
    lastUid = savedState.last_uid || 0;
  } catch (loadError) {
    console.error('Error loading state:', loadError.message);
  }
}
/**
 * Persist the current `lastUid` and a timestamp to the state file.
 *
 * Write failures are logged and otherwise ignored so that monitoring
 * continues.
 */
function saveState() {
  try {
    const fs = require('fs');
    const snapshot = { last_uid: lastUid, last_check: Date.now() };
    fs.writeFileSync(STATE_FILE, JSON.stringify(snapshot));
  } catch (saveError) {
    console.error('Error saving state:', saveError.message);
  }
}
/**
 * POST a "new email" notification to the local message gateway.
 *
 * Fire-and-forget: the outcome is only logged, nothing is returned or thrown.
 *
 * @param {string} subject - Subject line of the new email.
 * @param {string} from - Sender of the new email.
 */
function sendAlert(subject, from) {
  const message = `📬 **New email in krillyclaw@gmail.com**:\n\n• **${subject}**\n From: ${from}\n\n— Krilly 🦀`;
  const payload = JSON.stringify({
    channel: 'telegram',
    to: 'telegram:1793951355',
    message: message
  });
  const http = require('http');
  const req = http.request(GATEWAY_URL, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      // Byte length, not string length: the message contains multi-byte characters.
      'Content-Length': Buffer.byteLength(payload)
    },
    // Without a timeout a stalled gateway would hold the socket open forever.
    timeout: 10000
  }, (res) => {
    console.log('Alert sent:', res.statusCode);
    // The body is not needed, but it must be drained or the socket and its
    // buffered data are never released in this long-running process.
    res.resume();
  });
  // 'timeout' only signals inactivity; the request must be aborted explicitly.
  req.on('timeout', () => req.destroy(new Error('gateway request timed out')));
  req.on('error', (e) => console.error('Alert failed:', e.message));
  req.end(payload);
}
/**
 * Build an IMAP client from environment variables.
 *
 * Reads IMAP_USER, IMAP_PASS, IMAP_HOST, IMAP_PORT and IMAP_TLS. When
 * IMAP_PORT is missing or not a number, the port defaults to 993 with TLS and
 * 143 without, instead of passing NaN to the client. Certificate verification
 * is on by default; set IMAP_TLS_REJECT_UNAUTHORIZED=false to opt out (for
 * example for a self-signed test server).
 *
 * @returns {Imap} A configured, not yet connected client.
 */
function createImap() {
  const useTls = process.env.IMAP_TLS === 'true';
  const parsedPort = Number.parseInt(process.env.IMAP_PORT, 10);
  const defaultPort = useTls ? 993 : 143;
  return new Imap({
    user: process.env.IMAP_USER,
    password: process.env.IMAP_PASS,
    host: process.env.IMAP_HOST,
    port: Number.isNaN(parsedPort) ? defaultPort : parsedPort,
    tls: useTls,
    tlsOptions: {
      rejectUnauthorized: process.env.IMAP_TLS_REJECT_UNAUTHORIZED !== 'false'
    },
    connTimeout: 60000,
    authTimeout: 10000
  });
}
/**
 * Fetch sender and subject of every unseen message in the open mailbox.
 *
 * @param {object} imap - Connected IMAP client with a mailbox already open.
 * @param {object} box - Mailbox object from openBox(); not used here.
 * @returns {Promise<Array<{uid: number, subject: string, from: string}>>}
 *   Resolves with one entry per unseen message (empty when there are none);
 *   rejects when the search or the fetch reports an error.
 */
function fetchNewMessages(imap, box) {
  return new Promise((resolve, reject) => {
    imap.search(['UNSEEN'], (searchError, unseen) => {
      if (searchError) {
        reject(searchError);
        return;
      }
      const nothingUnseen = !unseen || unseen.length === 0;
      if (nothingUnseen) {
        resolve([]);
        return;
      }

      const collected = [];
      const request = imap.fetch(unseen, {
        bodies: ['HEADER.FIELDS (FROM SUBJECT)'],
        struct: true
      });

      request.on('message', (msg, seqno) => {
        let parsedHeader = {};
        let messageUid = 0;

        msg.on('body', (stream) => {
          let rawHeader = '';
          stream.on('data', (chunk) => {
            rawHeader += chunk;
          });
          stream.on('end', () => {
            parsedHeader = Imap.parseHeader(rawHeader);
          });
        });

        msg.once('attributes', (attrs) => {
          messageUid = attrs.uid;
        });

        msg.once('end', () => {
          // Fall back to placeholders when a header field is absent.
          let subject = 'No subject';
          if (parsedHeader.subject) {
            subject = parsedHeader.subject[0];
          }
          let from = 'Unknown';
          if (parsedHeader.from) {
            from = parsedHeader.from[0];
          }
          collected.push({ uid: messageUid, subject, from });
        });
      });

      request.once('error', reject);
      request.once('end', () => resolve(collected));
    });
  });
}
/**
 * Alert on every fetched message whose UID is above `lastUid`.
 *
 * Messages are handled in ascending UID order so that an out-of-order fetch
 * result cannot raise `lastUid` past a message that has not been alerted yet.
 * State is saved once after the batch.
 *
 * @param {Array<{uid: number, subject: string, from: string}>} messages
 * @returns {number} How many alerts were sent.
 */
function alertOnNewMessages(messages) {
  const fresh = messages
    .filter((msg) => msg.uid > lastUid)
    .sort((a, b) => a.uid - b.uid);
  for (const msg of fresh) {
    lastUid = msg.uid;
    console.log(`[${new Date().toISOString()}] New message: ${msg.subject} (UID: ${msg.uid})`);
    sendAlert(msg.subject, msg.from);
  }
  if (fresh.length > 0) {
    saveState();
  }
  return fresh.length;
}

/**
 * Connect to the mailbox, alert on unseen messages and keep watching for new
 * mail. Starts a fresh monitor 30 seconds after the connection fails or ends.
 *
 * @returns {Promise<void>} Resolves once the connection attempt has started.
 */
async function monitor() {
  loadState();
  console.log(`[${new Date().toISOString()}] Starting IMAP IDLE monitor for ${process.env.IMAP_USER}`);
  console.log(`[${new Date().toISOString()}] Last seen UID: ${lastUid}`);
  const imap = createImap();

  // 'error' is commonly followed by 'end' on the same connection; without
  // this guard each failure would start two monitors, doubling every time.
  let reconnectScheduled = false;
  const scheduleReconnect = () => {
    if (reconnectScheduled) {
      return;
    }
    reconnectScheduled = true;
    setTimeout(monitor, 30000);
  };

  imap.once('ready', () => {
    imap.openBox('INBOX', false, async (err, box) => {
      if (err) {
        console.error('Error opening inbox:', err);
        // Close the session so the 'end' handler reconnects instead of
        // leaving a connected but idle process behind.
        imap.end();
        return;
      }
      console.log(`[${new Date().toISOString()}] Connected to INBOX, watching for new emails...`);

      const checkMailbox = async (failureLabel) => {
        try {
          alertOnNewMessages(await fetchNewMessages(imap, box));
        } catch (e) {
          console.error(failureLabel, e);
        }
      };

      // Listeners are attached before the initial fetch so mail arriving
      // while it runs is not missed; the UID check prevents duplicate alerts.
      imap.on('mail', async (numNewMsgs) => {
        console.log(`[${new Date().toISOString()}] ${numNewMsgs} new message(s) received`);
        await checkMailbox('Error fetching new messages:');
      });
      imap.on('update', (seqno, info) => {
        console.log(`[${new Date().toISOString()}] Update: seqno=${seqno}`);
      });
      imap.on('expunge', (seqno) => {
        console.log(`[${new Date().toISOString()}] Expunge: seqno=${seqno}`);
      });

      // Fetch initial unread messages
      await checkMailbox('Error fetching messages:');
    });
  });

  // `on`, not `once`: a second 'error' with no listener attached would be
  // thrown by the emitter and crash the process.
  imap.on('error', (err) => {
    console.error(`[${new Date().toISOString()}] IMAP error:`, err.message);
    // Drop the broken connection so it cannot keep delivering events
    // alongside its replacement.
    imap.destroy();
    // Reconnect after 30 seconds
    scheduleReconnect();
  });
  imap.once('end', () => {
    console.log(`[${new Date().toISOString()}] Connection ended, reconnecting in 30s...`);
    scheduleReconnect();
  });
  imap.connect();
}
// Exit cleanly on either termination signal, then start monitoring.
for (const signalName of ['SIGINT', 'SIGTERM']) {
  process.on(signalName, () => {
    console.log('\nShutting down...');
    process.exit(0);
  });
}

monitor();