207 lines
6.4 KiB
Python
207 lines
6.4 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
AI Newsletter Digest
|
|
Consolidates AI-related newsletters into a single, deduplicated summary
|
|
"""
|
|
import sys
|
|
import os
|
|
import json
|
|
import subprocess
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
import re
|
|
|
|
# Filesystem locations used by this script.
# WORKSPACE is the third-level parent of this file's path; EMAIL_SKILL is the
# directory of the IMAP/SMTP email skill inside that workspace.
WORKSPACE = Path(__file__).parent.parent.parent
EMAIL_SKILL = WORKSPACE.joinpath("skills", "imap-smtp-email")
|
|
|
|
# Lower-case terms searched for in an email's sender and subject to decide
# whether the message looks like an AI newsletter.
AI_KEYWORDS = [
    # General field terms
    'ai',
    'artificial intelligence',
    'machine learning',
    'ml',
    'llm',
    # Products and organisations
    'gpt',
    'claude',
    'openai',
    'anthropic',
    'deepmind',
    'neural',
    # Techniques and catch-alls
    'chatgpt',
    'transformer',
    'diffusion',
    'generative',
    'newsletter',
]
|
|
|
|
def log(msg):
    """Write *msg* and a trailing newline to standard error.

    Progress output goes to stderr so that stdout stays reserved for the
    JSON result printed by the script.
    """
    sys.stderr.write(str(msg) + "\n")
|
|
|
|
def is_ai_newsletter(from_addr, subject, keywords=None):
    """Return True if the sender or subject mentions an AI-related keyword.

    Matching is case-insensitive. Keywords of up to three characters
    (e.g. ``ai``, ``ml``, ``llm``, ``gpt``) must appear as a standalone token,
    optionally pluralised with a trailing ``s``; otherwise ``ai`` would match
    "gmail"/"email" and ``ml`` would match "html", flagging nearly every
    message. Longer keywords are specific enough to keep plain substring
    matching.

    Args:
        from_addr: Sender string of the email.
        subject: Subject line of the email.
        keywords: Optional iterable of keywords to look for. Defaults to the
            module-level ``AI_KEYWORDS``.

    Returns:
        bool: True when at least one keyword matches.
    """
    if keywords is None:
        keywords = AI_KEYWORDS

    text = f"{from_addr} {subject}".lower()

    for keyword in keywords:
        keyword = keyword.lower()
        if len(keyword) > 3:
            if keyword in text:
                return True
            continue
        # Short keyword: require that it is not embedded in a longer
        # alphanumeric run (so "ai" does not match inside "gmail").
        pattern = rf"(?<![a-z0-9]){re.escape(keyword)}s?(?![a-z0-9])"
        if re.search(pattern, text):
            return True

    return False
|
|
|
|
def _parse_search_output(output):
    """Parse the listing printed by the IMAP skill's ``search`` command.

    Each email is expected to start with a header line that begins with the
    ``●`` marker and contains ``UID: <uid>``, followed by ``From:``,
    ``Subject:`` and ``Date:`` lines. Returns a list of dicts with the keys
    ``uid`` and, when present in the output, ``from``, ``subject``, ``date``.
    """
    field_prefixes = (
        ('From:', 'from'),
        ('Subject:', 'subject'),
        ('Date:', 'date'),
    )

    emails = []
    current_email = None

    for raw_line in output.strip().split('\n'):
        line = raw_line.strip()

        # Lines are stripped above, so a header can only be recognised by the
        # bullet marker (a leading-space check could never be true here).
        if line.startswith('●'):
            if 'UID:' not in line:
                continue
            if current_email:
                emails.append(current_email)
            uid_tokens = line.split('UID:', 1)[1].split()
            # A header without a UID value cannot be fetched later; drop it
            # (and its following field lines) instead of raising IndexError.
            current_email = {'uid': uid_tokens[0]} if uid_tokens else None
            continue

        if current_email is None:
            # Field lines before any valid header would yield an email without
            # a 'uid', which callers index unconditionally.
            continue

        for prefix, key in field_prefixes:
            if line.startswith(prefix):
                # Remove only the leading label; the value itself may
                # legitimately contain the same text (e.g. "Re: Subject: x").
                current_email[key] = line[len(prefix):].strip()
                break

    if current_email:
        emails.append(current_email)

    return emails


def fetch_unread_emails(limit=50):
    """Fetch unread emails using the IMAP skill.

    Runs the skill's ``imap-py.py search --unseen --limit <limit>`` command
    and parses its text output.

    Args:
        limit: Maximum number of emails to request from the skill.

    Returns:
        list[dict]: One dict per email with ``uid`` and, when available,
        ``from``, ``subject`` and ``date``. An empty list is returned when
        the command exits with a non-zero status (the error is logged).
    """
    log("📧 Fetching unread emails...")

    cmd = [
        "python3",
        str(EMAIL_SKILL / "scripts" / "imap-py.py"),
        "search",
        "--unseen",
        "--limit", str(limit),
    ]

    result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode != 0:
        log(f"Error fetching emails: {result.stderr}")
        return []

    return _parse_search_output(result.stdout)
|
|
|
|
def fetch_email_body(uid):
    """Fetch the full body of one email through the IMAP skill.

    Runs the skill's ``imap-py.py fetch <uid>`` command. The body is taken to
    be everything after the first line of 80 dashes in the command's output;
    if no such separator is present, the whole output is returned.

    Args:
        uid: UID of the email to fetch.

    Returns:
        str | None: The stripped body text, or None when the command exits
        with a non-zero status (the error is logged).
    """
    log(f"  Fetching email {uid}...")

    cmd = [
        "python3",
        str(EMAIL_SKILL / "scripts" / "imap-py.py"),
        "fetch",
        str(uid),
    ]

    result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode != 0:
        # Report the failure like fetch_unread_emails does instead of
        # dropping the email silently.
        log(f"Error fetching email {uid}: {result.stderr}")
        return None

    separator = '-' * 80

    # Split only on the FIRST separator: newsletters frequently contain their
    # own dashed rules, and splitting on every one would truncate the body.
    _, found, body = result.stdout.partition(separator)
    if not found:
        return result.stdout.strip()

    body = body.strip()
    # NOTE(review): assumes the tool may close the body with a final dashed
    # rule; if so, drop it so the result matches the previous behaviour.
    if body.endswith(separator):
        body = body.rstrip('-').rstrip()

    return body
|
|
|
|
def extract_key_points(newsletters):
    """Build the LLM prompt used to extract and deduplicate key points.

    No LLM is called here: the prompt is written to ``/tmp/digest_prompt.txt``
    (best effort) and returned so that the agent running this script can
    perform the actual analysis.

    Args:
        newsletters: List of email dicts with ``from``, ``subject`` and
            ``body`` keys. Missing keys are replaced by placeholders and each
            body is truncated to 3000 characters.

    Returns:
        dict: ``{'prompt': <str>, 'needs_llm': True}``.
    """
    log("🧠 Analyzing newsletters and extracting key points...")

    # Prepare newsletter content for analysis
    newsletter_texts = [
        f"NEWSLETTER {i} - {newsletter.get('from', '(unknown sender)')}\n"
        f"Subject: {newsletter.get('subject', '(no subject)')}\n\n"
        f"{(newsletter.get('body') or '')[:3000]}"
        for i, newsletter in enumerate(newsletters, 1)
    ]

    # The rule of '=' must sit BETWEEN newsletters. Without the explicit
    # separator variable, "+" binds tighter than ".join", which produced a
    # single rule glued to the first newsletter and none between the others.
    separator = "\n\n" + "=" * 80 + "\n\n"
    combined = separator.join(newsletter_texts)

    # Prompt for LLM
    prompt = f"""You are analyzing {len(newsletters)} AI-related newsletters. Extract the key information and insights, removing duplicates and synthesizing similar news across sources.

{combined}

Please provide:
1. **Top AI News** - The most important developments mentioned (deduplicated)
2. **Product Launches** - New tools, models, or features announced
3. **Research Highlights** - Notable papers or breakthroughs
4. **Industry Trends** - Patterns or themes across multiple newsletters
5. **Notable Quotes** - Interesting perspectives from thought leaders

Format as markdown with clear sections. Be concise but informative. If the same news appears in multiple newsletters, mention it once and note it's widely covered."""

    # Keep a copy of the prompt on disk for the calling agent.
    # NOTE: fixed, predictable path in a shared temp directory.
    prompt_file = Path("/tmp/digest_prompt.txt")
    try:
        # Explicit UTF-8: newsletter text routinely contains emoji and other
        # characters the locale's default encoding may not support.
        prompt_file.write_text(prompt, encoding="utf-8")
    except OSError as exc:
        # The file is only a convenience copy; the prompt is still returned.
        log(f"   Could not write {prompt_file}: {exc}")

    log("   Generating digest with LLM...")
    log("   (This may take a moment...)")

    # Placeholder result: the agent running this script supplies LLM access.
    return {
        'prompt': prompt,
        'needs_llm': True,
    }
|
|
|
|
def create_digest(newsletters):
    """Create the final digest structure for the agent to analyze.

    Args:
        newsletters: List of email dicts (``from``, ``subject``, ``body``...).

    Returns:
        str | dict: A fixed message string when *newsletters* is empty;
        otherwise a dict with ``count`` (number of newsletters), ``sources``
        (sender of each newsletter, in order) and ``newsletters`` (the input
        list itself).
    """
    if not newsletters:
        return "No AI newsletters found in unread emails."

    # An email can be parsed without a From line (it may have been selected
    # via its subject alone), so do not index 'from' unconditionally.
    sources = [n.get('from', '(unknown sender)') for n in newsletters]

    return {
        'count': len(newsletters),
        'sources': sources,
        'newsletters': newsletters,
    }
|
|
|
|
def main():
    """Collect unread AI newsletters and print them as JSON on stdout.

    Steps: fetch unread emails, keep those that look like AI newsletters,
    download the body of at most 10 of them, and print a JSON object with
    ``status``, ``count``, ``sources`` and ``newsletters``. If no newsletter
    matches, a ``no_newsletters`` status object is printed instead. Progress
    messages go to stderr.
    """
    max_newsletters = 10  # cap to avoid overwhelming the downstream analysis

    log("🤖 AI Newsletter Digest Generator")
    log("=" * 60)

    # Fetch unread emails
    emails = fetch_unread_emails(limit=50)

    # Filter for AI newsletters
    log(f"📊 Found {len(emails)} unread emails")
    ai_newsletters = [
        email for email in emails
        if is_ai_newsletter(email.get('from', ''), email.get('subject', ''))
    ]

    log(f"🎯 Found {len(ai_newsletters)} AI-related newsletters")

    if not ai_newsletters:
        print(json.dumps({'status': 'no_newsletters', 'message': 'No AI newsletters found'}))
        return

    # Fetch full content for each (limited), keeping only those with a body
    log("📖 Fetching full newsletter content...")
    fetched = []
    for newsletter in ai_newsletters[:max_newsletters]:
        body = fetch_email_body(newsletter['uid'])
        if body:
            newsletter['body'] = body
            fetched.append(newsletter)

    log(f"✅ Successfully fetched {len(fetched)} newsletters")

    # De-duplicate senders while preserving first-seen order so the output is
    # deterministic; tolerate emails that were parsed without a From line.
    sources = list(dict.fromkeys(
        n.get('from', '(unknown sender)') for n in fetched
    ))

    # Output structured data for the agent to analyze
    result = {
        'status': 'success',
        'count': len(fetched),
        'sources': sources,
        'newsletters': fetched,
    }

    print(json.dumps(result, indent=2))

    # Log summary to stderr
    log("\n📋 Newsletter Sources:")
    for source in result['sources']:
        log(f"  • {source}")
|
|
|
|
# Script entry point: run the digest only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
|