Files
openclaw-backups/automations/ai-newsletter-digest/digest.py
2026-02-17 18:00:21 +00:00

207 lines
6.4 KiB
Python

#!/usr/bin/env python3
"""
AI Newsletter Digest
Consolidates AI-related newsletters into a single, deduplicated summary
"""
import sys
import os
import json
import subprocess
from pathlib import Path
from datetime import datetime
import re
# Filesystem locations: WORKSPACE is the directory two levels above the
# directory containing this script; EMAIL_SKILL is the IMAP/SMTP email skill
# directory inside it (its scripts are invoked as subprocesses below).
WORKSPACE = Path(__file__).parent.parent.parent
EMAIL_SKILL = WORKSPACE.joinpath("skills", "imap-smtp-email")

# Lower-case keywords searched for in an email's sender and subject to decide
# whether it is treated as an AI newsletter.
AI_KEYWORDS = [
    "ai",
    "artificial intelligence",
    "machine learning",
    "ml",
    "llm",
    "gpt",
    "claude",
    "openai",
    "anthropic",
    "deepmind",
    "neural",
    "chatgpt",
    "transformer",
    "diffusion",
    "generative",
    "newsletter",
]
def log(msg):
    """Print *msg* on standard error so that stdout stays reserved for output data."""
    stream = sys.stderr
    print(msg, file=stream)
def is_ai_newsletter(from_addr, subject):
    """Check if an email is likely an AI newsletter.

    The sender and subject are combined, lower-cased and searched for the
    entries of ``AI_KEYWORDS``.

    Keywords of three characters or fewer ('ai', 'ml', 'llm', 'gpt') must
    appear as a stand-alone token: not directly preceded or followed by
    another letter, with an optional plural "s". Plain substring matching
    would make 'ai' match "email"/"gmail" and 'ml' match "html", which
    classifies almost every message as an AI newsletter. Longer keywords
    are still matched as substrings.

    Args:
        from_addr: Sender string of the email.
        subject: Subject line of the email.

    Returns:
        True if any keyword matches, otherwise False.
    """
    short_keyword_max_len = 3
    text = f"{from_addr} {subject}".lower()
    for keyword in AI_KEYWORDS:
        if len(keyword) <= short_keyword_max_len:
            # Letter-based lookarounds (rather than \b) so that digits,
            # '-', '_' and '@' still count as token boundaries, e.g.
            # "gpt4", "ai-weekly", "ai_news@example.com".
            pattern = rf"(?<![a-z]){re.escape(keyword)}s?(?![a-z])"
            if re.search(pattern, text):
                return True
        elif keyword in text:
            return True
    return False
def fetch_unread_emails(limit=50):
    """Fetch unread emails using the IMAP skill.

    Runs the skill's ``imap-py.py search --unseen --limit <limit>`` command
    and parses its text output line by line. A line containing ``UID:``
    starts a new email record; subsequent lines starting with ``From:``,
    ``Subject:`` or ``Date:`` fill in that record.
    NOTE(review): the exact output format of ``imap-py.py`` is not visible
    from this file; the parsing mirrors the markers the original code
    looked for - confirm against the skill script.

    Args:
        limit: Maximum number of messages to request from the search command.

    Returns:
        A list of dicts. Each has a ``'uid'`` key and, when the matching
        header lines were present, ``'from'``, ``'subject'`` and ``'date'``.
        An empty list is returned when the command exits with a non-zero
        status.
    """
    log("📧 Fetching unread emails...")
    cmd = [
        "python3",
        str(EMAIL_SKILL / "scripts" / "imap-py.py"),
        "search",
        "--unseen",
        "--limit", str(limit),
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        log(f"Error fetching emails: {result.stderr}")
        return []

    # (line prefix, key stored in the email dict)
    header_fields = (
        ('From:', 'from'),
        ('Subject:', 'subject'),
        ('Date:', 'date'),
    )

    emails = []
    current_email = None
    for raw_line in result.stdout.strip().split('\n'):
        line = raw_line.strip()

        # Header lines are checked first so that a subject which happens to
        # contain "UID:" is not mistaken for the start of a new email.
        for prefix, key in header_fields:
            if line.startswith(prefix):
                # Headers seen before any UID line have no email to attach
                # to and are ignored (a record without 'uid' is unusable).
                if current_email is not None:
                    # Slice off only the leading prefix; str.replace would
                    # also delete later occurrences inside the value.
                    current_email[key] = line[len(prefix):].strip()
                break
        else:
            if 'UID:' in line:
                tokens = line.split('UID:', 1)[1].split()
                if not tokens:
                    # "UID:" with nothing after it - skip the malformed line.
                    continue
                current_email = {'uid': tokens[0]}
                emails.append(current_email)
    return emails
def fetch_email_body(uid):
    """Fetch the full body of one email via the IMAP skill.

    Runs the skill's ``imap-py.py fetch <uid>`` command. If the output
    contains a run of 80 dashes, everything after the first such run is
    treated as the body; otherwise the whole output is used.

    Args:
        uid: UID of the message to fetch (converted to ``str`` for the
            command line).

    Returns:
        The stripped body text, or ``None`` when the command exits with a
        non-zero status.
    """
    log(f" Fetching email {uid}...")
    cmd = [
        "python3",
        str(EMAIL_SKILL / "scripts" / "imap-py.py"),
        "fetch",
        str(uid),
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        # Report the failure instead of dropping the message silently.
        log(f"Error fetching email {uid}: {result.stderr}")
        return None

    # Split only at the FIRST separator: newsletters often contain their own
    # long dash rules, and splitting on every one would truncate the body at
    # the next rule.
    _headers, separator, body = result.stdout.partition('-' * 80)
    if separator:
        return body.strip()
    return result.stdout.strip()
def extract_key_points(newsletters):
    """Build the LLM prompt for extracting and deduplicating key points.

    No LLM is called here: the prompt is written to
    ``/tmp/digest_prompt.txt`` and returned so that the caller can run it.

    Args:
        newsletters: List of dicts with ``'from'``, ``'subject'`` and
            ``'body'`` keys; missing keys are treated as empty strings.
            Only the first 3000 characters of each body are included.

    Returns:
        A dict ``{'prompt': <str>, 'needs_llm': True}``.
    """
    log("🧠 Analyzing newsletters and extracting key points...")

    # Prepare newsletter content for analysis
    newsletter_texts = [
        f"NEWSLETTER {i} - {newsletter.get('from', '')}\n"
        f"Subject: {newsletter.get('subject', '')}\n\n"
        f"{newsletter.get('body', '')[:3000]}"
        for i, newsletter in enumerate(newsletters, 1)
    ]
    # The separator must be built first and then joined on. Writing
    # `"\n\n" + "=" * 80 + "\n\n".join(...)` joins with "\n\n" only and
    # merely prefixes the rule once, because the method call binds tighter
    # than `+`.
    separator = "\n\n" + "=" * 80 + "\n\n"
    combined = separator.join(newsletter_texts)

    # Prompt for LLM
    prompt = f"""You are analyzing {len(newsletters)} AI-related newsletters. Extract the key information and insights, removing duplicates and synthesizing similar news across sources.
{combined}
Please provide:
1. **Top AI News** - The most important developments mentioned (deduplicated)
2. **Product Launches** - New tools, models, or features announced
3. **Research Highlights** - Notable papers or breakthroughs
4. **Industry Trends** - Patterns or themes across multiple newsletters
5. **Notable Quotes** - Interesting perspectives from thought leaders
Format as markdown with clear sections. Be concise but informative. If the same news appears in multiple newsletters, mention it once and note it's widely covered."""

    # Call LLM via openclaw (assuming this is running within openclaw context)
    # For now, create a temporary prompt file.
    # NOTE(review): this is a fixed, shared path in /tmp; concurrent runs
    # overwrite each other's prompt.
    prompt_file = Path("/tmp/digest_prompt.txt")
    # Explicit UTF-8: newsletter text routinely contains non-ASCII
    # characters, and the locale default encoding may not support them.
    prompt_file.write_text(prompt, encoding="utf-8")

    log(" Generating digest with LLM...")
    log(" (This may take a moment...)")

    # Return a placeholder for now - in production this would call the LLM
    # The agent running this will provide LLM access
    return {
        'prompt': prompt,
        'needs_llm': True,
    }
def create_digest(newsletters):
    """Bundle the newsletters into a structure the agent can analyze.

    Returns a plain message string when *newsletters* is empty; otherwise a
    dict holding the newsletter count, the ``'from'`` value of every
    newsletter (in input order) and the newsletter list itself.
    """
    if newsletters:
        senders = []
        for entry in newsletters:
            senders.append(entry['from'])
        # Structured data only for now; the agent performs the analysis.
        return {
            'count': len(newsletters),
            'sources': senders,
            'newsletters': newsletters,
        }
    return "No AI newsletters found in unread emails."
def main():
    """Run the digest pipeline and print the result as JSON on stdout.

    Steps: fetch up to 50 unread emails, keep those that
    ``is_ai_newsletter`` accepts, fetch the body of at most the first 10 of
    them, and print a JSON object with ``status``, ``count``, ``sources``
    and ``newsletters``. When no email passes the filter, a
    ``no_newsletters`` status object is printed instead. Progress messages
    go to stderr via ``log``.
    """
    max_newsletters = 10  # Limit to 10 to avoid overwhelming

    log("🤖 AI Newsletter Digest Generator")
    log("=" * 60)

    # Fetch unread emails
    emails = fetch_unread_emails(limit=50)

    # Filter for AI newsletters
    log(f"📊 Found {len(emails)} unread emails")
    ai_newsletters = [
        email for email in emails
        if is_ai_newsletter(email.get('from', ''), email.get('subject', ''))
    ]
    log(f"🎯 Found {len(ai_newsletters)} AI-related newsletters")

    if not ai_newsletters:
        print(json.dumps({'status': 'no_newsletters', 'message': 'No AI newsletters found'}))
        return

    # Fetch full content for each, keeping only those whose body was fetched
    log("📖 Fetching full newsletter content...")
    fetched = []
    for newsletter in ai_newsletters[:max_newsletters]:
        uid = newsletter.get('uid')
        if uid is None:
            # Without a UID there is nothing to fetch.
            continue
        body = fetch_email_body(uid)
        if body:
            newsletter['body'] = body
            fetched.append(newsletter)
    log(f"✅ Successfully fetched {len(fetched)} newsletters")

    # Output structured data for the agent to analyze.
    # dict.fromkeys de-duplicates senders while keeping first-seen order, so
    # the output is stable between runs (set() ordering is not). Messages
    # matched on subject alone may have no 'from' key and are skipped here.
    sources = list(dict.fromkeys(n['from'] for n in fetched if 'from' in n))
    result = {
        'status': 'success',
        'count': len(fetched),
        'sources': sources,
        'newsletters': fetched,
    }
    print(json.dumps(result, indent=2))

    # Log summary to stderr
    log("\n📋 Newsletter Sources:")
    for source in result['sources']:
        log(f"{source}")


if __name__ == '__main__':
    main()