Auto backup: 2026-02-17 18:00
This commit is contained in:
206
automations/ai-newsletter-digest/digest.py
Normal file
206
automations/ai-newsletter-digest/digest.py
Normal file
@@ -0,0 +1,206 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
AI Newsletter Digest
|
||||
Consolidates AI-related newsletters into a single, deduplicated summary
|
||||
"""
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import re
|
||||
|
||||
# Filesystem anchors: the workspace root sits three directory levels above
# this script, and the IMAP/SMTP email skill lives under <workspace>/skills.
WORKSPACE = Path(__file__).parent.parent.parent
EMAIL_SKILL = WORKSPACE.joinpath("skills", "imap-smtp-email")
|
||||
|
||||
# Lowercase terms that flag an email as an AI newsletter when they show up
# in its sender or subject text (see is_ai_newsletter).
AI_KEYWORDS = [
    'ai',
    'artificial intelligence',
    'machine learning',
    'ml',
    'llm',
    'gpt',
    'claude',
    'openai',
    'anthropic',
    'deepmind',
    'neural',
    'chatgpt',
    'transformer',
    'diffusion',
    'generative',
    'newsletter',
]
|
||||
|
||||
def log(msg):
    """Write ``msg`` followed by a newline to standard error.

    Progress output goes to stderr so that stdout carries only the JSON
    result printed by main().
    """
    sys.stderr.write(str(msg) + "\n")
|
||||
|
||||
def is_ai_newsletter(from_addr, subject, keywords=None):
    """Return True when the sender or subject mentions an AI-related keyword.

    Keywords are matched case-insensitively as standalone terms rather than
    raw substrings. Plain substring matching made short entries such as
    'ai' and 'ml' fire inside unrelated words ('email', 'gmail', 'html'),
    which classified almost every message as an AI newsletter. A keyword now
    matches only when it is not directly preceded or followed by another
    letter; a trailing plural 's' is tolerated ('LLMs'), as are adjacent
    digits and punctuation ('GPT-4', 'ai-weekly@...').

    Args:
        from_addr: Sender header text; None is treated as empty.
        subject: Subject header text; None is treated as empty.
        keywords: Optional iterable of keyword strings to look for.
            Defaults to the module-level AI_KEYWORDS list.

    Returns:
        bool: True if at least one keyword occurs as a standalone term.
    """
    if keywords is None:
        keywords = AI_KEYWORDS

    # Escape each keyword so entries are always treated as literal text.
    alternatives = "|".join(re.escape(k.lower()) for k in keywords if k)
    if not alternatives:
        return False

    text = f"{from_addr or ''} {subject or ''}".lower()
    # Letter-only lookarounds: reject 'em[ai]l' but accept 'gpt4' / 'ai-news'.
    pattern = rf"(?<![a-z])(?:{alternatives})s?(?![a-z])"
    return re.search(pattern, text) is not None
|
||||
|
||||
def _parse_search_output(stdout):
    """Turn the text printed by the IMAP skill's ``search`` command into dicts.

    NOTE(review): the expected layout is inferred from this parser, not from
    imap-py.py itself -- each message is assumed to start with a line that
    begins with '●' and contains 'UID: <id>', followed by 'From:',
    'Subject:' and 'Date:' lines. Confirm against the skill's real output.

    Args:
        stdout: Captured standard output of the search command.

    Returns:
        list[dict]: One dict per message. 'uid' is always present; 'from',
        'subject' and 'date' are present only when the matching header line
        was seen.
    """
    header_keys = (('From:', 'from'), ('Subject:', 'subject'), ('Date:', 'date'))
    emails = []
    current = None  # record being filled in; None until a UID line is seen

    for raw_line in stdout.strip().split('\n'):
        line = raw_line.strip()

        # Only the '●' marker is tested: the line has already been stripped,
        # so a leading-space marker can never be observed here.
        if line.startswith('●'):
            if 'UID:' in line:
                if current is not None:
                    emails.append(current)
                tokens = line.split('UID:', 1)[1].split()
                # A marker line without an id is skipped rather than raising
                # IndexError; its header lines are dropped with it.
                current = {'uid': tokens[0]} if tokens else None
            continue

        if current is None:
            # Header text before the first UID line belongs to no message;
            # keeping it would yield a record without 'uid'.
            continue

        for label, key in header_keys:
            if line.startswith(label):
                # Slice off only the leading label; str.replace would also
                # delete later occurrences, e.g. in 'Subject: Fwd: From: x'.
                current[key] = line[len(label):].strip()
                break

    if current is not None:
        emails.append(current)

    return emails


def fetch_unread_emails(limit=50):
    """Fetch unread emails using the IMAP skill.

    Runs ``imap-py.py search --unseen --limit <limit>`` from the email skill
    directory in a subprocess and parses its standard output.

    Args:
        limit: Maximum number of messages to request from the search command.

    Returns:
        list[dict]: Parsed messages (see _parse_search_output). An empty
        list is returned, and the command's stderr is logged, when the
        command exits with a non-zero status.
    """
    log("📧 Fetching unread emails...")

    cmd = [
        "python3",
        str(EMAIL_SKILL / "scripts" / "imap-py.py"),
        "search",
        "--unseen",
        "--limit", str(limit),
    ]

    result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode != 0:
        log(f"Error fetching emails: {result.stderr}")
        return []

    return _parse_search_output(result.stdout)
|
||||
|
||||
def fetch_email_body(uid):
    """Fetch the full body of one email through the IMAP skill.

    Runs ``imap-py.py fetch <uid>`` in a subprocess. The body is taken to be
    everything after the FIRST line of 80 dashes in the output, so a dashed
    rule inside the message itself no longer cuts the body short.

    NOTE(review): if imap-py.py prints a closing 80-dash rule or a footer
    after the body, that text is now included -- confirm against the skill's
    real output.

    Args:
        uid: Message UID as produced by the search step; converted to str
            before being passed on the command line.

    Returns:
        str | None: The stripped body text, or the whole stripped output if
        no separator is present. None when the command exits with a
        non-zero status (the command's stderr is logged in that case).
    """
    log(f" Fetching email {uid}...")

    cmd = [
        "python3",
        str(EMAIL_SKILL / "scripts" / "imap-py.py"),
        "fetch",
        str(uid),
    ]

    result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode != 0:
        # Report the failure the same way fetch_unread_emails does instead
        # of dropping the message silently.
        log(f"Error fetching email {uid}: {result.stderr}")
        return None

    # partition() splits on the first separator only.
    _headers, separator, body = result.stdout.partition('-' * 80)
    if separator:
        return body.strip()

    return result.stdout.strip()
|
||||
|
||||
def extract_key_points(newsletters, max_body_chars=3000):
    """Build the LLM prompt used to summarise and deduplicate newsletters.

    This function does not call an LLM itself. It assembles the prompt,
    makes a best-effort copy of it at /tmp/digest_prompt.txt, and returns it
    for the caller to run.

    Args:
        newsletters: Iterable of dicts with 'from', 'subject' and 'body'
            entries; missing entries are treated as empty strings.
        max_body_chars: Maximum number of characters of each body to include
            in the prompt.

    Returns:
        dict: {'prompt': <prompt text>, 'needs_llm': True}.
    """
    log("🧠 Analyzing newsletters and extracting key points...")

    # Prepare newsletter content for analysis
    newsletter_texts = []
    for i, newsletter in enumerate(newsletters, 1):
        sender = newsletter.get('from', '')
        subject = newsletter.get('subject', '')
        body = (newsletter.get('body') or '')[:max_body_chars]
        newsletter_texts.append(
            f"NEWSLETTER {i} - {sender}\nSubject: {subject}\n\n{body}"
        )

    # Build the full separator first. Writing
    # '"\n\n" + "=" * 80 + "\n\n".join(...)' applies join to the last literal
    # only, which put the rule once at the front and separated newsletters
    # with nothing but a blank line.
    separator = "\n\n" + "=" * 80 + "\n\n"
    combined = separator.join(newsletter_texts)

    # Prompt for LLM
    prompt = f"""You are analyzing {len(newsletters)} AI-related newsletters. Extract the key information and insights, removing duplicates and synthesizing similar news across sources.

{combined}

Please provide:
1. **Top AI News** - The most important developments mentioned (deduplicated)
2. **Product Launches** - New tools, models, or features announced
3. **Research Highlights** - Notable papers or breakthroughs
4. **Industry Trends** - Patterns or themes across multiple newsletters
5. **Notable Quotes** - Interesting perspectives from thought leaders

Format as markdown with clear sections. Be concise but informative. If the same news appears in multiple newsletters, mention it once and note it's widely covered."""

    # Keep a copy of the prompt on disk. The write is best-effort: the prompt
    # is returned regardless, so a missing or read-only /tmp must not abort
    # the run. UTF-8 is explicit because newsletter text is often non-ASCII.
    prompt_file = Path("/tmp/digest_prompt.txt")
    try:
        prompt_file.write_text(prompt, encoding="utf-8")
    except OSError as exc:
        log(f"Could not write {prompt_file}: {exc}")

    log(" Generating digest with LLM...")
    log(" (This may take a moment...)")

    # Placeholder result: the agent running this script is expected to
    # supply LLM access and execute the returned prompt.
    return {
        'prompt': prompt,
        'needs_llm': True,
    }
|
||||
|
||||
def create_digest(newsletters):
    """Create the final digest structure for the agent to analyze.

    Args:
        newsletters: List of newsletter dicts. A record without a 'from'
            entry contributes an empty string to 'sources' instead of
            raising KeyError.

    Returns:
        str | dict: A fixed message string when ``newsletters`` is empty or
        None; otherwise a dict with 'count' (number of newsletters),
        'sources' (one sender per newsletter, in order) and 'newsletters'
        (the input list itself).
    """
    if not newsletters:
        return "No AI newsletters found in unread emails."

    # For now, return structured data that the agent can analyze
    return {
        'count': len(newsletters),
        'sources': [n.get('from', '') for n in newsletters],
        'newsletters': newsletters,
    }
|
||||
|
||||
def main():
    """Collect unread AI newsletters and print them as JSON on stdout.

    Steps: fetch up to 50 unread emails, keep those is_ai_newsletter()
    accepts, download the body of at most 10 of them, and print a JSON
    object with 'status', 'count', 'sources' and 'newsletters'. Progress
    messages go to stderr through log(). When no email qualifies, a
    'no_newsletters' status object is printed instead.
    """
    max_newsletters = 10  # cap on bodies fetched, to avoid overwhelming

    log("🤖 AI Newsletter Digest Generator")
    log("=" * 60)

    # Fetch unread emails
    emails = fetch_unread_emails(limit=50)

    # Filter for AI newsletters
    log(f"📊 Found {len(emails)} unread emails")
    ai_newsletters = [
        email for email in emails
        if is_ai_newsletter(email.get('from', ''), email.get('subject', ''))
    ]

    log(f"🎯 Found {len(ai_newsletters)} AI-related newsletters")

    if not ai_newsletters:
        print(json.dumps({'status': 'no_newsletters', 'message': 'No AI newsletters found'}))
        return

    # Fetch full content for each, keeping only those whose body was fetched
    log("📖 Fetching full newsletter content...")
    fetched = []
    for newsletter in ai_newsletters[:max_newsletters]:
        uid = newsletter.get('uid')
        if not uid:
            # Without a UID there is nothing to fetch; skip the record
            # rather than raising KeyError.
            continue
        body = fetch_email_body(uid)
        if body:
            newsletter['body'] = body
            fetched.append(newsletter)

    log(f"✅ Successfully fetched {len(fetched)} newsletters")

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # output is stable between runs (set() ordering is not). .get() guards
    # against records that matched on subject but have no sender.
    sources = list(dict.fromkeys(n.get('from', '') for n in fetched))

    # Output structured data for the agent to analyze
    result = {
        'status': 'success',
        'count': len(fetched),
        'sources': sources,
        'newsletters': fetched,
    }

    print(json.dumps(result, indent=2))

    # Log summary to stderr
    log("\n📋 Newsletter Sources:")
    for source in result['sources']:
        log(f" • {source}")
||||
|
||||
# Script entry point: run the digest only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user