# openclaw-backups/automations/ai-newsletter-digest/digest.py

#!/usr/bin/env python3
"""
AI Newsletter Digest
Consolidates AI-related newsletters into a single, deduplicated summary
"""
import sys
import os
import json
import subprocess
from pathlib import Path
from datetime import datetime
import re

# Locate the email skill relative to this script. WORKSPACE is the third
# ancestor directory of this file, and the IMAP/SMTP skill is expected under
# WORKSPACE/skills/imap-smtp-email. Nothing is added to sys.path: the skill's
# script is run as a subprocess by the fetch_* functions.
WORKSPACE = Path(__file__).parent.parent.parent
EMAIL_SKILL = WORKSPACE / "skills" / "imap-smtp-email"

# Lowercase keywords that mark an email as a likely AI newsletter. They are
# compared against the lowercased "sender subject" text by is_ai_newsletter().
# NOTE(review): 'newsletter' is generic, so any newsletter matches, and the
# short entries ('ai', 'ml') also occur inside unrelated words such as
# "gmail" or "html" when plain substring matching is used.
AI_KEYWORDS = [
    'ai', 'artificial intelligence', 'machine learning', 'ml', 'llm',
    'gpt', 'claude', 'openai', 'anthropic', 'deepmind', 'neural',
    'chatgpt', 'transformer', 'diffusion', 'generative', 'newsletter'
]

def log(msg):
    """Print a progress/diagnostic message on standard error.

    Standard output is left untouched so it can carry the script's JSON
    result.
    """
    stream = sys.stderr
    print(msg, file=stream)

def is_ai_newsletter(from_addr, subject, keywords=None):
    """Return True if the sender or subject mentions an AI-related keyword.

    The sender and subject are combined and lowercased, then checked against
    each keyword:

    * Keywords longer than three characters match as plain substrings, so
      "transformers" still matches 'transformer'.
    * Keywords of three characters or fewer ('ai', 'ml', 'llm', 'gpt') must
      stand alone: they may not be preceded or followed by an ASCII letter,
      apart from an optional plural "s". Plain substring matching on these
      would flag nearly every message, because "gmail" and "email" contain
      "ai" and "html" contains "ml".

    Args:
        from_addr: Sender header text.
        subject: Subject header text.
        keywords: Optional iterable of keywords to use instead of the
            module-level AI_KEYWORDS.

    Returns:
        bool: True when at least one keyword matches.
    """
    if keywords is None:
        keywords = AI_KEYWORDS

    text = f"{from_addr} {subject}".lower()

    for keyword in keywords:
        keyword = keyword.lower()
        if len(keyword) > 3:
            if keyword in text:
                return True
        # Short keyword: require letter boundaries on both sides so it cannot
        # match in the middle of an ordinary word. Digits and punctuation are
        # allowed as neighbours ("gpt4", "gpt-4", "x.ai").
        elif re.search(rf"(?<![a-z]){re.escape(keyword)}s?(?![a-z])", text):
            return True
    return False

def _parse_search_output(output):
    """Parse the text printed by the IMAP skill's ``search`` command.

    The parser expects one record per message: a line starting with '●' that
    contains ``UID: <value>``, followed by optional ``From:``, ``Subject:``
    and ``Date:`` lines. Leading and trailing whitespace on every line is
    ignored.

    Args:
        output: Raw stdout of the search command.

    Returns:
        list[dict]: One dict per message with a 'uid' key and, when present
        in the output, 'from', 'subject' and 'date' keys.
    """
    header_fields = (
        ('From:', 'from'),
        ('Subject:', 'subject'),
        ('Date:', 'date'),
    )

    emails = []
    current = None

    for raw_line in output.strip().split('\n'):
        line = raw_line.strip()

        if line.startswith('●'):
            if 'UID:' not in line:
                continue
            # A new record begins: close the previous one first.
            if current is not None:
                emails.append(current)
            tokens = line.split('UID:', 1)[1].split()
            # A marker without a UID value cannot be fetched later, so drop
            # it (and its header lines) instead of raising IndexError.
            current = {'uid': tokens[0]} if tokens else None
            continue

        if current is None:
            # Header lines before the first UID belong to no message; keeping
            # them would produce a record without a 'uid'.
            continue

        for prefix, key in header_fields:
            if line.startswith(prefix):
                # Slice off only the leading prefix. str.replace() would also
                # delete later occurrences, e.g. in "Subject: Re: Subject: x".
                current[key] = line[len(prefix):].strip()
                break

    if current is not None:
        emails.append(current)

    return emails


def fetch_unread_emails(limit=50):
    """Fetch metadata for unread emails using the IMAP skill.

    Runs ``imap-py.py search --unseen --limit <limit>`` from the email skill
    as a subprocess and parses its stdout.

    Args:
        limit: Maximum number of messages to request from the skill.

    Returns:
        list[dict]: Parsed messages (see _parse_search_output). An empty list
        is returned when the subprocess exits with a non-zero status; the
        error output is logged to stderr in that case.
    """
    log("📧 Fetching unread emails...")

    cmd = [
        "python3",
        str(EMAIL_SKILL / "scripts" / "imap-py.py"),
        "search",
        "--unseen",
        "--limit", str(limit),
    ]

    result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode != 0:
        log(f"Error fetching emails: {result.stderr}")
        return []

    return _parse_search_output(result.stdout)

def fetch_email_body(uid):
    """Fetch the full body of one email using the IMAP skill.

    Runs ``imap-py.py fetch <uid>`` from the email skill as a subprocess. The
    body is taken to be everything after the first run of 80 dashes in the
    output; when no such separator is present the whole output is returned.

    Args:
        uid: Message UID as reported by the search command. It is converted
            to ``str`` before being passed on the command line.

    Returns:
        str | None: The stripped body text, or None when the subprocess exits
        with a non-zero status (the error output is logged to stderr).
    """
    log(f"  Fetching email {uid}...")

    cmd = [
        "python3",
        str(EMAIL_SKILL / "scripts" / "imap-py.py"),
        "fetch",
        str(uid),
    ]

    result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode != 0:
        log(f"  Error fetching email {uid}: {result.stderr.strip()}")
        return None

    # Split on the FIRST separator only. Newsletters often contain their own
    # dashed rules, and splitting on every one would cut the body off at the
    # first rule inside the message.
    _headers, separator, body = result.stdout.partition('-' * 80)
    if separator:
        return body.strip()

    return result.stdout.strip()

def extract_key_points(newsletters, max_body_chars=3000):
    """Build the LLM prompt for summarising and deduplicating newsletters.

    No LLM is called here. The function assembles a prompt from the given
    newsletters, writes it to ``/tmp/digest_prompt.txt`` and returns it with a
    flag telling the caller that the LLM step still has to be run.

    Args:
        newsletters: Iterable of dicts with 'from', 'subject' and 'body'
            keys. Missing or empty values are rendered as empty strings.
        max_body_chars: Maximum number of body characters included per
            newsletter, to keep the prompt bounded.

    Returns:
        dict: ``{'prompt': <str>, 'needs_llm': True}``.
    """
    log("🧠 Analyzing newsletters and extracting key points...")

    # Prepare newsletter content for analysis
    newsletter_texts = []
    for i, newsletter in enumerate(newsletters, 1):
        sender = newsletter.get('from', '')
        subject = newsletter.get('subject', '')
        body = (newsletter.get('body') or '')[:max_body_chars]
        newsletter_texts.append(
            f"NEWSLETTER {i} - {sender}\nSubject: {subject}\n\n{body}"
        )

    # Put a full-width rule before every newsletter. The parentheses-free
    # form `"\n\n" + "="*80 + "\n\n".join(...)` only joins with "\n\n",
    # because the method call binds tighter than `+`.
    separator = "\n\n" + "=" * 80 + "\n\n"
    combined = "".join(separator + text for text in newsletter_texts)

    # Prompt for LLM
    prompt = f"""You are analyzing {len(newsletters)} AI-related newsletters. Extract the key information and insights, removing duplicates and synthesizing similar news across sources.

{combined}

Please provide:
1. **Top AI News** - The most important developments mentioned (deduplicated)
2. **Product Launches** - New tools, models, or features announced
3. **Research Highlights** - Notable papers or breakthroughs
4. **Industry Trends** - Patterns or themes across multiple newsletters
5. **Notable Quotes** - Interesting perspectives from thought leaders

Format as markdown with clear sections. Be concise but informative. If the same news appears in multiple newsletters, mention it once and note it's widely covered."""

    # Persist the prompt so the surrounding agent can pick it up. Newsletter
    # text routinely contains non-ASCII characters, so the encoding is fixed
    # rather than left to the platform default.
    prompt_file = Path("/tmp/digest_prompt.txt")
    prompt_file.write_text(prompt, encoding="utf-8")

    log("  Generating digest with LLM...")
    log("  (This may take a moment...)")

    # Placeholder result: the agent running this script is expected to send
    # the prompt to an LLM.
    return {
        'prompt': prompt,
        'needs_llm': True
    }

def create_digest(newsletters):
    """Package newsletters into a structure for the agent to analyse.

    Args:
        newsletters: List of newsletter dicts. The 'from' key is optional,
            since the search-output parser only sets it when a ``From:`` line
            was present.

    Returns:
        str | dict: A fixed message string when ``newsletters`` is empty or
        None. Otherwise a dict with:
            'count': number of newsletters,
            'sources': one sender per newsletter, in the same order
                ('(unknown sender)' when a newsletter has no 'from'),
            'newsletters': the list that was passed in.
    """
    if not newsletters:
        return "No AI newsletters found in unread emails."

    # .get() avoids a KeyError for records that were parsed without a sender.
    return {
        'count': len(newsletters),
        'sources': [n.get('from', '(unknown sender)') for n in newsletters],
        'newsletters': newsletters,
    }

def main():
    """Collect unread AI newsletters and print them as JSON on stdout.

    Steps:
      1. Fetch up to 50 unread emails via the IMAP skill.
      2. Keep those whose sender/subject match is_ai_newsletter().
      3. Fetch the full body of at most ``max_newsletters`` of them and keep
         only those for which a non-empty body was returned.
      4. Print a JSON object with 'status', 'count', 'sources' and
         'newsletters'. If step 2 finds nothing, a 'no_newsletters' status
         object is printed instead.

    Progress messages go to stderr through log(), so stdout contains only the
    JSON document.
    """
    max_newsletters = 10  # cap on bodies fetched, to avoid overwhelming the agent
    unknown_sender = '(unknown sender)'

    log("🤖 AI Newsletter Digest Generator")
    log("=" * 60)

    # Fetch unread emails
    emails = fetch_unread_emails(limit=50)

    # Filter for AI newsletters
    log(f"📊 Found {len(emails)} unread emails")
    ai_newsletters = [
        email for email in emails
        if is_ai_newsletter(email.get('from', ''), email.get('subject', ''))
    ]

    log(f"🎯 Found {len(ai_newsletters)} AI-related newsletters")

    if not ai_newsletters:
        print(json.dumps({'status': 'no_newsletters', 'message': 'No AI newsletters found'}))
        return

    # Fetch full content for each candidate, keeping only the ones that
    # actually produced a body.
    log("📖 Fetching full newsletter content...")
    fetched = []
    for newsletter in ai_newsletters[:max_newsletters]:
        uid = newsletter.get('uid')
        if not uid:
            # Without a UID there is nothing to fetch; skip instead of
            # raising KeyError.
            continue
        body = fetch_email_body(uid)
        if body:
            newsletter['body'] = body
            fetched.append(newsletter)

    log(f"✅ Successfully fetched {len(fetched)} newsletters")

    # dict.fromkeys() removes duplicate senders while keeping first-seen
    # order, so the output is stable between runs (a set is not). The sender
    # is optional in parsed records, hence .get().
    sources = list(dict.fromkeys(
        n.get('from', unknown_sender) for n in fetched
    ))

    # Output structured data for the agent to analyze
    result = {
        'status': 'success',
        'count': len(fetched),
        'sources': sources,
        'newsletters': fetched,
    }

    print(json.dumps(result, indent=2))

    # Log summary to stderr
    log("\n📋 Newsletter Sources:")
    for source in result['sources']:
        log(f"  • {source}")

# Run the digest only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()