#!/usr/bin/env python3
"""
Enhanced Content Aggregation for OpenClaw Daily Digest
Features: topic tags, read time, trending detection, color-coded sources, LLM summaries
"""

import json
import sys
import os
import re
import hashlib
from datetime import datetime, timedelta
from typing import List, Dict, Any, Tuple

sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'sources'))

from reddit_fetcher import fetch_reddit_content
from news_fetcher import fetch_news_content

# Topic detection keywords (all lowercase; matched against lowercased text).
# 'chatgpt' is listed explicitly because the short keyword 'gpt' is matched as
# a standalone token and therefore no longer fires inside "chatgpt".
TOPIC_KEYWORDS = {
    'AI/LLMs': ['llm', 'gpt', 'claude', 'openai', 'anthropic', 'model', 'training',
                'inference', 'token', 'embedding', 'fine-tune', 'rag', 'chatgpt'],
    'Coding': ['code', 'github', 'programming', 'developer', 'api', 'python',
               'javascript', 'typescript', 'rust', 'go'],
    'Home Automation': ['home assistant', 'smart home', 'automation', 'zigbee',
                        'z-wave', 'mqtt', 'iot', 'sensor'],
    'Self-Hosting': ['self-host', 'homelab', 'server', 'docker', 'kubernetes',
                     'proxmox', 'nas', 'selfhosted'],
    'Hardware': ['gpu', 'nvidia', 'amd', 'cpu', 'ram', 'ssd', 'raspberry pi',
                 'arduino', 'esp32'],
    'Privacy': ['privacy', 'security', 'encryption', 'vpn', 'tor', 'self-hosted',
                'data protection'],
    'OpenClaw': ['openclaw', 'claw', 'mcp', 'agent', 'skill', 'clawhub'],
}

# User's top topics (will be updated based on clicks over time)
TOP_TOPICS = ['AI/LLMs', 'OpenClaw', 'Coding', 'Home Automation']

# Keywords of at most this many characters must stand alone as a token.
# Plain substring matching made 'go' fire on "google", 'rag' and 'tor' on
# "storage", 'ram' on "program", and so on.
_SHORT_KEYWORD_LEN = 3

# Maximum number of topic tags attached to a single story.
_MAX_TOPICS = 3


def _keyword_regex(keyword: str) -> str:
    """Return the regex source used to find *keyword* in lowercased text."""
    escaped = re.escape(keyword)
    if len(keyword) <= _SHORT_KEYWORD_LEN:
        # No letter directly before or after; an optional plural "s" and
        # trailing digits/punctuation are fine ("gpus", "gpt-4", "gpt4").
        return rf"(?<![a-z]){escaped}s?(?![a-z])"
    # Longer keywords keep substring semantics ("token" -> "tokenizer").
    return escaped


# One alternation per topic, compiled once at import time from TOPIC_KEYWORDS.
_TOPIC_PATTERNS = {
    topic: re.compile("|".join(_keyword_regex(kw) for kw in keywords))
    for topic, keywords in TOPIC_KEYWORDS.items()
}

_TOPIC_COLORS = {
    'AI/LLMs': '#00d2ff',
    'Coding': '#a29bfe',
    'Home Automation': '#20b47a',
    'Self-Hosting': '#ff9f43',
    'Hardware': '#ff6b6b',
    'Privacy': '#fd79a8',
    'OpenClaw': '#ee5a24',
}
_DEFAULT_TOPIC_COLOR = '#74b9ff'

# The literal in the visible source was mis-decoded; the "Very hot" and
# "Trending" comments indicate the fire emoji was intended.
_FIRE = "\N{FIRE}"


def detect_topics(title: str, excerpt: str = '') -> List[str]:
    """Detect topics from title and excerpt.

    Args:
        title: Story headline; ``None`` is treated as empty.
        excerpt: Optional body snippet; ``None`` is treated as empty.

    Returns:
        Up to three topic names, in ``TOPIC_KEYWORDS`` declaration order.
    """
    text = f"{title or ''} {excerpt or ''}".lower()
    matched = [topic for topic, pattern in _TOPIC_PATTERNS.items()
               if pattern.search(text)]
    return matched[:_MAX_TOPICS]


def get_topic_color(topic: str) -> str:
    """Get the hex color for a topic tag, or the default for unknown topics."""
    return _TOPIC_COLORS.get(topic, _DEFAULT_TOPIC_COLOR)


def estimate_read_time(url: str, excerpt: str = '') -> str:
    """Estimate read time based on content type.

    Args:
        url: Link to the content; ``None`` or empty skips the URL heuristics.
        excerpt: Optional text whose word count adjusts the estimate.

    Returns:
        A label of the form ``"<minutes> min read"``.
    """
    # Default 3 min for most content
    minutes = 3

    # Adjust based on excerpt length
    if excerpt:
        word_count = len(excerpt.split())
        if word_count > 500:
            minutes = 8
        elif word_count > 200:
            minutes = 5
        elif word_count < 50:
            minutes = 2

    # Check URL patterns for known content types
    link = (url or '').lower()
    if 'github.com' in link:  # also covers gist.github.com
        minutes = max(2, minutes - 1)  # GitHub tends to be code-heavy
    elif 'youtube.com' in link or 'youtu.be' in link:
        minutes = 10  # Videos take longer

    return f"{minutes} min read"


def _engagement(story: Dict) -> Tuple[int, int]:
    """Return ``(score, comments)`` for a story, treating missing/None as 0."""
    score = story.get('score') or story.get('points') or 0
    comments = story.get('num_comments') or 0
    return score, comments


def is_trending(story: Dict) -> bool:
    """Detect if a story is trending (high engagement ratio).

    Args:
        story: Mapping that may carry ``score`` (or ``points``) and
            ``num_comments``; absent or ``None`` values count as 0.

    Returns:
        True when score > 200, or when score > 50 with more than 20 comments
        and a comment-to-score ratio above 0.3.
    """
    score, comments = _engagement(story)
    if score <= 0:
        return False

    # High comment-to-score ratio = hot discussion
    ratio = comments / score
    return score > 200 or (score > 50 and comments > 20 and ratio > 0.3)


def get_trending_emoji(story: Dict) -> str:
    """Get appropriate trending indicator.

    Returns:
        Two fire emoji for very hot stories (score > 500 or comments > 100),
        one if ``is_trending(story)``, otherwise an empty string.
    """
    score, comments = _engagement(story)
    if score > 500 or comments > 100:
        return _FIRE * 2  # Very hot
    if is_trending(story):
        return _FIRE  # Trending
    return ""


def generate_quick_reply_links(story: Dict, story_id: str) -> str:
    """Generate quick action links for Telegram/Discord"""
    url = story.get('url', '')
    title = story.get('title', '')[:50]

    # Create deep links for quick actions
    # These would need corresponding bot handlers
    summarize_link = f"https://t.me/openclaw_bot?start=summarize_{story_id}"
    save_link = f"https://t.me/openclaw_bot?start=save_{story_id}"

    # NOTE(review): the markup of this template (presumably anchors built from
    # summarize_link / save_link) and its emoji are damaged in the visible
    # source. The remaining text is kept as-is and the string is terminated so
    # the module parses; restore the real template from version control.
    return f'''
| ๐ Summarize | ๐พ Save |
'''
{"".join(tags)}
' def format_reddit_story(story: Dict, include_quick_actions: bool = False) -> str: """Format Reddit story with all enhancements""" # Detect topics excerpt = story.get('selftext', '')[:200] topics = detect_topics(story.get('title', ''), excerpt) topic_html = format_topic_tags(topics) # Read time read_time = estimate_read_time(story.get('url', ''), excerpt) # Trending indicator trending = get_trending_emoji(story) # Engagement badges engagement = [] if story.get('score'): engagement.append(f"โ {story['score']}") if story.get('num_comments'): engagement.append(f"๐ฌ {story['num_comments']}") engagement.append(f"โฑ๏ธ {read_time}") # Title with flair flair = story.get('link_flair_text', '') title = story.get('title', '') if flair: title = f"[{flair}] {title}" # Story hash for quick actions story_hash = hashlib.md5(story.get('url', '').encode()).hexdigest()[:8] quick_actions = generate_quick_reply_links(story, story_hash) if include_quick_actions else "" # Trim excerpt if len(story.get('selftext', '')) > 200: excerpt += "..." excerpt_html = f"{excerpt}
" if excerpt else "" trending_html = f"{trending}" if trending else "" return f'''
{trending_html}{title}u/{story.get('author', 'unknown')} {excerpt_html}{' ยท '.join(engagement)} {quick_actions} |
{excerpt}
" if excerpt else "" trending_html = f"{trending}" if trending else "" return f'''
{trending_html}{story.get('title', '')}{excerpt_html}{' ยท '.join(engagement)} {quick_actions} |
|
Your Top Topics {''.join(topic_badges)} |
๐ง X/Twitter integration coming soon
' # Generate text sections reddit_text = '\n'.join([format_story_text(s) for s in reddit_top]) if reddit_top else "No new Reddit posts today." news_text = '\n'.join([format_story_text(s) for s in news_top]) if news_top else "No new news articles today." twitter_text = "๐ง X/Twitter integration coming soon - requires API setup\n" # Build result result = { "meta": { "generated_at": datetime.utcnow().isoformat(), "time_window_hours": hours, "date": datetime.utcnow().strftime("%A, %B %d, %Y") }, "stats": { "reddit_count": reddit_data.get('total_posts', 0), "news_count": news_data.get('total_items', 0), "twitter_count": 0, "total_unique": len(unique_items), "trending_count": trending_count }, "content": { "reddit": reddit_data, "news": news_data, "twitter": twitter_data }, "formatted": { "top_topics_html": top_topics_html, "reddit_html": reddit_html, "news_html": news_html, "twitter_html": twitter_html, "reddit_text": reddit_text, "news_text": news_text, "twitter_text": twitter_text }, "user_preferences": { "top_topics": TOP_TOPICS } } print("\n" + "=" * 50) print(f"โ Aggregation complete!") print(f" Reddit posts: {result['stats']['reddit_count']}") print(f" News items: {result['stats']['news_count']}") print(f" Trending: {result['stats']['trending_count']}") print(f" Total unique: {result['stats']['total_unique']}") return result if __name__ == "__main__": hours = int(sys.argv[1]) if len(sys.argv) > 1 else 24 output_file = sys.argv[2] if len(sys.argv) > 2 else "/home/openclaw/.openclaw/workspace/automations/openclaw-digest/output/digest.json" result = aggregate_content(hours=hours) with open(output_file, 'w') as f: json.dump(result, f, indent=2) print(f"\n๐ Output saved to: {output_file}")