#!/usr/bin/env python3
"""
Enhanced Content Aggregation for OpenClaw Daily Digest
Features: topic tags, read time, trending detection, color-coded sources, LLM summaries
"""

import hashlib
import html
import json
import os
import re
import sys
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, List, Tuple
from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'sources'))
from reddit_fetcher import fetch_reddit_content
from news_fetcher import fetch_news_content

# Topic detection keywords
TOPIC_KEYWORDS = {
    'AI/LLMs': ['llm', 'gpt', 'chatgpt', 'claude', 'openai', 'anthropic', 'model', 'training',
                'inference', 'token', 'embedding', 'fine-tune', 'rag'],
    'Coding': ['code', 'github', 'programming', 'developer', 'api', 'python', 'javascript',
               'typescript', 'rust', 'go', 'golang'],
    'Home Automation': ['home assistant', 'smart home', 'automation', 'zigbee', 'z-wave',
                        'mqtt', 'iot', 'sensor'],
    'Self-Hosting': ['self-host', 'homelab', 'server', 'docker', 'kubernetes', 'proxmox',
                     'nas', 'selfhosted'],
    'Hardware': ['gpu', 'nvidia', 'amd', 'cpu', 'ram', 'ssd', 'raspberry pi', 'arduino', 'esp32'],
    'Privacy': ['privacy', 'security', 'encryption', 'vpn', 'tor', 'self-hosted',
                'data protection'],
    'OpenClaw': ['openclaw', 'claw', 'mcp', 'agent', 'skill', 'clawhub'],
}

# User's top topics (will be updated based on clicks over time)
TOP_TOPICS = ['AI/LLMs', 'OpenClaw', 'Coding', 'Home Automation']

# Badge colour per topic; unknown topics fall back to _DEFAULT_TOPIC_COLOR.
_TOPIC_COLORS = {
    'AI/LLMs': '#00d2ff',
    'Coding': '#a29bfe',
    'Home Automation': '#20b47a',
    'Self-Hosting': '#ff9f43',
    'Hardware': '#ff6b6b',
    'Privacy': '#fd79a8',
    'OpenClaw': '#ee5a24',
}
_DEFAULT_TOPIC_COLOR = '#74b9ff'

# Keywords this short are matched as standalone tokens, not substrings.
_SHORT_KEYWORD_LEN = 3
# Leading characters of a story body used for topic detection and HTML excerpts.
_EXCERPT_CHARS = 200
# Excerpt length in the plain-text digest.
_TEXT_EXCERPT_CHARS = 150
# Query parameters that never identify a page and are ignored when comparing URLs.
_TRACKING_PARAMS = frozenset({'fbclid', 'gclid'})
_BOT_START_URL = "https://t.me/openclaw_bot?start="
_DEFAULT_OUTPUT_FILE = (
    "/home/openclaw/.openclaw/workspace/automations/openclaw-digest/output/digest.json"
)

# NOTE(review): the HTML tags emitted below were rebuilt around the values each
# formatter computes (links, colours, borders). They are deliberately minimal;
# confirm the intended styling against the email template.


def _text(value: Any) -> str:
    """Return *value* as a string, mapping None to ''."""
    return '' if value is None else str(value)


def _num(value: Any) -> float:
    """Return *value* as a number, treating None or non-numeric data as 0."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0.0


def _truncate(text: str, limit: int) -> str:
    """Cut *text* to *limit* characters, appending '...' when something was cut."""
    return text[:limit] + "..." if len(text) > limit else text


def _story_body(story: Dict) -> str:
    """Return the story's 'selftext', falling back to 'summary', else ''."""
    return _text(story.get('selftext')) or _text(story.get('summary'))


def _engagement_score(story: Dict) -> float:
    """Return the story's 'score', falling back to 'points', as a number."""
    return _num(story.get('score')) or _num(story.get('points'))


def _story_id(url: str) -> str:
    """Return a short, stable identifier for a story URL (not security-sensitive)."""
    return hashlib.md5(url.encode()).hexdigest()[:8]


def _keyword_in_text(keyword: str, text: str) -> bool:
    """Check whether *keyword* occurs in already-lowercased *text*.

    Very short keywords ('go', 'rag', 'ram', 'tor', ...) are fragments of many
    unrelated words, so they must appear as their own token (an optional
    plural 's' is allowed). Longer keywords keep plain substring matching.
    """
    if len(keyword) > _SHORT_KEYWORD_LEN:
        return keyword in text
    pattern = rf"(?<![a-z]){re.escape(keyword)}s?(?![a-z])"
    return re.search(pattern, text) is not None


def detect_topics(title: str, excerpt: str = '') -> List[str]:
    """Detect topics from title and excerpt.

    Returns at most three topic names, in TOPIC_KEYWORDS order.
    """
    text = f"{_text(title)} {_text(excerpt)}".lower()
    topics = [
        topic
        for topic, keywords in TOPIC_KEYWORDS.items()
        if any(_keyword_in_text(kw, text) for kw in keywords)
    ]
    return topics[:3]  # Max 3 topics


def get_topic_color(topic: str) -> str:
    """Get color for topic tag"""
    return _TOPIC_COLORS.get(topic, _DEFAULT_TOPIC_COLOR)


def estimate_read_time(url: str, excerpt: str = '') -> str:
    """Estimate read time based on content type.

    *excerpt* should be the full text when available: the estimate is driven
    by its word count, then adjusted for GitHub and YouTube links.
    """
    # Default 3 min for most content
    minutes = 3

    # Adjust based on text length
    if excerpt:
        word_count = len(excerpt.split())
        if word_count > 500:
            minutes = 8
        elif word_count > 200:
            minutes = 5
        elif word_count < 50:
            minutes = 2

    # Check URL patterns for known quick reads
    link = _text(url).lower()
    if 'github.com' in link:  # also covers gist.github.com
        minutes = max(2, minutes - 1)  # GitHub tends to be code-heavy
    elif 'youtube.com' in link or 'youtu.be' in link:
        minutes = 10  # Videos take longer

    return f"{minutes} min read"


def is_trending(story: Dict) -> bool:
    """Detect if a story is trending (high engagement ratio).

    Trending means either score > 200, or score > 50 with more than 20
    comments and a comment-to-score ratio above 0.3.
    """
    comments = _num(story.get('num_comments'))
    score = _engagement_score(story)
    if score <= 0:
        return False

    # High comment-to-score ratio = hot discussion
    ratio = comments / score
    return (score > 50 and comments > 20 and ratio > 0.3) or score > 200


def get_trending_emoji(story: Dict) -> str:
    """Get appropriate trending indicator ('' when the story is not trending)."""
    score = _engagement_score(story)
    comments = _num(story.get('num_comments'))

    if score > 500 or comments > 100:
        return "🔥🔥"  # Very hot
    if is_trending(story):
        return "🔥"  # Trending
    return ""


def generate_quick_reply_links(story: Dict, story_id: str) -> str:
    """Generate quick action links for Telegram/Discord.

    *story* is accepted for interface compatibility; only *story_id* is used.
    The deep links need corresponding bot handlers.
    """
    summarize_link = html.escape(f"{_BOT_START_URL}summarize_{story_id}", quote=True)
    save_link = html.escape(f"{_BOT_START_URL}save_{story_id}", quote=True)

    return f'''
<div><a href="{summarize_link}">📝 Summarize</a> <a href="{save_link}">💾 Save</a></div>
'''


def _normalize_url(url: Any) -> str:
    """Return a comparison key for *url*, or '' when there is no URL.

    Scheme and host are lower-cased, the fragment and tracking parameters are
    dropped, and a trailing slash is ignored. Other query parameters are kept
    because they often identify the page (e.g. ``item?id=...``).
    """
    raw = _text(url).strip()
    if not raw:
        return ''
    try:
        parts = urlsplit(raw)
    except ValueError:
        # Malformed URL: fall back to a plain case-insensitive comparison.
        return raw.lower()
    query = sorted(
        (key, value)
        for key, value in parse_qsl(parts.query, keep_blank_values=True)
        if not key.lower().startswith('utm_') and key.lower() not in _TRACKING_PARAMS
    )
    return urlunsplit((
        parts.scheme.lower(),
        parts.netloc.lower(),
        parts.path.rstrip('/'),
        urlencode(query),
        '',
    ))


def _titles_overlap(words: set, other: set) -> bool:
    """Return True when two title word sets share more than 80% of their words."""
    if not words or not other:
        return False
    return len(words & other) / max(len(words), len(other)) > 0.8


def deduplicate_stories(items: List[Dict]) -> List[Dict]:
    """Remove duplicate stories based on URL and title similarity.

    The first occurrence wins. Items without a URL are compared by title only.
    """
    seen_urls = set()
    kept_title_words = []  # word set of every kept item, parallel to `unique`
    unique = []

    for item in items:
        url = _normalize_url(item.get('url'))
        if url and url in seen_urls:
            continue

        # Title similarity check
        words = set(_text(item.get('title')).lower().split())
        if any(_titles_overlap(words, other) for other in kept_title_words):
            continue

        if url:
            seen_urls.add(url)
        kept_title_words.append(words)
        unique.append(item)

    return unique


def score_relevance(item: Dict) -> float:
    """Score story relevance with topic boost.

    Items with 'score' and 'num_comments' use the first weighting below,
    items with 'points' the second; anything else gets a flat 50. Each
    detected topic listed in TOP_TOPICS adds 50.
    """
    comments = _num(item.get('num_comments'))
    points = _num(item.get('points'))

    # Base engagement
    if 'score' in item and 'num_comments' in item:
        upvote_ratio = item.get('upvote_ratio')
        upvote_ratio = 0.5 if upvote_ratio is None else _num(upvote_ratio)
        score = _num(item.get('score')) * 0.5 + comments * 1.5 + upvote_ratio * 50
    elif 'points' in item:
        score = points * 1.0 + comments * 2.0
    else:
        score = 50.0

    # Boost for high engagement
    if comments > 50 or points > 100:
        score += 100

    # Boost for user's top topics
    topics = detect_topics(item.get('title', ''), _story_body(item)[:_EXCERPT_CHARS])
    score += 50 * sum(1 for topic in topics if topic in TOP_TOPICS)

    return score


def format_topic_tags(topics: List[str]) -> str:
    """Format topic tags as styled badges ('' when there are no topics)."""
    if not topics:
        return ""

    tags = []
    for topic in topics:
        color = get_topic_color(topic)
        is_top = topic in TOP_TOPICS
        border = f"border:1px solid {color};" if is_top else ""
        star = "★ " if is_top else ""
        tags.append(
            f'<span style="color:{color};margin-right:6px;{border}">'
            f'{star}{html.escape(topic)}</span>'
        )

    return f'<div>{"".join(tags)}</div>'


def _link_html(label: str, url: str) -> str:
    """Return *label* escaped and, when *url* is non-empty, wrapped in a link."""
    safe_label = html.escape(label)
    if not url:
        return safe_label
    return f'<a href="{html.escape(url, quote=True)}">{safe_label}</a>'


def _excerpt_html(body: str) -> str:
    """Return the escaped, truncated excerpt block, or '' for an empty body."""
    if not body:
        return ""
    return f"<div>{html.escape(_truncate(body, _EXCERPT_CHARS))}</div>"


def _engagement_line(votes: Any, comments: Any, read_time: str) -> str:
    """Join the available engagement figures and the read time with ' · '."""
    parts = []
    if votes:
        parts.append(f"↑ {votes}")
    if comments:
        parts.append(f"💬 {comments}")
    parts.append(f"⏱️ {read_time}")
    return html.escape(' · '.join(parts))


def format_reddit_story(story: Dict, include_quick_actions: bool = False) -> str:
    """Format Reddit story with all enhancements.

    Reads 'title', 'selftext', 'url', 'score', 'num_comments',
    'link_flair_text' and 'author' from *story*; all are optional.
    """
    body = _text(story.get('selftext'))
    url = _text(story.get('url'))
    title = _text(story.get('title'))

    topic_html = format_topic_tags(detect_topics(title, body[:_EXCERPT_CHARS]))
    # Estimate from the full body: a 200-character excerpt always looks short.
    read_time = estimate_read_time(url, body)
    trending = get_trending_emoji(story)
    stats = _engagement_line(story.get('score'), story.get('num_comments'), read_time)

    # Title with flair
    flair = _text(story.get('link_flair_text'))
    if flair:
        title = f"[{flair}] {title}"

    quick_actions = (
        generate_quick_reply_links(story, _story_id(url)) if include_quick_actions else ""
    )
    excerpt_html = _excerpt_html(body)
    trending_html = f"<span>{trending}</span> " if trending else ""
    title_html = _link_html(title, url)
    author = html.escape(_text(story.get('author')) or 'unknown')

    return f'''
<div>
<span>Reddit</span>
{topic_html}
<div>{trending_html}{title_html}</div>
<div>u/{author}</div>
{excerpt_html}
<div>{stats}</div>
{quick_actions}
</div>
'''


def format_news_story(story: Dict, include_quick_actions: bool = False) -> str:
    """Format news story with all enhancements.

    Reads 'title', 'summary', 'url', 'source', 'points' and 'num_comments'
    from *story*; all are optional.
    """
    body = _text(story.get('summary'))
    url = _text(story.get('url'))
    title = _text(story.get('title'))

    topic_html = format_topic_tags(detect_topics(title, body[:_EXCERPT_CHARS]))
    # Estimate from the full body: a 200-character excerpt always looks short.
    read_time = estimate_read_time(url, body)
    trending = get_trending_emoji(story)

    # Source styling
    source = _text(story.get('source')) or 'News'
    tag_colors = {
        'GitHub': ('#a29bfe', 'rgba(139,148,158,0.15)'),
        'Hacker News': ('#ff9f43', 'rgba(255,102,0,0.15)'),
    }
    tag_color, tag_bg = tag_colors.get(source, ('#74b9ff', 'rgba(116,185,255,0.15)'))

    stats = _engagement_line(story.get('points'), story.get('num_comments'), read_time)
    quick_actions = (
        generate_quick_reply_links(story, _story_id(url)) if include_quick_actions else ""
    )
    excerpt_html = _excerpt_html(body)
    trending_html = f"<span>{trending}</span> " if trending else ""
    title_html = _link_html(title, url)
    source_html = html.escape(source)

    return f'''
<div>
<span style="color:{tag_color};background:{tag_bg};">{source_html}</span>
{topic_html}
<div>{trending_html}{title_html}</div>
{excerpt_html}
<div>{stats}</div>
{quick_actions}
</div>
'''


def format_top_topics_section() -> str:
    """Generate top topics summary for the email"""
    badges = ''.join(
        f'<span style="color:{get_topic_color(topic)};margin-right:6px;">'
        f'★ {html.escape(topic)}</span>'
        for topic in TOP_TOPICS[:4]  # Show top 4
    )

    return f'''
<div>
<div>Your Top Topics</div>
<div>{badges}</div>
</div>
'''


def format_story_text(story: Dict) -> str:
    """Format a story for plain-text email"""
    title = _text(story.get('title'))
    url = _text(story.get('url'))
    body = _story_body(story)

    lines = [f"📌 {title}"]

    # Add topics
    topics = detect_topics(title, body[:_EXCERPT_CHARS])
    if topics:
        lines.append(f" Topics: {', '.join(topics)}")

    lines.append(f" Link: {url}")

    if story.get('author'):
        lines.append(f" Author: {story.get('author')}")

    # Engagement stats (the read time is always present)
    stats = []
    votes = story.get('score') or story.get('points')
    if votes:
        stats.append(f"{votes} upvotes")
    if story.get('num_comments'):
        stats.append(f"{story.get('num_comments')} comments")
    stats.append(estimate_read_time(url, body))
    lines.append(f" {' | '.join(stats)}")

    # Trending
    if is_trending(story):
        lines.append(" 🔥 TRENDING")

    if body:
        lines.append(f" {_truncate(body, _TEXT_EXCERPT_CHARS)}")

    lines.append("")
    return "\n".join(lines)


def aggregate_content(hours: int = 24) -> Dict[str, Any]:
    """Main aggregation function with enhanced features.

    Fetches Reddit and news content for the last *hours* hours, removes
    duplicates, ranks by score_relevance, and returns a dict with the keys
    'meta', 'stats', 'content', 'formatted' and 'user_preferences'.
    Progress is printed to stdout.
    """
    print(f"🦀 Aggregating OpenClaw content from last {hours} hours...")
    print("=" * 50)

    # Fetch from all sources
    print("\n📥 Fetching Reddit content...")
    reddit_data = fetch_reddit_content(hours=hours)

    print("\n📥 Fetching news content...")
    news_data = fetch_news_content(hours=hours)

    # Twitter placeholder
    twitter_data = {
        "source": "twitter",
        "total_items": 0,
        "tweets": [],
        "note": "X/Twitter integration requires API setup"
    }

    # Combine all items
    all_items = []
    all_items.extend({**item, '_source': 'reddit'} for item in reddit_data.get('all_posts', []))
    all_items.extend({**item, '_source': 'news'} for item in news_data.get('all_items', []))

    # Deduplicate
    print("\n🧹 Deduplicating stories...")
    unique_items = deduplicate_stories(all_items)
    print(f" Removed {len(all_items) - len(unique_items)} duplicates")

    # Sort by relevance (includes topic boosting)
    unique_items.sort(key=score_relevance, reverse=True)

    # Split back into sections
    reddit_top = [item for item in unique_items if item.get('_source') == 'reddit'][:8]
    news_top = [item for item in unique_items if item.get('_source') == 'news'][:8]

    # Count trending
    trending_count = sum(1 for item in unique_items if is_trending(item))

    # Generate HTML sections
    top_topics_html = format_top_topics_section()
    reddit_html = '\n'.join(format_reddit_story(s, include_quick_actions=True) for s in reddit_top)
    news_html = '\n'.join(format_news_story(s, include_quick_actions=True) for s in news_top)
    twitter_html = '<div>🚧 X/Twitter integration coming soon</div>'

    # Generate text sections
    reddit_text = (
        '\n'.join(format_story_text(s) for s in reddit_top)
        if reddit_top else "No new Reddit posts today."
    )
    news_text = (
        '\n'.join(format_story_text(s) for s in news_top)
        if news_top else "No new news articles today."
    )
    twitter_text = "🚧 X/Twitter integration coming soon - requires API setup\n"

    # One timestamp for both fields. Taken as aware UTC and made naive again so
    # the serialized format stays what datetime.utcnow() used to produce.
    now = datetime.now(timezone.utc).replace(tzinfo=None)

    # Build result
    result = {
        "meta": {
            "generated_at": now.isoformat(),
            "time_window_hours": hours,
            "date": now.strftime("%A, %B %d, %Y")
        },
        "stats": {
            "reddit_count": reddit_data.get('total_posts', 0),
            "news_count": news_data.get('total_items', 0),
            "twitter_count": 0,
            "total_unique": len(unique_items),
            "trending_count": trending_count
        },
        "content": {
            "reddit": reddit_data,
            "news": news_data,
            "twitter": twitter_data
        },
        "formatted": {
            "top_topics_html": top_topics_html,
            "reddit_html": reddit_html,
            "news_html": news_html,
            "twitter_html": twitter_html,
            "reddit_text": reddit_text,
            "news_text": news_text,
            "twitter_text": twitter_text
        },
        "user_preferences": {
            "top_topics": TOP_TOPICS
        }
    }

    print("\n" + "=" * 50)
    print("✅ Aggregation complete!")
    print(f" Reddit posts: {result['stats']['reddit_count']}")
    print(f" News items: {result['stats']['news_count']}")
    print(f" Trending: {result['stats']['trending_count']}")
    print(f" Total unique: {result['stats']['total_unique']}")

    return result


def main() -> None:
    """Run the aggregation from the command line: ``[hours] [output_file]``."""
    hours = int(sys.argv[1]) if len(sys.argv) > 1 else 24
    output_file = sys.argv[2] if len(sys.argv) > 2 else _DEFAULT_OUTPUT_FILE

    result = aggregate_content(hours=hours)

    # Create the target directory on first run instead of failing in open().
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(result, f, indent=2)

    print(f"\n📄 Output saved to: {output_file}")


if __name__ == "__main__":
    main()