#!/usr/bin/env python3
"""
News Aggregation Fetcher for OpenClaw Daily Digest
Fetches from GitHub releases, Hacker News, and tech news sources

All "published" / "fetched_at" values are emitted as naive ISO-8601 strings
expressed in UTC so that items from different sources sort consistently.

Usage:
    script.py [HOURS] [OUTPUT_FILE]
"""

import json
import sys
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, List

import feedparser
import requests

# News sources configuration
SOURCES = {
    "github_releases": {
        "url": "https://github.com/openclaw/openclaw/releases.atom",
        "type": "rss",
    },
    "hn_search": {
        "url": "https://hn.algolia.com/api/v1/search",
        "type": "hackernews",
        "query": "openclaw",
    },
}

# Google News RSS search for OpenClaw
GOOGLE_NEWS_URL = "https://news.google.com/rss/search?q=OpenClaw+AI+agent&hl=en-US&gl=US&ceid=US:en"

# Where the digest JSON is written when no path is given on the command line
DEFAULT_OUTPUT_FILE = "/home/openclaw/.openclaw/workspace/automations/openclaw-digest/output/news.json"


def _utc_now() -> datetime:
    """Return the current time as a naive datetime expressed in UTC."""
    # datetime.utcnow() is deprecated; build the same value from an aware one.
    return datetime.now(timezone.utc).replace(tzinfo=None)


def _to_naive_utc(moment: datetime) -> datetime:
    """Convert an aware datetime to naive UTC; naive input is returned as-is."""
    if moment.tzinfo is None:
        return moment
    return moment.astimezone(timezone.utc).replace(tzinfo=None)


def _truncate(text: Any, limit: int) -> str:
    """Return *text* cut to *limit* characters, adding "..." only if it was cut."""
    text = text or ""
    if len(text) > limit:
        return text[:limit] + "..."
    return text


def _entry_published(entry: Any) -> datetime:
    """Return a feed entry's publication time as a naive UTC datetime.

    Prefers the pre-parsed ``published_parsed`` / ``updated_parsed`` tuples
    and falls back to parsing the raw ISO-8601 ``published`` / ``updated``
    string.

    Raises:
        ValueError: if no usable date is present or the string is not ISO-8601.
    """
    for field in ("published_parsed", "updated_parsed"):
        parsed = getattr(entry, field, None)
        if parsed:
            return datetime(*parsed[:6])

    date_str = entry.get("published") or entry.get("updated") or ""
    # fromisoformat() only accepts a trailing "Z" on Python 3.11+, so spell the
    # offset out; any offset is then normalised to UTC instead of being
    # compared as-is against the naive cutoff.
    return _to_naive_utc(datetime.fromisoformat(date_str.replace("Z", "+00:00")))


def fetch_github_releases(hours: int = 24) -> List[Dict[str, Any]]:
    """Fetch latest OpenClaw releases from GitHub Atom feed.

    Args:
        hours: Only releases published within this many hours are returned.

    Returns:
        A list of release dicts (at most the 5 newest feed entries are
        examined). Returns an empty list if the fetch fails.
    """
    try:
        feed = feedparser.parse(SOURCES["github_releases"]["url"])
        cutoff = _utc_now() - timedelta(hours=hours)
        releases = []

        for entry in feed.entries[:5]:  # Last 5 releases
            try:
                published = _entry_published(entry)
                if published < cutoff:
                    continue
                releases.append({
                    "id": entry.id,
                    "title": entry.title,
                    "url": entry.link,
                    "published": published.isoformat(),
                    "summary": _truncate(entry.get("summary", ""), 300),
                    "source": "GitHub",
                    "source_icon": "🐙",
                    "category": "Release",
                })
            except (AttributeError, KeyError, TypeError, ValueError) as e:
                # One malformed entry must not discard the rest of the feed.
                print(f" Skipping entry due to date parse error: {e}")
                continue

        return releases
    except Exception as e:
        # Best-effort source: report the failure and let the digest continue.
        print(f"Error fetching GitHub releases: {e}")
        return []


def fetch_hackernews(hours: int = 24) -> List[Dict[str, Any]]:
    """Fetch OpenClaw-related stories from Hacker News.

    Args:
        hours: Only stories created within this many hours are requested.

    Returns:
        A list of story dicts (at most 10). Returns an empty list if the
        request fails.
    """
    try:
        # Use an aware datetime: calling .timestamp() on a naive UTC value
        # would interpret it as local time and shift the cutoff.
        cutoff_ts = int((datetime.now(timezone.utc) - timedelta(hours=hours)).timestamp())

        # Algolia HN search API
        params = {
            "query": SOURCES["hn_search"]["query"],
            "tags": "story",
            "numericFilters": f"created_at_i>{cutoff_ts}",
        }
        response = requests.get(SOURCES["hn_search"]["url"], params=params, timeout=30)
        response.raise_for_status()
        data = response.json()

        stories = []
        for hit in data.get("hits", [])[:10]:  # Top 10 stories
            hn_url = f"https://news.ycombinator.com/item?id={hit.get('objectID')}"
            # Convert the epoch as UTC so it matches the feed-based sources.
            created = datetime.fromtimestamp(
                hit.get("created_at_i") or 0, tz=timezone.utc
            ).replace(tzinfo=None)
            stories.append({
                "id": hit.get("objectID"),
                "title": hit.get("title"),
                # Text-only posts have no external URL; link to the HN item.
                "url": hit.get("url") or hn_url,
                "hn_url": hn_url,
                "published": created.isoformat(),
                "author": hit.get("author"),
                "points": hit.get("points", 0),
                "num_comments": hit.get("num_comments", 0),
                "summary": _truncate(hit.get("story_text"), 200),
                "source": "Hacker News",
                "source_icon": "🟠",
                "category": "Discussion",
            })

        return stories
    except Exception as e:
        # Best-effort source: report the failure and let the digest continue.
        print(f"Error fetching Hacker News: {e}")
        return []


def fetch_google_news(hours: int = 24) -> List[Dict[str, Any]]:
    """Fetch OpenClaw news from Google News RSS.

    Args:
        hours: Only articles published within this many hours are returned.

    Returns:
        A list of article dicts (at most the first 10 feed entries are
        examined). Returns an empty list if the fetch fails.
    """
    try:
        feed = feedparser.parse(GOOGLE_NEWS_URL)
        cutoff = _utc_now() - timedelta(hours=hours)
        news = []

        for entry in feed.entries[:10]:
            try:
                published = _entry_published(entry)
                if published < cutoff:
                    continue
                news.append({
                    "id": entry.id,
                    "title": entry.title,
                    "url": entry.link,
                    "published": published.isoformat(),
                    "source": entry.get("source", {}).get("title", "Google News"),
                    "source_icon": "📰",
                    "category": "News",
                })
            except (AttributeError, KeyError, TypeError, ValueError) as e:
                # One malformed entry must not discard the rest of the feed.
                print(f" Skipping entry due to date parse error: {e}")
                continue

        return news
    except Exception as e:
        # Best-effort source: report the failure and let the digest continue.
        print(f"Error fetching Google News: {e}")
        return []


def fetch_news_content(hours: int = 24) -> Dict[str, Any]:
    """Main function to fetch all news content.

    Args:
        hours: Size of the look-back window, applied to every source.

    Returns:
        A dict with the per-source lists, the 15 newest items overall
        ("all_items"), the total item count and fetch metadata.
    """
    print(f"🔍 Fetching news from last {hours} hours...")

    # Fetch from all sources
    print(" 📡 GitHub releases...")
    github = fetch_github_releases(hours)
    print(f" Found {len(github)} releases")

    print(" 📡 Hacker News...")
    hn = fetch_hackernews(hours)
    print(f" Found {len(hn)} stories")

    print(" 📡 Google News...")
    gnews = fetch_google_news(hours)
    print(f" Found {len(gnews)} articles")

    # Combine and sort newest first. Every "published" value is a naive-UTC
    # ISO string, so lexicographic order equals chronological order.
    all_items = sorted(
        github + hn + gnews,
        key=lambda item: item.get("published", ""),
        reverse=True,
    )

    return {
        "source": "news",
        "fetched_at": _utc_now().isoformat(),
        "time_window_hours": hours,
        "total_items": len(all_items),
        "github_releases": github,
        "hackernews": hn,
        "google_news": gnews,
        "all_items": all_items[:15],  # Top 15 overall
    }


def main() -> None:
    """Command-line entry point: fetch the digest and write it out as JSON."""
    hours = int(sys.argv[1]) if len(sys.argv) > 1 else 24
    output_file = sys.argv[2] if len(sys.argv) > 2 else DEFAULT_OUTPUT_FILE

    content = fetch_news_content(hours=hours)

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(content, f, indent=2)

    print(f"\n✅ News content saved to {output_file}")
    print(f" Total items: {content['total_items']}")
    print(f" GitHub releases: {len(content['github_releases'])}")
    print(f" Hacker News: {len(content['hackernews'])}")
    print(f" Google News: {len(content['google_news'])}")


if __name__ == "__main__":
    main()