181 lines
6.7 KiB
Python
181 lines
6.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
News Aggregation Fetcher for OpenClaw Daily Digest
|
|
Fetches from GitHub releases, Hacker News, and tech news sources
|
|
"""
|
|
|
|
import json
from datetime import datetime, timedelta, timezone
from typing import List, Dict, Any

import feedparser
import requests
|
|
|
|
# Registry of the remote endpoints this module polls.
# Every entry records where to fetch from ("url") and a "type" tag;
# the Hacker News entry additionally carries the search term ("query").
SOURCES = {
    "github_releases": dict(
        url="https://github.com/openclaw/openclaw/releases.atom",
        type="rss",
    ),
    "hn_search": dict(
        url="https://hn.algolia.com/api/v1/search",
        type="hackernews",
        query="openclaw",
    ),
}
|
|
|
|
def _utc_now() -> datetime:
    """Return the current time as a naive datetime expressed in UTC.

    Every timestamp in this module is kept naive-UTC so that cutoffs, feed
    dates and the ISO strings written to the output can be compared and
    sorted against each other.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


def _truncate(text: str, limit: int) -> str:
    """Return *text* cut to *limit* characters, adding "..." only if it was cut."""
    if len(text) > limit:
        return text[:limit] + "..."
    return text


def _entry_published(entry: Any) -> datetime:
    """Return a feed entry's publication time as a naive UTC datetime.

    Prefers the pre-parsed ``published_parsed`` / ``updated_parsed`` tuples
    and falls back to parsing the raw ``published`` / ``updated`` ISO string.

    Raises:
        ValueError: if the entry carries no parseable date.
    """
    # NOTE(review): feedparser documents the *_parsed tuples as UTC - confirm
    # against the installed feedparser version.
    for field in ("published_parsed", "updated_parsed"):
        parsed = getattr(entry, field, None)
        if parsed:
            return datetime(*parsed[:6])

    raw = entry.get("published", entry.get("updated", ""))
    # "Z" is rewritten because datetime.fromisoformat() only accepts it from
    # Python 3.11 onwards.
    moment = datetime.fromisoformat(raw.replace("Z", "+00:00"))
    if moment.tzinfo is not None:
        # Normalise any offset to UTC; comparing an aware value with the
        # naive cutoff would raise TypeError.
        moment = moment.astimezone(timezone.utc).replace(tzinfo=None)
    return moment


def fetch_github_releases(hours: int = 24) -> List[Dict[str, Any]]:
    """Fetch recent OpenClaw releases from the GitHub Atom feed.

    Args:
        hours: Size of the look-back window; only releases published within
            the last *hours* hours are returned.

    Returns:
        A list of release dicts (at most the first 5 feed entries are
        inspected). Returns an empty list if fetching fails.
    """
    try:
        feed = feedparser.parse(SOURCES["github_releases"]["url"])
        cutoff = _utc_now() - timedelta(hours=hours)

        releases = []
        for entry in feed.entries[:5]:  # Last 5 releases
            # Best effort per entry: one malformed entry must not discard
            # the rest of the feed.
            try:
                published = _entry_published(entry)
                if published < cutoff:
                    continue

                releases.append({
                    "id": entry.id,
                    "title": entry.title,
                    "url": entry.link,
                    "published": published.isoformat(),
                    "summary": _truncate(entry.get("summary", ""), 300),
                    "source": "GitHub",
                    "source_icon": "🐙",
                    "category": "Release",
                })
            except Exception as e:
                print(f" Skipping entry due to date parse error: {e}")
                continue

        return releases
    except Exception as e:
        print(f"Error fetching GitHub releases: {e}")
        return []


def fetch_hackernews(hours: int = 24) -> List[Dict[str, Any]]:
    """Fetch OpenClaw-related stories from Hacker News via the Algolia API.

    Args:
        hours: Size of the look-back window; only stories created within the
            last *hours* hours are requested.

    Returns:
        A list of story dicts (at most the first 10 hits). Returns an empty
        list if the request fails.
    """
    try:
        # Use an aware datetime: .timestamp() on a naive value would be
        # interpreted as local time and shift the cutoff by the UTC offset.
        since = int((datetime.now(timezone.utc) - timedelta(hours=hours)).timestamp())
        params = {
            "query": SOURCES["hn_search"]["query"],
            "tags": "story",
            "numericFilters": f"created_at_i>{since}",
        }

        response = requests.get(SOURCES["hn_search"]["url"], params=params, timeout=30)
        response.raise_for_status()
        data = response.json()

        stories = []
        for hit in data.get("hits", [])[:10]:  # Top 10 stories
            story_id = hit.get("objectID")
            hn_url = f"https://news.ycombinator.com/item?id={story_id}"
            # Convert the epoch to naive UTC so it sorts consistently with
            # the feed-based sources.
            created = datetime.fromtimestamp(
                hit.get("created_at_i") or 0, tz=timezone.utc
            ).replace(tzinfo=None)

            stories.append({
                "id": story_id,
                "title": hit.get("title"),
                # Text-only posts have no external URL; link to the thread.
                "url": hit.get("url") or hn_url,
                "hn_url": hn_url,
                "published": created.isoformat(),
                "author": hit.get("author"),
                "points": hit.get("points") or 0,
                "num_comments": hit.get("num_comments") or 0,
                "summary": _truncate(hit.get("story_text") or "", 200),
                "source": "Hacker News",
                "source_icon": "🟠",
                "category": "Discussion",
            })

        return stories
    except Exception as e:
        print(f"Error fetching Hacker News: {e}")
        return []


def fetch_google_news(hours: int = 24) -> List[Dict[str, Any]]:
    """Fetch OpenClaw news from the Google News RSS search feed.

    Args:
        hours: Size of the look-back window; only articles published within
            the last *hours* hours are returned.

    Returns:
        A list of article dicts (at most the first 10 feed entries are
        inspected). Returns an empty list if fetching fails.
    """
    try:
        # Google News RSS for OpenClaw
        url = "https://news.google.com/rss/search?q=OpenClaw+AI+agent&hl=en-US&gl=US&ceid=US:en"
        feed = feedparser.parse(url)
        cutoff = _utc_now() - timedelta(hours=hours)

        news = []
        for entry in feed.entries[:10]:
            # Catch Exception (not a bare except) so Ctrl-C still works and
            # the reason for skipping an entry is visible.
            try:
                published = _entry_published(entry)
                if published < cutoff:
                    continue

                news.append({
                    "id": entry.id,
                    "title": entry.title,
                    "url": entry.link,
                    "published": published.isoformat(),
                    "source": entry.get("source", {}).get("title", "Google News"),
                    "source_icon": "📰",
                    "category": "News",
                })
            except Exception as e:
                print(f" Skipping entry due to date parse error: {e}")
                continue

        return news
    except Exception as e:
        print(f"Error fetching Google News: {e}")
        return []


def fetch_news_content(hours: int = 24) -> Dict[str, Any]:
    """Fetch news from every source and combine it into one digest payload.

    Args:
        hours: Size of the look-back window, forwarded to every fetcher.

    Returns:
        A dict with the per-source lists ("github_releases", "hackernews",
        "google_news"), the 15 newest items overall ("all_items"), the total
        item count, the requested window and the fetch time (naive UTC ISO
        string).
    """
    print(f"🔍 Fetching news from last {hours} hours...")

    # Fetch from all sources
    print(" 📡 GitHub releases...")
    github = fetch_github_releases(hours=hours)
    print(f" Found {len(github)} releases")

    print(" 📡 Hacker News...")
    hn = fetch_hackernews(hours=hours)
    print(f" Found {len(hn)} stories")

    print(" 📡 Google News...")
    gnews = fetch_google_news(hours=hours)
    print(f" Found {len(gnews)} articles")

    # Combine and sort newest first. "published" is an ISO-8601 string in
    # naive UTC for every source, so string order equals time order.
    all_items = github + hn + gnews
    all_items.sort(key=lambda x: x.get("published", ""), reverse=True)

    return {
        "source": "news",
        "fetched_at": _utc_now().isoformat(),
        "time_window_hours": hours,
        "total_items": len(all_items),
        "github_releases": github,
        "hackernews": hn,
        "google_news": gnews,
        "all_items": all_items[:15],  # Top 15 overall
    }
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point.
    # Usage: <script> [hours] [output_file]
    #   hours        look-back window in hours (default 24)
    #   output_file  path of the JSON file to write (default below)
    import sys
    from pathlib import Path

    hours = int(sys.argv[1]) if len(sys.argv) > 1 else 24
    output_file = sys.argv[2] if len(sys.argv) > 2 else "/home/openclaw/.openclaw/workspace/automations/openclaw-digest/output/news.json"

    content = fetch_news_content(hours=hours)

    # Create the destination directory if needed so a first run does not
    # fail with FileNotFoundError.
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)

    # Explicit encoding keeps the output independent of the platform default.
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(content, f, indent=2)

    print(f"\n✅ News content saved to {output_file}")
    print(f" Total items: {content['total_items']}")
    print(f" GitHub releases: {len(content['github_releases'])}")
    print(f" Hacker News: {len(content['hackernews'])}")
    print(f" Google News: {len(content['google_news'])}")
|