#!/usr/bin/env python3
"""
Reddit Content Fetcher for OpenClaw Daily Digest

Fetches posts from OpenClaw-related subreddits using Reddit's JSON API.
No authentication required for read-only public access.

Usage:
    script.py [HOURS] [OUTPUT_FILE]
"""

import json
import sys
import time
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional

# Reddit API endpoints (JSON API - no auth needed for read-only)
SUBREDDITS = [
    "openclaw",
    "LocalLLaMA",
    "vibecoding",
    "selfhosted",
    "homeautomation",
]

REDDIT_JSON_URL = "https://www.reddit.com/r/{subreddit}.json"

# Sent with every request as the User-Agent header.
USER_AGENT = "OpenClaw-Digest-Bot/1.0 (by /u/krillyclaw)"

# Self-post bodies longer than this are cut and suffixed with "...".
SELFTEXT_PREVIEW_CHARS = 500

# Case-insensitive substrings that mark a post as OpenClaw-related.
OPENCLAW_KEYWORDS = ["openclaw", "clawdbot", "open claw", "clawd"]

# Pause between consecutive subreddit requests - be nice to Reddit.
REQUEST_DELAY_SECONDS = 0.5

DEFAULT_OUTPUT_FILE = (
    "/home/openclaw/.openclaw/workspace/automations/openclaw-digest/output/reddit.json"
)


def _preview(text: str, max_chars: int = SELFTEXT_PREVIEW_CHARS) -> str:
    """Return *text* unchanged, or its first *max_chars* characters plus "..."."""
    if len(text) > max_chars:
        return text[:max_chars] + "..."
    return text


def _normalize_post(raw: Dict[str, Any]) -> Dict[str, Any]:
    """Reduce one raw Reddit post payload to the fields the digest uses."""
    # JSON null arrives as None, which .get(key, default) does NOT replace;
    # coerce explicitly so slicing / concatenation below cannot raise.
    selftext = raw.get("selftext") or ""
    permalink = raw.get("permalink") or ""
    return {
        "id": raw.get("id"),
        "title": raw.get("title"),
        "author": raw.get("author"),
        "subreddit": raw.get("subreddit"),
        "score": raw.get("score", 0),
        "num_comments": raw.get("num_comments", 0),
        "created_utc": raw.get("created_utc", 0),
        "url": f"https://reddit.com{permalink}",
        "selftext": _preview(selftext),
        "is_self": raw.get("is_self", False),
        "link_flair_text": raw.get("link_flair_text", ""),
        "upvote_ratio": raw.get("upvote_ratio", 0),
    }


def _listing_children(payload: Any) -> List[Dict[str, Any]]:
    """Return the raw post dicts of a listing payload, tolerating odd shapes."""
    listing = payload.get("data") if isinstance(payload, dict) else None
    children = listing.get("children") if isinstance(listing, dict) else None
    if not isinstance(children, list):
        return []
    raw_posts = []
    for child in children:
        if not isinstance(child, dict):
            continue
        raw = child.get("data")
        raw_posts.append(raw if isinstance(raw, dict) else {})
    return raw_posts


def fetch_subreddit(subreddit: str, limit: int = 25) -> List[Dict[str, Any]]:
    """Fetch posts from a subreddit using Reddit JSON API.

    Args:
        subreddit: Subreddit name without the ``r/`` prefix.
        limit: Value sent as the ``limit`` query parameter.

    Returns:
        A list of normalized post dicts. Network, HTTP and JSON decoding
        failures are reported on stdout and yield an empty list so that one
        failing subreddit does not abort the whole run.
    """
    # Imported here so the filtering/scoring helpers in this module remain
    # importable (e.g. for tests) on machines without the HTTP dependency.
    import requests

    url = REDDIT_JSON_URL.format(subreddit=subreddit)
    headers = {"User-Agent": USER_AGENT}

    try:
        response = requests.get(
            url, headers=headers, params={"limit": limit}, timeout=30
        )
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers an undecodable JSON body.
        print(f"Error fetching r/{subreddit}: {e}")
        return []

    return [_normalize_post(raw) for raw in _listing_children(payload)]


def filter_by_time(posts: List[Dict], hours: int = 24) -> List[Dict]:
    """Filter posts to only include those from last N hours.

    Each kept post dict is updated in place with a ``created_datetime`` key
    holding the creation time formatted as ``YYYY-MM-DD HH:MM UTC``.

    Args:
        posts: Post dicts carrying a ``created_utc`` epoch timestamp.
        hours: Size of the look-back window.

    Returns:
        The posts (same dict objects) created at or after the cutoff.
    """
    # Use an aware datetime: calling .timestamp() on a naive utcnow() value
    # interprets it as local time and shifts the cutoff by the UTC offset.
    cutoff_timestamp = (
        datetime.now(timezone.utc) - timedelta(hours=hours)
    ).timestamp()

    filtered = []
    for post in posts:
        created = post.get("created_utc") or 0  # missing / null counts as too old
        if created >= cutoff_timestamp:
            post["created_datetime"] = datetime.fromtimestamp(
                created, tz=timezone.utc
            ).strftime("%Y-%m-%d %H:%M UTC")
            filtered.append(post)
    return filtered


def filter_openclaw_related(posts: List[Dict]) -> List[Dict]:
    """Filter posts to only include OpenClaw-related content.

    A post matches when its title or selftext contains any of
    ``OPENCLAW_KEYWORDS``, compared case-insensitively.
    """

    def mentions_openclaw(post: Dict) -> bool:
        text = f"{post.get('title') or ''} {post.get('selftext') or ''}".lower()
        return any(keyword in text for keyword in OPENCLAW_KEYWORDS)

    return [post for post in posts if mentions_openclaw(post)]


def _number(post: Dict, key: str, default: float) -> float:
    """Return ``post[key]``, or *default* when the key is missing or None."""
    value = post.get(key, default)
    return default if value is None else value


def score_post(post: Dict) -> float:
    """Calculate relevance score based on engagement.

    Missing or null fields fall back to 0 (score, comments) and 0.5
    (upvote ratio).
    """
    score = _number(post, "score", 0)
    comments = _number(post, "num_comments", 0)
    upvote_ratio = _number(post, "upvote_ratio", 0.5)

    # Weighted scoring: comments matter more than upvotes
    # Upvote ratio indicates quality (avoid controversial posts)
    return (score * 0.3) + (comments * 2) + (upvote_ratio * 50)


def _is_openclaw_sub(post: Dict) -> bool:
    """Return True when the post's ``subreddit`` field is r/openclaw."""
    return (post.get("subreddit") or "").lower() == "openclaw"


def fetch_reddit_content(hours: int = 24, limit_per_sub: int = 25) -> Dict[str, Any]:
    """Main function to fetch all Reddit content.

    Args:
        hours: Only posts created within this many hours are kept.
        limit_per_sub: ``limit`` passed to each subreddit request.

    Returns:
        A JSON-serializable summary dict with the top posts from r/openclaw,
        the top OpenClaw-related posts from the other subreddits, and the
        top posts overall, all ranked by ``score_post``.
    """
    all_posts = []

    print(f"🔍 Fetching Reddit posts from last {hours} hours...")

    for index, subreddit in enumerate(SUBREDDITS):
        if index:
            # Rate limiting - be nice to Reddit (no wait before the first
            # request and none after the last one).
            time.sleep(REQUEST_DELAY_SECONDS)

        print(f" 📡 r/{subreddit}...")
        posts = fetch_subreddit(subreddit, limit=limit_per_sub)

        # Filter by time
        recent_posts = filter_by_time(posts, hours)

        # For non-OpenClaw subreddits, filter for OpenClaw mentions
        if subreddit.lower() != "openclaw":
            recent_posts = filter_openclaw_related(recent_posts)

        print(f" Found {len(recent_posts)} recent OpenClaw-related posts")
        all_posts.extend(recent_posts)

    # Sort by engagement score
    all_posts.sort(key=score_post, reverse=True)

    # Separate into categories
    openclaw_subreddit = [p for p in all_posts if _is_openclaw_sub(p)]
    other_subreddits = [p for p in all_posts if not _is_openclaw_sub(p)]

    # Naive-UTC ISO string, the same format utcnow().isoformat() produced,
    # so existing readers of the JSON output keep working.
    fetched_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    return {
        "source": "reddit",
        "fetched_at": fetched_at,
        "time_window_hours": hours,
        "total_posts": len(all_posts),
        "openclaw_subreddit": openclaw_subreddit[:5],  # Top 5 from r/OpenClaw
        "other_subreddits": other_subreddits[:5],  # Top 5 from elsewhere
        "all_posts": all_posts[:10],  # Top 10 overall
    }


def main(argv: Optional[List[str]] = None) -> int:
    """Command-line entry point: fetch the content and write it as JSON.

    Args:
        argv: Arguments after the program name; defaults to ``sys.argv[1:]``.
            ``argv[0]`` is the look-back window in hours (default 24) and
            ``argv[1]`` the output path (default ``DEFAULT_OUTPUT_FILE``).

    Returns:
        Process exit status: 0 on success, 2 on a malformed HOURS argument.
    """
    args = sys.argv[1:] if argv is None else argv

    try:
        hours = int(args[0]) if len(args) > 0 else 24
    except ValueError:
        print(f"Invalid HOURS value: {args[0]!r} (expected an integer)")
        return 2
    output_file = args[1] if len(args) > 1 else DEFAULT_OUTPUT_FILE

    content = fetch_reddit_content(hours=hours)

    # Make sure the destination directory exists before writing.
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(content, f, indent=2)

    print(f"\n✅ Reddit content saved to {output_file}")
    print(f" Total posts: {content['total_posts']}")
    print(f" From r/OpenClaw: {len(content['openclaw_subreddit'])}")
    print(f" From other subs: {len(content['other_subreddits'])}")
    return 0


if __name__ == "__main__":
    sys.exit(main())