AI Newsletter Digest improvements: fixed QP soft line break decoding, URL extraction, and content cleaning
This commit is contained in:
107
automations/ai-newsletter-digest/format-digest.py
Normal file
107
automations/ai-newsletter-digest/format-digest.py
Normal file
@@ -0,0 +1,107 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
AI Newsletter Digest - Enhanced Version
|
||||
Creates properly summarized digests like the OpenClaw Daily Digest
|
||||
"""
|
||||
import json
|
||||
import sys
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
def _sender_name(sender):
    """Return the display name from a ``Name <address>`` sender string.

    Surrounding double quotes are dropped. When there is no display name
    (e.g. ``<news@example.com>``) the bare address is returned instead, so a
    newsletter is never listed under an empty source.
    """
    name, _, rest = sender.partition('<')
    name = name.strip().strip('"').strip()
    if name:
        return name
    return rest.partition('>')[0].strip() or sender.strip()


def _clean_excerpt(raw, limit):
    """Return the first *limit* chars of *raw* with QP residue removed.

    Quoted-printable soft line breaks (``=`` directly before a line break) and
    ``=20`` encoded spaces are undone BEFORE whitespace is collapsed. Doing it
    afterwards would force matching on ``'= '``, which also deletes every
    ordinary equals sign in prose such as ``x = 5``.
    """
    text = raw[:limit]
    text = re.sub(r'=[ \t]*\r?\n', '', text)  # soft line break: join the halves
    text = text.replace('=20', ' ')
    return re.sub(r'\s+', ' ', text).strip()


def _key_sentence(text, min_chars=50, lookahead=3):
    """Return the first sufficiently long sentence among the leading ones.

    Sentences are split on ``.``, ``!`` or ``?`` followed by whitespace, so
    version numbers, decimals and URLs ("GPT-4.5", "0.3", "example.com") are
    not cut in half. Only the first *lookahead* sentences are considered; an
    empty string is returned when none is longer than *min_chars*.
    """
    for sentence in re.split(r'(?<=[.!?])\s+', text)[:lookahead]:
        sentence = sentence.strip()
        # Measure without trailing punctuation to keep the original threshold.
        if len(sentence.rstrip('.!?').rstrip()) > min_chars:
            return sentence if sentence[-1] in '.!?' else sentence + '.'
    return ''


def format_digest(newsletters_json, *, now=None, max_links=3, excerpt_chars=800):
    """Format newsletters into a proper digest with summaries.

    Args:
        newsletters_json: JSON text holding a list of objects. Each object
            must have ``'from'`` and ``'subject'``; ``'content'`` and
            ``'urls'`` are optional and may be null.
        now: Timestamp used for the header date and the "Generated" footer.
            Defaults to the current time in AWST (UTC+8). An aware datetime
            is converted to AWST; a naive one is used as given.
        max_links: Maximum number of URLs listed per newsletter.
        excerpt_chars: How many leading characters of ``'content'`` are
            inspected when looking for the key sentence.

    Returns:
        The digest as a single string ending with a newline.

    Raises:
        json.JSONDecodeError: If *newsletters_json* is not valid JSON.
        KeyError: If an entry lacks ``'from'`` or ``'subject'``.
    """
    from datetime import timedelta, timezone

    # The footer is labelled AWST, so take the time in that zone explicitly
    # rather than trusting the host's local time zone. One timestamp serves
    # both header and footer so they cannot disagree around midnight.
    awst = timezone(timedelta(hours=8))
    if now is None:
        now = datetime.now(awst)
    elif now.tzinfo is not None:
        now = now.astimezone(awst)

    newsletters = json.loads(newsletters_json)
    senders = [_sender_name(nl['from']) for nl in newsletters]
    # dict.fromkeys de-duplicates while keeping first-seen order.
    sources_str = ", ".join(dict.fromkeys(senders))

    date_str = now.strftime("%A, %B %d, %Y")
    count = len(newsletters)
    rule = "═" * 60

    parts = ["\n".join([
        f"🤖 **AI NEWSLETTER DIGEST — {date_str}**",
        f"Your synthesized briefing from {count} AI newsletters",
        "",
        rule,
        "📊 TODAY'S OVERVIEW",
        rule,
        f"• {count} Newsletters Analyzed",
        f"• Sources: {sources_str}",
        "",
        rule,
        "🔥 TOP STORIES",
        rule,
        "",
    ])]

    for nl, source in zip(newsletters, senders):
        subject = nl['subject']
        # `or` also covers an explicit JSON null, which .get() would pass through.
        content = _clean_excerpt(nl.get('content') or '', excerpt_chars)
        urls = nl.get('urls') or []
        key_sentence = _key_sentence(content)

        parts.append(f"\n📌 **{subject}**\n")
        parts.append(f" Source: {source}\n")
        if key_sentence:
            parts.append(f" \n {key_sentence}\n")

        # List at most `max_links` URLs to keep each entry short.
        if urls:
            parts.append(" \n 🔗 Links:\n")
            parts.extend(f" • {url}\n" for url in urls[:max_links])

        parts.append("\n---\n")

    parts.append("\n🦀 Krilly the Crab | AI Newsletter Digest\n")
    # The em dash is kept out of the strftime pattern; non-ASCII format
    # strings are not handled reliably by every platform's strftime.
    time_str = now.strftime("%I:%M %p")
    parts.append(f"Generated: {date_str} — {time_str} AWST\n")

    return "".join(parts)
|
||||
|
||||
def main():
    """Read newsletter JSON from a file or stdin and print the digest.

    When a command-line argument is given it is treated as the path of the
    JSON file to read; otherwise the JSON is read from stdin. If formatting
    fails for any reason, the error is reported on stderr and the raw input
    is printed unchanged so the caller still receives some output.
    """
    if len(sys.argv) > 1:
        # JSON is UTF-8 and newsletters routinely contain emoji / non-ASCII
        # text, so do not rely on the platform's default encoding.
        with open(sys.argv[1], 'r', encoding='utf-8') as f:
            data = f.read()
    else:
        data = sys.stdin.read()

    try:
        digest = format_digest(data)
        print(digest)
    except Exception as e:
        # Deliberate best-effort boundary: never lose the input entirely.
        print(f"Error formatting digest: {e}", file=sys.stderr)
        # Fallback: just print the raw data
        print(data)


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user