AI Newsletter Digest improvements: fixed QP soft line break decoding, URL extraction, and content cleaning

This commit is contained in:
Krilly
2026-03-04 13:29:22 +00:00
parent 29a98137a7
commit 57dd294675
13706 changed files with 2114953 additions and 237629 deletions

View File

@@ -1,38 +1,40 @@
#!/bin/bash
# NOTE(review): this text appears to be a diff hunk (-1,38 +1,40) rendered
# without its +/- markers, so lines from two revisions of the script seem to be
# interleaved below (two titles, two banners, two cd calls, two count checks).
# TODO confirm against the repository before running any of it.
# Daily AI Newsletter Digest - Fast Reliable Version
# Abort on the first command that exits non-zero outside a conditional.
set -e
# Daily AI Newsletter Digest - Enhanced with LLM Summarization
# Creates properly formatted digests with AI-powered summarization
# EMAIL_SKILL and SCRIPT_DIR hold the same directory under two names.
EMAIL_SKILL="/home/openclaw/.openclaw/workspace/skills/imap-smtp-email"
# File that receives the jq-filtered search result; fixed path in /tmp.
OUTPUT_FILE="/tmp/ai-newsletter-emails.json"
SCRIPT_DIR="/home/openclaw/.openclaw/workspace/skills/imap-smtp-email"
# Node script whose output carries the "AI_COUNT:" line parsed further down.
CHECK_SCRIPT="$SCRIPT_DIR/scripts/check-anthonymau-email.js"
# Directory holding parse-emails.py and summarize.py.
DIGEST_DIR="/home/openclaw/.openclaw/workspace/automations/ai-newsletter-digest"
# Progress messages are written to stderr; only results are written to stdout.
echo "🤖 Daily AI Newsletter Digest" >&2
echo "============================================================" >&2
echo "🤖 Daily AI Newsletter Digest (Enhanced)" >&2
echo "$(date)" >&2
echo "" >&2
echo "🔍 Searching for AI newsletters from last 48 hours..." >&2
# Relative paths such as scripts/imap.js below resolve against this directory.
cd "$SCRIPT_DIR"
# Single search for all recent emails, then filter locally
# Same directory as the cd above (EMAIL_SKILL equals SCRIPT_DIR).
cd "$EMAIL_SKILL"
echo "🔍 Checking for AI newsletters..." >&2
# Get recent emails and filter for AI newsletters (expanded to 48h and more sources)
# Keeps the entries whose .from matches the sender list; the "i" flag makes the
# match case-insensitive. stderr of both node and jq is discarded, and "[]" is
# substituted when the pipeline's exit status (jq's, since pipefail is not set)
# is non-zero.
ALL_EMAILS=$(node scripts/imap.js search --recent 48h --limit 100 2>/dev/null | jq '[.[] | select(.from | test("AI Valley|AI Secret|DeepView|Deep View|The Rundown|TLDR|Benedict|aivalley|aisecret|deepview|therundown|tldr|benedict"; "i"))]' 2>/dev/null || echo "[]")
# Runs CHECK_SCRIPT under a 60 second limit, capturing stdout and stderr together.
# NODE_TLS_REJECT_UNAUTHORIZED=0 turns off TLS certificate verification in Node.
# NOTE(review): with set -e active, a non-zero exit here (including a timeout)
# ends the script before any "no newsletters" message is printed.
RESULT=$(NODE_TLS_REJECT_UNAUTHORIZED=0 timeout 60 node "$CHECK_SCRIPT" 2>&1)
# Save results
echo "$ALL_EMAILS" > "$OUTPUT_FILE"
# Second ':'-separated field of the line(s) beginning with "AI_COUNT:".
AI_COUNT=$(echo "$RESULT" | grep "^AI_COUNT:" | cut -d: -f2)
# Number of elements in the filtered JSON array.
EMAIL_COUNT=$(echo "$ALL_EMAILS" | jq '. | length')
echo "" >&2
echo "🎯 Found $EMAIL_COUNT AI-related emails" >&2
# NOTE(review): this `if` has no matching `fi` in the visible text (the only
# `fi` below closes the next `if`), and its message says 24 hours although the
# search above uses --recent 48h.
if [ "$EMAIL_COUNT" -eq 0 ]; then
echo "No new AI newsletters in the last 24 hours." >&2
echo "[]"
# An absent AI_COUNT line is handled the same way as a count of "0".
if [ -z "$AI_COUNT" ] || [ "$AI_COUNT" = "0" ]; then
echo "No AI newsletters found" >&2
echo "🤖 No AI newsletters today. Check back tomorrow!"
exit 0
fi
echo "" >&2
echo "📧 Ready to process $EMAIL_COUNT newsletters" >&2
echo "Found $AI_COUNT newsletters" >&2
# Output the emails
cat "$OUTPUT_FILE"
# Write result to temp file for Python parsing
# NOTE(review): no trap is installed, so the temp file stays behind if a later
# command fails and set -e ends the script before the final rm.
TMPFILE=$(mktemp)
echo "$RESULT" > "$TMPFILE"
# Parse AI_EMAIL / AI_CONTENT pairs with improved content extraction
# parse-emails.py receives the temp file path as its only argument; its stdout
# is captured in PARSED.
PARSED=$(python3 "$DIGEST_DIR/parse-emails.py" "$TMPFILE")
echo "🧠 Generating LLM-powered summary..." >&2
# Use LLM to summarize (or fallback to basic formatting)
# PARSED is fed to summarize.py on stdin; whatever it prints goes to stdout.
echo "$PARSED" | python3 "$DIGEST_DIR/summarize.py"
# Remove the temp file created above.
rm -f "$TMPFILE"