AI Newsletter Digest improvements: fixed QP soft line break decoding, URL extraction, and content cleaning

This commit is contained in:
Krilly
2026-03-04 13:29:22 +00:00
parent 29a98137a7
commit 57dd294675
13706 changed files with 2114953 additions and 237629 deletions

View File

@@ -99,13 +99,13 @@ build_digest() {
[[ -z "$line" ]] && continue
# Check if this is a title line (starts with [)
if [[ "$line" =~ ^\[.*\].*:.*$ ]]; then
if [[ "$line" =~ ^\[.+Z\].*\]\ (.*)$ ]]; then
# Save previous article if exists
if [[ -n "$current_title" ]]; then
local score
score=$(score_article "$current_title" "$current_source" "$current_cats")
local article_formatted="*${current_source}*: ${current_title}\n ${current_url}\n"
local article_formatted="[${current_title}](${current_url}) \n _${current_source}_\n"
if ((score >= 5)); then
must_read+="$article_formatted\n"
@@ -116,16 +116,16 @@ build_digest() {
((total_count++))
fi
# Parse new article
# Parse new article - format: [timestamp] [source] title
current_date=$(echo "$line" | sed 's/^\[\([^]]*\)\].*/\1/')
current_source=$(echo "$line" | sed 's/^\[[^]]*\] \([^:]*\):.*/\1/')
current_title=$(echo "$line" | sed 's/^\[[^]]*\] [^:]*: //')
current_source=$(echo "$line" | sed -E 's/^\[.+Z\] \[([^]]+)\].*/\1/')
current_title=$(echo "$line" | sed -E 's/^\[.+Z\] \[[^]]+\] //')
current_url=""
current_cats=""
# Check if this is a URL line
elif [[ "$line" =~ ^https?:// ]]; then
current_url="$line"
elif [[ "$line" =~ ^[[:space:]]*https?:// ]]; then
current_url="$(echo "$line" | xargs)"
# Check if this is categories line
elif [[ "$line" =~ ^Categories:\ ]]; then
@@ -138,7 +138,7 @@ build_digest() {
local score
score=$(score_article "$current_title" "$current_source" "$current_cats")
local article_formatted="*${current_source}*: ${current_title}\n ${current_url}\n"
local article_formatted="[${current_title}](${current_url}) \n _${current_source}_\n"
if ((score >= 5)); then
must_read+="$article_formatted\n"
@@ -149,16 +149,24 @@ build_digest() {
((total_count++))
fi
# Limit items per section to keep message under Telegram's 4096 char limit
local must_read_limited="$(echo -e "$must_read" | head -n 16)" # ~5 items (3 lines each: link line, source line, blank separator)
local skimmable_limited="$(echo -e "$skimmable" | head -n 10)" # ~3 items, assuming skimmable entries use the same 3-line layout
# Build final message
output="📰 *FreshRSS Daily Digest*\n\n"
output+="Found *$total_count* unread articles.\n\n"
if [[ -n "$must_read" ]]; then
output+="🔥 *Must Read*\n$must_read\n"
local count_must=$(($(echo -e "$must_read" | grep -c '^•') + 0))
output+="🔥 *Must Read* ($count_must)\n$must_read_limited"
[[ "$must_read" != "$must_read_limited" ]] && output+="\n _...and more_\n\n" || output+="\n"
fi
if [[ -n "$skimmable" ]]; then
output+="📎 *Skimmable*\n$skimmable\n"
local count_skim=$(($(echo -e "$skimmable" | grep -c '^•') + 0))
output+="📎 *Skimmable* ($count_skim)\n$skimmable_limited"
[[ "$skimmable" != "$skimmable_limited" ]] && output+="\n _...and more_\n\n" || output+="\n"
fi
if [[ -z "$must_read" && -z "$skimmable" ]]; then
@@ -167,6 +175,11 @@ build_digest() {
output+="\n📖 [Open FreshRSS]($FRESHRSS_URL)"
# Truncate if still too long (Telegram limit is 4096)
if [[ ${#output} -gt 4000 ]]; then
output="${output:0:3950}...\n\n_(truncated — more articles in FreshRSS)_\n\n📖 [Open FreshRSS]($FRESHRSS_URL)"
fi
echo -e "$output"
}