AI Newsletter Digest improvements: fixed QP soft line break decoding, URL extraction, and content cleaning
This commit is contained in:
@@ -99,13 +99,13 @@ build_digest() {
|
||||
[[ -z "$line" ]] && continue
|
||||
|
||||
# Check if this is a title line (starts with [)
|
||||
if [[ "$line" =~ ^\[.*\].*:.*$ ]]; then
|
||||
if [[ "$line" =~ ^\[.+Z\].*\]\ (.*)$ ]]; then
|
||||
# Save previous article if exists
|
||||
if [[ -n "$current_title" ]]; then
|
||||
local score
|
||||
score=$(score_article "$current_title" "$current_source" "$current_cats")
|
||||
|
||||
local article_formatted="• *${current_source}*: ${current_title}\n ${current_url}\n"
|
||||
local article_formatted="• [${current_title}](${current_url}) \n _${current_source}_\n"
|
||||
|
||||
if ((score >= 5)); then
|
||||
must_read+="$article_formatted\n"
|
||||
@@ -116,16 +116,16 @@ build_digest() {
|
||||
((total_count++))
|
||||
fi
|
||||
|
||||
# Parse new article
|
||||
# Parse new article - format: [timestamp] [source] title
|
||||
current_date=$(echo "$line" | sed 's/^\[\([^]]*\)\].*/\1/')
|
||||
current_source=$(echo "$line" | sed 's/^\[[^]]*\] \([^:]*\):.*/\1/')
|
||||
current_title=$(echo "$line" | sed 's/^\[[^]]*\] [^:]*: //')
|
||||
current_source=$(echo "$line" | sed -E 's/^\[.+Z\] \[([^]]+)\].*/\1/')
|
||||
current_title=$(echo "$line" | sed -E 's/^\[.+Z\] \[[^]]+\] //')
|
||||
current_url=""
|
||||
current_cats=""
|
||||
|
||||
# Check if this is a URL line
|
||||
elif [[ "$line" =~ ^https?:// ]]; then
|
||||
current_url="$line"
|
||||
elif [[ "$line" =~ ^[[:space:]]*https?:// ]]; then
|
||||
current_url="$(echo "$line" | xargs)"
|
||||
|
||||
# Check if this is categories line
|
||||
elif [[ "$line" =~ ^Categories:\ ]]; then
|
||||
@@ -138,7 +138,7 @@ build_digest() {
|
||||
local score
|
||||
score=$(score_article "$current_title" "$current_source" "$current_cats")
|
||||
|
||||
local article_formatted="• *${current_source}*: ${current_title}\n ${current_url}\n"
|
||||
local article_formatted="• [${current_title}](${current_url}) \n _${current_source}_\n"
|
||||
|
||||
if ((score >= 5)); then
|
||||
must_read+="$article_formatted\n"
|
||||
@@ -149,16 +149,24 @@ build_digest() {
|
||||
((total_count++))
|
||||
fi
|
||||
|
||||
# Limit items per section to keep message under Telegram's 4096 char limit
|
||||
local must_read_limited="$(echo -e "$must_read" | head -n 16)" # ~8 items (2 lines each)
|
||||
local skimmable_limited="$(echo -e "$skimmable" | head -n 10)" # ~5 items (2 lines each)
|
||||
|
||||
# Build final message
|
||||
output="📰 *FreshRSS Daily Digest*\n\n"
|
||||
output+="Found *$total_count* unread articles.\n\n"
|
||||
|
||||
if [[ -n "$must_read" ]]; then
|
||||
output+="🔥 *Must Read*\n$must_read\n"
|
||||
local count_must=$(($(echo -e "$must_read" | grep -c '^•') + 0))
|
||||
output+="🔥 *Must Read* ($count_must)\n$must_read_limited"
|
||||
[[ "$must_read" != "$must_read_limited" ]] && output+="\n _...and more_\n\n" || output+="\n"
|
||||
fi
|
||||
|
||||
if [[ -n "$skimmable" ]]; then
|
||||
output+="📎 *Skimmable*\n$skimmable\n"
|
||||
local count_skim=$(($(echo -e "$skimmable" | grep -c '^•') + 0))
|
||||
output+="📎 *Skimmable* ($count_skim)\n$skimmable_limited"
|
||||
[[ "$skimmable" != "$skimmable_limited" ]] && output+="\n _...and more_\n\n" || output+="\n"
|
||||
fi
|
||||
|
||||
if [[ -z "$must_read" && -z "$skimmable" ]]; then
|
||||
@@ -167,6 +175,11 @@ build_digest() {
|
||||
|
||||
output+="\n📖 [Open FreshRSS]($FRESHRSS_URL)"
|
||||
|
||||
# Truncate if still too long (Telegram limit is 4096)
|
||||
if [[ ${#output} -gt 4000 ]]; then
|
||||
output="${output:0:3950}...\n\n_(truncated — more articles in FreshRSS)_\n\n📖 [Open FreshRSS]($FRESHRSS_URL)"
|
||||
fi
|
||||
|
||||
echo -e "$output"
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user