AI Newsletter Digest improvements: fixed QP soft line break decoding, URL extraction, and content cleaning
This commit is contained in:
205
archive/old-projects/openclaw-watchdog/watchdog.sh
Executable file
205
archive/old-projects/openclaw-watchdog/watchdog.sh
Executable file
@@ -0,0 +1,205 @@
#!/usr/bin/env bash
# openclaw-watchdog — Monitor OpenClaw gateway, auto-recover from crashes
# https://github.com/jlgrimes/openclaw-watchdog
# MIT License — Jared Grimes

# Strict mode: abort on unhandled command failure (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# ─── Configuration (all overridable via env vars) ────────────────────────────
OPENCLAW_CONFIG_PATH="${OPENCLAW_CONFIG_PATH:-$HOME/.openclaw/config.yaml}"
HEALTH_URL="${HEALTH_URL:-http://localhost:3000/health}"
CHECK_INTERVAL="${CHECK_INTERVAL:-30}"   # seconds between checks
FAIL_THRESHOLD="${FAIL_THRESHOLD:-3}"    # consecutive failures before escalation
WATCHDOG_LOG="${WATCHDOG_LOG:-$HOME/.openclaw/watchdog.log}"
GOOD_CONFIG_PATH="${GOOD_CONFIG_PATH:-$HOME/.openclaw/config.yaml.good}"

# Notification settings (Telegram + ntfy)
# A channel is only used when all of its settings are non-empty.
TELEGRAM_BOT_TOKEN="${TELEGRAM_BOT_TOKEN:-}"
TELEGRAM_CHAT="${TELEGRAM_CHAT:-1793951355}"
NTFY_URL="${NTFY_URL:-}"
NTFY_TOPIC="${NTFY_TOPIC:-}"
NTFY_MIN_PRIORITY="${NTFY_MIN_PRIORITY:-4}"   # floor applied to numeric ntfy priorities

# Legacy Discord settings (optional, for backwards compat)
DISCORD_CHANNEL_ID="${DISCORD_CHANNEL_ID:-}"
DISCORD_BOT_TOKEN="${DISCORD_BOT_TOKEN:-}"

# ─── State ────────────────────────────────────────────────────────────────────
# Globals mutated by the main loop and send_sos.
fail_count=0     # health-check failures since the last healthy check
alerted=false    # true after SOS sent, prevents spam
was_down=false   # tracks if we're recovering

# ─── Logging ──────────────────────────────────────────────────────────────────
#######################################
# Write a timestamped line to stdout and append it to the watchdog log file.
# Globals:   WATCHDOG_LOG (read)
# Arguments: $* - message words, joined with spaces
# Outputs:   "[YYYY-MM-DD HH:MM:SS] message" on stdout and in WATCHDOG_LOG
# Returns:   0 always; the file append is best-effort
#######################################
log() {
  local ts line
  ts="$(date '+%Y-%m-%d %H:%M:%S')"
  line="[$ts] $*"

  printf '%s\n' "$line"

  # The log directory may not exist yet on a fresh install. A logging failure
  # must never take the watchdog down under `set -e`, so both steps are
  # tolerated; the shell's own error message still reaches stderr.
  mkdir -p -- "$(dirname -- "$WATCHDOG_LOG")" 2>/dev/null || true
  printf '%s\n' "$line" >>"$WATCHDOG_LOG" || true
}

# ─── Discord messaging ───────────────────────────────────────────────────────
#######################################
# Post a message to the configured Discord channel (best-effort).
# Globals:   DISCORD_CHANNEL_ID, DISCORD_BOT_TOKEN (read)
# Arguments: $1 - message text
# Returns:   0 when Discord is not configured, when encoding fails, or when
#            the request succeeds; otherwise the status of the warning log call
#######################################
discord_send() {
  local msg="$1"
  [[ -z "$DISCORD_CHANNEL_ID" ]] && return 0
  [[ -z "$DISCORD_BOT_TOKEN" ]] && return 0

  # jq -Rs turns the raw text into one JSON string literal (quotes included).
  local content
  if ! content="$(printf '%s' "$msg" | jq -Rs .)"; then
    log "WARN: Failed to JSON-encode Discord message"
    return 0
  fi

  # --max-time keeps a hung endpoint from stalling the monitoring loop.
  curl -sf --max-time 10 -X POST \
    "https://discord.com/api/v10/channels/${DISCORD_CHANNEL_ID}/messages" \
    -H "Authorization: Bot ${DISCORD_BOT_TOKEN}" \
    -H "Content-Type: application/json" \
    -d "{\"content\": ${content}}" \
    >/dev/null 2>&1 || log "WARN: Failed to send Discord message"
}

# ─── Telegram messaging ──────────────────────────────────────────────────────
#######################################
# Send a message through the Telegram Bot API sendMessage method (best-effort).
# Globals:   TELEGRAM_BOT_TOKEN, TELEGRAM_CHAT (read)
# Arguments: $1 - message text, sent with parse_mode "Markdown"
# Returns:   0 when Telegram is not configured, when encoding fails, or when
#            the request succeeds; otherwise the status of the warning log call
#######################################
telegram_send() {
  local msg="$1"
  [[ -z "$TELEGRAM_BOT_TOKEN" ]] && return 0
  [[ -z "$TELEGRAM_CHAT" ]] && return 0

  # jq -Rs turns the raw text into one JSON string literal (quotes included).
  local text
  if ! text="$(printf '%s' "$msg" | jq -Rs .)"; then
    log "WARN: Failed to JSON-encode Telegram message"
    return 0
  fi

  # NOTE(review): the bot token is part of the URL given on curl's command
  # line, so it may be visible in process listings — confirm this is acceptable.
  # --max-time keeps a hung endpoint from stalling the monitoring loop.
  curl -sf --max-time 10 -X POST \
    "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
    -H "Content-Type: application/json" \
    -d "{\"chat_id\": \"$TELEGRAM_CHAT\", \"text\": ${text}, \"parse_mode\": \"Markdown\"}" \
    >/dev/null 2>&1 || log "WARN: Failed to send Telegram message"
}

# ─── ntfy messaging ─────────────────────────────────────────────────────────
#######################################
# Publish a notification to an ntfy topic (best-effort).
# Globals:   NTFY_URL, NTFY_TOPIC, NTFY_MIN_PRIORITY (read)
# Arguments: $1 - title
#            $2 - message body
#            $3 - priority: 1-5 or low|default|high|urgent (default 4)
#            $4 - value for the "Sound" header (default "default")
# Returns:   0 when ntfy is not configured or the request succeeds;
#            otherwise the status of the warning log call
#######################################
ntfy_send() {
  local title="$1"
  local msg="$2"
  local priority="${3:-4}"   # 1-5 or low|default|high|urgent
  local sound="${4:-default}"

  [[ -z "$NTFY_URL" ]] && return 0
  [[ -z "$NTFY_TOPIC" ]] && return 0

  # Numeric priorities are normalized; named ones are passed through untouched.
  local minp="${NTFY_MIN_PRIORITY:-4}"
  if [[ "$priority" =~ ^[0-9]+$ ]]; then
    # Force base 10 so a zero-padded value such as "08" is not read as octal.
    priority=$(( 10#$priority ))
    # Enforce minimum priority (default 4)
    if [[ "$minp" =~ ^[0-9]+$ ]] && (( priority < 10#$minp )); then
      priority=$(( 10#$minp ))
    fi
    # ntfy's numeric scale tops out at 5.
    if (( priority > 5 )); then
      priority=5
    fi
  fi

  # --max-time keeps a hung endpoint from stalling the monitoring loop.
  curl -sf --max-time 10 -X POST \
    "${NTFY_URL%/}/${NTFY_TOPIC}" \
    -H "Title: $title" \
    -H "Priority: $priority" \
    -H "Sound: $sound" \
    -d "$msg" \
    >/dev/null 2>&1 || log "WARN: Failed to send ntfy message"
}

# ─── Send to all configured channels ─────────────────────────────────────────
#######################################
# Fan a notification out to Discord, Telegram and ntfy.
# Arguments: $1 - title (used by ntfy only)
#            $2 - message body
#            $3 - ntfy priority (default 5); numeric values are clamped to 1..5
#######################################
send_notification() {
  local title="$1"
  local msg="$2"
  local priority="${3:-5}"

  # Callers pass values such as 10; keep numeric priorities inside ntfy's
  # 1..5 scale. Named priorities are forwarded unchanged.
  if [[ "$priority" =~ ^[0-9]+$ ]]; then
    priority=$(( 10#$priority ))
    if (( priority > 5 )); then
      priority=5
    elif (( priority < 1 )); then
      priority=1
    fi
  fi

  discord_send "$msg"
  telegram_send "$msg"
  ntfy_send "$title" "$msg" "$priority" "default"
}

# ─── Health check ─────────────────────────────────────────────────────────────
#######################################
# Probe the gateway health endpoint.
# Globals:   HEALTH_URL (read), HEALTH_TIMEOUT (read, optional, default 10 s)
# Returns:   curl's exit status: 0 on success, non-zero on connection failure,
#            timeout, or an HTTP error response (curl -f)
#######################################
check_health() {
  curl -sf --max-time "${HEALTH_TIMEOUT:-10}" "$HEALTH_URL" >/dev/null 2>&1
}

# ─── Save last-known-good config ─────────────────────────────────────────────
#######################################
# Snapshot the current config as the last-known-good copy.
# Globals:   OPENCLAW_CONFIG_PATH, GOOD_CONFIG_PATH (read)
# Returns:   0 when there is no config file or the copy succeeds; on a failed
#            copy, the status of the warning log call
#######################################
save_good_config() {
  [[ -f "$OPENCLAW_CONFIG_PATH" ]] || return 0

  # This runs on every healthy check; a failed snapshot (full disk,
  # permissions) is logged rather than allowed to abort the watchdog
  # under `set -e`.
  cp -- "$OPENCLAW_CONFIG_PATH" "$GOOD_CONFIG_PATH" \
    || log "WARN: Failed to snapshot config to ${GOOD_CONFIG_PATH}"
}

# ─── Revert to last-known-good config ────────────────────────────────────────
#######################################
# Restore the config from the last-known-good snapshot.
# Globals:   GOOD_CONFIG_PATH, OPENCLAW_CONFIG_PATH (read)
# Returns:   0 when the snapshot was copied back;
#            1 when no snapshot exists or the copy failed
#######################################
revert_config() {
  if [[ ! -f "$GOOD_CONFIG_PATH" ]]; then
    log "WARN: No good config snapshot available to revert"
    return 1
  fi

  log "Reverting config to last-known-good snapshot"
  # The caller invokes this as `revert_config && ...`, where errexit is
  # suspended, so the copy result has to be checked explicitly.
  if ! cp -- "$GOOD_CONFIG_PATH" "$OPENCLAW_CONFIG_PATH"; then
    log "ERROR: Failed to restore config from ${GOOD_CONFIG_PATH}"
    return 1
  fi
  return 0
}

# ─── Restart gateway ─────────────────────────────────────────────────────────
#######################################
# Ask OpenClaw to restart its gateway, then pause before the next check.
# Globals:   RESTART_GRACE_SECONDS (read, optional, default 5)
# Returns:   status of the final sleep; a failed restart command is logged,
#            not propagated
#######################################
restart_gateway() {
  log "Restarting OpenClaw gateway..."
  # Best-effort: the next health check decides whether the restart worked,
  # but a failing restart command is worth a line in the log.
  if ! openclaw gateway restart >/dev/null 2>&1; then
    log "WARN: 'openclaw gateway restart' exited non-zero"
  fi
  sleep "${RESTART_GRACE_SECONDS:-5}"
}

# ─── SOS alert ────────────────────────────────────────────────────────────────
#######################################
# Send the "gateway is down, manual intervention needed" alert.
# Globals:   FAIL_THRESHOLD (read), USER / LOGNAME (read, optional),
#            alerted (set to true)
#######################################
send_sos() {
  local hostname
  hostname="$(hostname 2>/dev/null || echo 'unknown')"

  # USER may be unset depending on how the script is launched; under `set -u`
  # a bare ${USER} would abort the script right before the alert goes out.
  local user="${USER:-${LOGNAME:-}}"
  if [[ -z "$user" ]]; then
    user="$(id -un 2>/dev/null || echo 'unknown')"
  fi

  local title="🚨 OpenClaw Gateway DOWN"
  local msg="🚨 **OpenClaw Gateway DOWN** on \`${hostname}\`

Watchdog tried:
1. ✅ Simple restart
2. ✅ Config rollback + restart
3. ❌ Still unreachable after ${FAIL_THRESHOLD}+ failures

**Manual intervention needed:**
\`\`\`
ssh ${user}@${hostname}
openclaw gateway status
journalctl -u openclaw-gateway --since '10 min ago'
\`\`\`"

  log "CRITICAL: Sending SOS alert"
  # 5 is the highest value on ntfy's 1..5 priority scale.
  send_notification "$title" "$msg" 5
  alerted=true
}

# ─── Recovery notification ────────────────────────────────────────────────────
#######################################
# Announce that the gateway is reachable again.
# Outputs:   one log line, plus a notification on every configured channel
#######################################
send_recovery() {
  local hostname
  hostname="$(hostname 2>/dev/null || echo 'unknown')"

  local title="✅ OpenClaw Gateway Recovered"
  local msg="✅ **OpenClaw Gateway recovered** on \`${hostname}\` — back online!"

  log "Gateway recovered"
  send_notification "$title" "$msg" 5
}

# ─── Main loop ────────────────────────────────────────────────────────────────
# Print "enabled" when every argument is non-empty, otherwise "disabled".
_channel_state() {
  local value
  for value in "$@"; do
    if [[ -z "$value" ]]; then
      printf 'disabled'
      return 0
    fi
  done
  printf 'enabled'
}

#######################################
# Poll the health endpoint forever and escalate on consecutive failures:
#   1st failure         -> restart the gateway
#   2nd failure         -> revert the config, then restart
#   >= FAIL_THRESHOLD   -> send one SOS alert
# A healthy check after a failure sends a recovery notice; every healthy
# check resets the counter and snapshots the config.
# Globals:   CHECK_INTERVAL, FAIL_THRESHOLD, HEALTH_URL and the notification
#            settings (read); fail_count, alerted, was_down (written)
# Returns:   does not return
#######################################
main() {
  log "Watchdog started (interval=${CHECK_INTERVAL}s, threshold=${FAIL_THRESHOLD}, health=${HEALTH_URL})"

  # A channel counts as enabled only when all settings its sender checks are
  # present; TELEGRAM_CHAT alone always has a default value.
  local telegram ntfy discord
  telegram="$(_channel_state "$TELEGRAM_BOT_TOKEN" "$TELEGRAM_CHAT")"
  ntfy="$(_channel_state "$NTFY_URL" "$NTFY_TOPIC")"
  discord="$(_channel_state "$DISCORD_CHANNEL_ID" "$DISCORD_BOT_TOKEN")"
  log "Notifications: Telegram=${telegram}, ntfy=${ntfy}, Discord=${discord}"

  while true; do
    if check_health; then
      # ── Healthy ──
      if [[ "$was_down" == true ]]; then
        send_recovery
        was_down=false
        alerted=false
      fi
      fail_count=0
      save_good_config
    else
      # ── Unhealthy ──
      fail_count=$((fail_count + 1))
      log "Health check failed (${fail_count}/${FAIL_THRESHOLD})"

      if [[ $fail_count -eq 1 ]]; then
        # Stage 1: simple restart
        was_down=true
        restart_gateway
      elif [[ $fail_count -eq 2 ]]; then
        # Stage 2: config revert + restart
        revert_config && restart_gateway
      elif [[ $fail_count -ge $FAIL_THRESHOLD && "$alerted" == false ]]; then
        # Stage 3: SOS (once)
        send_sos
      fi
    fi

    sleep "$CHECK_INTERVAL"
  done
}

# Start the watchdog, forwarding any command-line arguments.
main "$@"
Reference in New Issue
Block a user