Initial backup: 18 monitoring scripts + timers + docs
- 18 comprehensive monitoring checks
- 5 systemd timers (5min, 15min, hourly, daily, weekly)
- Complete documentation
- NTFY secure notification system
- Fixed debianvm disk space (91% to 57%)
- Fixed CloudReve integration
- Date: 2026-01-07
This commit is contained in:
40
scripts/check-services.sh
Executable file
40
scripts/check-services.sh
Executable file
@@ -0,0 +1,40 @@
|
||||
#!/bin/bash
# Check critical service HTTP endpoints.
#
# Probes every entry in SERVICES over HTTP and then probes CloudReve from
# inside container 209 via `pct exec`. Healthy results are written to syslog
# (tag: service-monitor); problems are reported through $SEND_NTFY.
#
# Exit status: 0 when all checks ran and every notification was delivered,
# 1 when at least one notification could not be sent.
set -euo pipefail

SEND_NTFY="/usr/local/bin/send-ntfy.sh"

# Services to check: "NAME:URL:EXPECTED_CODE"
# NAME must not contain ':'; URL may (scheme separator, port).
# Note: Use actual container/VM IPs that can change with DHCP
# Better to check from inside the container when possible
SERVICES=(
  "Home Assistant:http://192.168.178.39:8123:200"
)

# Set to 1 when a notification could not be delivered.
exit_status=0

# Send a notification; all arguments are handed to $SEND_NTFY unchanged.
# A delivery failure is logged and remembered instead of aborting under
# `set -e`, so the remaining checks still run.
notify() {
  if ! "$SEND_NTFY" "$@"; then
    logger -t service-monitor "Failed to send notification: ${2:-}"
    exit_status=1
  fi
}

# Run the given probe command and print the HTTP status it reports.
# Prints exactly "FAILED" when the command exits non-zero. curl still writes
# "000" for %{http_code} when it cannot connect, so the captured output must
# be discarded on failure rather than appended to.
http_status() {
  local code
  if code=$("$@" 2>/dev/null); then
    printf '%s\n' "$code"
  else
    printf 'FAILED\n'
  fi
}

for svc_config in "${SERVICES[@]}"; do
  # Split on the FIRST and LAST colon only: the URL in between contains
  # colons of its own, so `IFS=: read` would cut it apart.
  NAME=${svc_config%%:*}
  url_and_code=${svc_config#*:}
  EXPECTED=${url_and_code##*:}
  URL=${url_and_code%:*}

  # Check HTTP response with timeout
  HTTP_CODE=$(http_status timeout 10 curl -s -o /dev/null -w "%{http_code}" "$URL")

  if [[ "$HTTP_CODE" == "FAILED" ]]; then
    notify critical "Service Unreachable" "🔴 CRITICAL: $NAME at $URL is UNREACHABLE (timeout or connection failed)" "skull,error,globe_with_meridians"
  elif [[ "$HTTP_CODE" != "$EXPECTED" ]]; then
    notify warning "Service Issue" "🟡 WARNING: $NAME returned HTTP $HTTP_CODE (expected $EXPECTED)" "warning,globe_with_meridians"
  else
    logger -t service-monitor "$NAME: OK (HTTP $HTTP_CODE)"
  fi
done

# Check CloudReve from inside its container (more reliable than external IP)
CLOUDREVE_CHECK=$(http_status pct exec 209 -- curl -s -o /dev/null -w "%{http_code}" http://localhost:5212 --max-time 5)

if [[ "$CLOUDREVE_CHECK" == "200" ]]; then
  logger -t service-monitor "CloudReve: OK (HTTP 200)"
elif [[ "$CLOUDREVE_CHECK" == "FAILED" ]]; then
  notify critical "CloudReve Down" "🔴 CRITICAL: CloudReve (CT 209) is not responding on port 5212" "skull,error,globe_with_meridians"
else
  notify warning "CloudReve Issue" "🟡 WARNING: CloudReve returned HTTP $CLOUDREVE_CHECK (expected 200)" "warning,globe_with_meridians"
fi

logger -t service-monitor "Service health check completed"
exit "$exit_status"
|
||||
Reference in New Issue
Block a user