Initial backup: 18 monitoring scripts + timers + docs

- 18 comprehensive monitoring checks
- 5 systemd timers (5min, 15min, hourly, daily, weekly)
- Complete documentation
- NTFY secure notification system
- Fixed debianvm disk space (91% to 57%)
- Fixed CloudReve integration
- Date: 2026-01-07
This commit is contained in:
PVE Monitoring System
2026-01-07 16:30:34 +08:00
commit 3a14fd2736
34 changed files with 1067 additions and 0 deletions

39
scripts/check-databases.sh Executable file
View File

@@ -0,0 +1,39 @@
#!/bin/bash
# Check critical database services
#
# Probes PostgreSQL, Redis and the aria2 RPC endpoint on the debianvm host
# over SSH. Healthy results are written to syslog (tag: database-monitor);
# failures are reported through the send-ntfy.sh helper at "critical" level.
#
# Environment:
#   DEBIANVM_SSH_PASSWORD  SSH password for root on the target host
#                          (optional; falls back to the legacy built-in value).
#
# The script exits 0 once all checks have run, whether or not a service is
# down; alerting happens through notifications, not the exit status.
set -euo pipefail

readonly SEND_NTFY="/usr/local/bin/send-ntfy.sh"
readonly DEBIANVM_HOST="DEBIANVM"
readonly LOG_TAG="database-monitor"

# NOTE(security): a hard-coded root password is a liability. It is kept only
# as a backward-compatible default; prefer key-based SSH authentication or
# supply DEBIANVM_SSH_PASSWORD from a protected environment file.
readonly DEBIANVM_SSH_PASSWORD="${DEBIANVM_SSH_PASSWORD:-admin}"

# Write one message to syslog under the monitor's tag.
#   $1 - message text
log() {
  logger -t "$LOG_TAG" "$1"
}

# Send a notification through the ntfy helper.
#   $@ - priority, title, message, tags (passed through unchanged)
# A failing notifier must not abort the run (set -e would otherwise stop the
# script and skip the remaining checks), so the failure is logged instead.
notify() {
  "$SEND_NTFY" "$@" || log "WARNING: failed to send notification: ${2:-}"
}

# Run a command on the debianvm host as root.
#   $1 - command line to execute remotely
# Outputs: the remote command's stdout (stderr is discarded).
# Returns: the exit status of the timeout/sshpass/ssh chain.
remote_exec() {
  # The password is handed to sshpass through the environment of this one
  # command (-e) rather than argv (-p), so it does not show up in `ps`.
  # NOTE(security): StrictHostKeyChecking=no disables host key verification.
  SSHPASS="$DEBIANVM_SSH_PASSWORD" timeout 10 sshpass -e ssh \
    -o StrictHostKeyChecking=no -o ConnectTimeout=5 \
    "root@${DEBIANVM_HOST}" "$1" 2>/dev/null
}

# Check PostgreSQL on debianvm
check_postgresql() {
  local output status=0
  output=$(remote_exec "docker exec postgresql pg_isready 2>/dev/null") || status=$?

  if [[ "$output" == *"accepting connections"* ]]; then
    log "PostgreSQL: OK"
  elif (( status != 0 )) && [[ -z "$output" ]]; then
    # Probe failed without producing any output: host or container unreachable.
    notify critical "PostgreSQL Down" "🔴 CRITICAL: PostgreSQL on debianvm is DOWN or unreachable! Multiple services affected." "skull,error,database"
  else
    notify critical "PostgreSQL Issue" "🔴 CRITICAL: PostgreSQL on debianvm not accepting connections" "skull,error,database"
  fi
}

# Check Redis on debianvm
check_redis() {
  local output status=0
  output=$(remote_exec "docker exec redis redis-cli ping 2>/dev/null") || status=$?

  if [[ "$output" == "PONG" ]]; then
    log "Redis: OK"
  elif (( status != 0 )) && [[ -z "$output" ]]; then
    # Probe failed without producing any output: host or container unreachable.
    notify critical "Redis Down" "🔴 CRITICAL: Redis on debianvm is DOWN or unreachable!" "skull,error,database"
  else
    notify critical "Redis Issue" "🔴 CRITICAL: Redis on debianvm not responding to PING" "skull,error,database"
  fi
}

# Check aria2 RPC (CloudReve depends on this)
check_aria2() {
  local status=0
  # Only the exit status matters: curl runs without -f, so any completed HTTP
  # exchange counts as "up" and the response body is not inspected. Judging
  # by status (not by comparing output to a sentinel) means a failed probe
  # that printed partial output is no longer mistaken for success.
  remote_exec "curl -s -m 5 http://localhost:6800 2>/dev/null" >/dev/null || status=$?

  if (( status == 0 )); then
    log "aria2 RPC: OK"
  else
    notify critical "aria2 RPC Down" "🔴 CRITICAL: aria2 RPC on debianvm is DOWN! CloudReve downloads will fail." "skull,error"
  fi
}

# Run every check in order; each one reports independently of the others.
main() {
  check_postgresql
  check_redis
  check_aria2
  log "Database health check completed"
}

main "$@"