Initial backup: 18 monitoring scripts + timers + docs

- 18 comprehensive monitoring checks
- 5 systemd timers (5min, 15min, hourly, daily, weekly)
- Complete documentation
- NTFY secure notification system
- Fixed debianvm disk space (91% to 57%)
- Fixed CloudReve integration
- Date: 2026-01-07
This commit is contained in:
PVE Monitoring System
2026-01-07 16:30:34 +08:00
commit 3a14fd2736
34 changed files with 1067 additions and 0 deletions

50
scripts/check-pve-host.sh Executable file
View File

@@ -0,0 +1,50 @@
#!/bin/bash
# Monitor the PVE host itself: root and /mnt/ssd0 disk usage, RAM usage and
# the state of the critical PVE services.
#
# Alerts are sent through $SEND_NTFY, which this script invokes with four
# positional arguments: a level ("critical"/"warning"), a title, a message
# body and a comma-separated tag list.
# NOTE(review): the meaning of those arguments is inferred from the call
# sites below; confirm against send-ntfy.sh.
#
# Every check runs even when an earlier probe or notification fails. Such
# failures are reported on stderr and make the script exit with status 1
# once all checks have been attempted.
set -euo pipefail

# Label used in alert texts (deliberately overrides bash's own $HOSTNAME).
HOSTNAME="pve"
SEND_NTFY="/usr/local/bin/send-ntfy.sh"
DISK_CRIT_PCT=90
DISK_WARN_PCT=80
MEM_WARN_PCT=90
CRITICAL_SERVICES=("pveproxy" "pvedaemon" "pve-cluster" "pvestatd")
readonly HOSTNAME SEND_NTFY DISK_CRIT_PCT DISK_WARN_PCT MEM_WARN_PCT
readonly -a CRITICAL_SERVICES

# Number of probes/notifications that failed during this run.
failures=0
# Fragments of the final syslog summary; replaced once a value is measured.
root_summary="unavailable"
mem_summary="unavailable"

# Print a diagnostic on stderr and remember that this run was not clean.
record_failure() {
  printf 'check-pve-host: %s\n' "$*" >&2
  failures=$((failures + 1))
}

# Forward all arguments to $SEND_NTFY. A failed delivery is recorded instead
# of aborting, so one undeliverable alert cannot hide the remaining checks.
notify() {
  "$SEND_NTFY" "$@" || record_failure "could not send notification: ${2:-}"
}

# Succeed only if $1 is a non-empty string of decimal digits.
is_uint() {
  [[ ${1:-} =~ ^[0-9]+$ ]]
}

# Print "<use-percent> <used> <size> <avail>" for the filesystem holding $1.
# A single df call keeps the four values consistent with each other, and -P
# keeps each filesystem on one output line so the column numbers are stable.
disk_stats() {
  df -hP -- "$1" | awk 'NR == 2 { sub(/%$/, "", $5); print $5, $3, $2, $4 }'
}

# Send a critical or warning disk alert depending on the usage percentage.
# Arguments: $1 usage percent, $2 critical title, $3 warning title,
#            $4 message text placed after the severity prefix.
alert_disk() {
  local pct=$1 crit_title=$2 warn_title=$3 detail=$4
  if ((pct > DISK_CRIT_PCT)); then
    notify critical "$crit_title" "🔴 CRITICAL: $detail" "cd,skull"
  elif ((pct > DISK_WARN_PCT)); then
    notify warning "$warn_title" "🟡 WARNING: $detail" "cd,warning"
  fi
}

# Check the root partition and record its usage for the syslog summary.
check_root() {
  local stats pct used total avail
  if ! stats=$(disk_stats /) ||
    ! read -r pct used total avail <<<"$stats" ||
    ! is_uint "$pct"; then
    record_failure "could not read disk usage of /"
    return 0
  fi
  root_summary="${pct}%"
  alert_disk "$pct" "PVE Host - Disk Critical" "PVE Host - Disk Warning" \
    "$HOSTNAME root partition at ${pct}% (Used: $used/$total, Free: $avail)"
}

# Check /mnt/ssd0 (local SSD storage). Nothing is checked or reported when
# the path is not a mount point.
check_ssd0() {
  local stats pct used total avail
  mountpoint -q /mnt/ssd0 || return 0
  if ! stats=$(disk_stats /mnt/ssd0) ||
    ! read -r pct used total avail <<<"$stats" ||
    ! is_uint "$pct"; then
    record_failure "could not read disk usage of /mnt/ssd0"
    return 0
  fi
  alert_disk "$pct" "PVE Host - SSD0 Critical" "PVE Host - SSD0 Warning" \
    "/mnt/ssd0 at ${pct}% (Used: $used/$total)"
}

# Check RAM usage (used / total, rounded to a whole percent) and record it
# for the syslog summary.
check_ram() {
  local human pct total used
  # LC_ALL=C pins the "Mem:" row label that the awk patterns match on; the
  # "$2 > 0" guard avoids a division by zero on a malformed row.
  if ! human=$(LC_ALL=C free -h | awk '/^Mem:/ { print $2, $3 }') ||
    ! pct=$(LC_ALL=C free |
      awk '/^Mem:/ && $2 > 0 { printf "%.0f", $3 / $2 * 100 }') ||
    ! is_uint "$pct"; then
    record_failure "could not read RAM usage"
    return 0
  fi
  read -r total used <<<"$human"
  mem_summary="${pct}%"
  if ((pct > MEM_WARN_PCT)); then
    notify warning "PVE Host - High RAM" \
      "🟡 WARNING: $HOSTNAME RAM at ${pct}% (Used: $used/$total)" "warning"
  fi
}

# Send a critical alert for every service in CRITICAL_SERVICES that
# systemctl does not report as active.
check_services() {
  local service
  for service in "${CRITICAL_SERVICES[@]}"; do
    if ! systemctl is-active --quiet "$service"; then
      notify critical "PVE Host - Service Down" \
        "🔴 CRITICAL: $HOSTNAME service '$service' is DOWN!" "skull,error"
    fi
  done
}

# Run all checks, write the summary to syslog, then report overall status.
main() {
  check_root
  check_ssd0
  check_ram
  check_services
  logger -t pve-monitor \
    "PVE host check completed: Root ${root_summary}, RAM ${mem_summary}"
  if ((failures > 0)); then
    exit 1
  fi
}

main "$@"