Initial backup: 18 monitoring scripts + timers + docs

- 18 comprehensive monitoring checks
- 5 systemd timers (5min, 15min, hourly, daily, weekly)
- Complete documentation
- NTFY secure notification system
- Fixed debianvm disk space (91% to 57%)
- Fixed CloudReve integration
- Date: 2026-01-07
This commit is contained in:
PVE Monitoring System
2026-01-07 16:30:34 +08:00
commit 3a14fd2736
34 changed files with 1067 additions and 0 deletions

43
scripts/check-containers.sh Executable file
View File

@@ -0,0 +1,43 @@
#!/bin/bash
# Check LXC container status and root-filesystem disk usage.
#
# For every "CTID:NAME" entry in CRITICAL_CONTAINERS the script:
#   1. skips the container (with a syslog note) when `pct status` fails,
#   2. sends a critical notification when the container is not running,
#   3. otherwise reads `df` for / inside the container and notifies when
#      usage is above DISK_WARN_PCT (warning) or DISK_CRIT_PCT (critical).
#
# Environment:
#   SEND_NTFY  notification command (default: /usr/local/bin/send-ntfy.sh),
#              invoked as: SEND_NTFY <priority> <title> <message> <tags>
set -euo pipefail

SEND_NTFY="${SEND_NTFY:-/usr/local/bin/send-ntfy.sh}"
readonly DISK_WARN_PCT=80
readonly DISK_CRIT_PCT=90

# Critical containers that should always be running (CT IDs only, not VMs!)
CRITICAL_CONTAINERS=("200:docker" "209:cloudreve" "221:gitea" "299:sftpgo")

#######################################
# Write one line to syslog under the container-monitor tag.
# Best-effort: a logging failure must never abort the monitoring run.
# Arguments: $1 - message text
#######################################
log_msg() {
  logger -t container-monitor "$1" || true
}

#######################################
# Send a notification through SEND_NTFY.
# A failing notifier is logged instead of propagated; otherwise `set -e`
# would end the run and the remaining containers would go unchecked.
# Arguments: $1 - priority, $2 - title, $3 - message, $4 - tags
#######################################
notify() {
  if ! "$SEND_NTFY" "$@"; then
    log_msg "Failed to send notification: $2"
  fi
}

#######################################
# Check a single container: existence, running state, then disk usage of /.
# Arguments: $1 - container ID, $2 - human-readable container name
# Returns:   always 0; problems are reported via notify / log_msg
#######################################
check_container() {
  local ctid=$1 name=$2
  local status disk_info
  local total="" used="" usage=""

  # Check if container exists first
  if ! pct status "$ctid" >/dev/null 2>&1; then
    log_msg "CT $ctid ($name) does not exist, skipping"
    return 0
  fi

  # Check if container is running. The container may vanish between the two
  # calls, so a failure here yields an empty status rather than an abort.
  status=$(pct status "$ctid" 2>/dev/null | awk '{print $2}') || status=""
  if [[ "$status" != "running" ]]; then
    notify critical "Container Down" "🔴 CRITICAL: Container $name (CT $ctid) is $status (expected: running)" "skull,error,package"
    return 0
  fi

  # Check disk usage inside container. -P keeps each filesystem on a single
  # line so the column positions parsed below are stable.
  if ! disk_info=$(pct exec "$ctid" -- df -hP / 2>/dev/null); then
    log_msg "CT $ctid ($name): could not read disk usage, skipping disk check"
    return 0
  fi

  # Last line of df output: Filesystem Size Used Avail Use% Mounted-on
  read -r total used usage < <(
    tail -n 1 <<< "$disk_info" | awk '{sub(/%$/, "", $5); print $2, $3, $5}'
  ) || true

  # Guard the arithmetic below against anything that is not a plain integer.
  if [[ ! "$usage" =~ ^[0-9]+$ ]]; then
    log_msg "CT $ctid ($name): unexpected df output, skipping disk check"
    return 0
  fi

  # 10# forces base 10 so a leading zero is never read as octal.
  if (( 10#$usage > DISK_CRIT_PCT )); then
    notify critical "Container Disk Critical" "🔴 CRITICAL: Container $name (CT $ctid) disk at ${usage}% (Used: $used/$total)" "cd,skull,package"
  elif (( 10#$usage > DISK_WARN_PCT )); then
    notify warning "Container Disk Warning" "🟡 WARNING: Container $name (CT $ctid) disk at ${usage}% (Used: $used/$total)" "cd,warning,package"
  fi
  return 0
}

#######################################
# Run the check for every configured container, then log completion.
# Globals: CRITICAL_CONTAINERS (read)
#######################################
main() {
  local ct_config ctid name
  for ct_config in "${CRITICAL_CONTAINERS[@]}"; do
    IFS=':' read -r ctid name <<< "$ct_config"
    check_container "$ctid" "$name"
  done
  log_msg "Container check completed"
}

main "$@"