Initial backup: 18 monitoring scripts + timers + docs

- 18 comprehensive monitoring checks
- 5 systemd timers (5min, 15min, hourly, daily, weekly)
- Complete documentation
- NTFY secure notification system
- Fixed debianvm disk space (91% to 57%)
- Fixed CloudReve integration
- Date: 2026-01-07
2026-01-07 16:30:34 +08:00
commit 3a14fd2736
34 changed files with 1067 additions and 0 deletions
--- a/scripts/check-containers.sh
+++ b/scripts/check-containers.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# Check LXC container status and disk usage
+set -euo pipefail
+
+SEND_NTFY="/usr/local/bin/send-ntfy.sh"
+
+# Critical containers that should always be running (CT IDs only, not VMs!)
+CRITICAL_CONTAINERS=("200:docker" "209:cloudreve" "221:gitea" "299:sftpgo")
+
+for ct_config in "${CRITICAL_CONTAINERS[@]}"; do
+    IFS=':' read -r CTID NAME <<< "$ct_config"
+    
+    # Check if container exists first
+    if ! pct status $CTID >/dev/null 2>&1; then
+        logger -t container-monitor "CT $CTID ($NAME) does not exist, skipping"
+        continue
+    fi
+    
+    # Check if container is running
+    STATUS=$(pct status $CTID 2>/dev/null | awk '{print $2}')
+    
+    if [ "$STATUS" != "running" ]; then
+        $SEND_NTFY critical "Container Down" "🔴 CRITICAL: Container $NAME (CT $CTID) is $STATUS (expected: running)" "skull,error,package"
+        continue
+    fi
+    
+    # Check disk usage inside container
+    DISK_INFO=$(pct exec $CTID -- df -h / 2>/dev/null | tail -1 || echo "FAILED")
+    
+    if [ "$DISK_INFO" != "FAILED" ]; then
+        USAGE=$(echo "$DISK_INFO" | awk '{print $5}' | sed 's/%//')
+        USED=$(echo "$DISK_INFO" | awk '{print $3}')
+        TOTAL=$(echo "$DISK_INFO" | awk '{print $2}')
+        
+        if [ "$USAGE" -gt 90 ]; then
+            $SEND_NTFY critical "Container Disk Critical" "🔴 CRITICAL: Container $NAME (CT $CTID) disk at ${USAGE}% (Used: $USED/$TOTAL)" "cd,skull,package"
+        elif [ "$USAGE" -gt 80 ]; then
+            $SEND_NTFY warning "Container Disk Warning" "🟡 WARNING: Container $NAME (CT $CTID) disk at ${USAGE}% (Used: $USED/$TOTAL)" "cd,warning,package"
+        fi
+    fi
+done
+
+logger -t container-monitor "Container check completed"