homelab-monitoring/scripts/check-containers.sh

#!/bin/bash
# Check LXC container status and disk usage.
#
# For every entry in CRITICAL_CONTAINERS this script:
#   1. logs and skips containers for which `pct status` fails,
#   2. sends a critical notification when the container is not "running",
#   3. sends a warning / critical notification when the root filesystem
#      inside the container is above DISK_WARN_PCT / DISK_CRIT_PCT percent.
#
# Notifications are sent through $SEND_NTFY (level, title, message, tags);
# progress and problems are written to syslog with tag "container-monitor".
# A failure to notify or to log never aborts the run, so one broken
# notification cannot hide problems in the remaining containers.
set -euo pipefail

SEND_NTFY="/usr/local/bin/send-ntfy.sh"

# Critical containers that should always be running (CT IDs only, not VMs!)
# Format of each entry: "<CTID>:<display name>"
CRITICAL_CONTAINERS=("200:docker" "209:cloudreve" "221:gitea" "299:sftpgo")

# Root filesystem usage thresholds, in percent (strictly greater-than).
DISK_CRIT_PCT=90
DISK_WARN_PCT=80

# Write one message to syslog; logging problems must not stop the checks.
log() {
    logger -t container-monitor "$*" || true
}

# Send a notification: notify <level> <title> <message> <tags>.
# A failing notifier is logged instead of aborting the script under `set -e`.
notify() {
    "$SEND_NTFY" "$@" || log "Failed to send notification: ${2:-}"
}

# Print the state word reported by `pct status <ctid>` (second field of its
# output). Returns non-zero when `pct status` itself fails.
container_status() {
    local out
    out=$(pct status "$1" 2>/dev/null) || return 1
    awk '{print $2}' <<< "$out"
}

# Check root filesystem usage inside a running container and notify when a
# threshold is exceeded: check_disk <ctid> <name>. Always returns 0.
check_disk() {
    local ctid=$1 name=$2
    local df_out last total="" used="" usage=""

    # -P forces one output line per filesystem so the last line is always
    # the data row, even for long device names.
    if ! df_out=$(pct exec "$ctid" -- df -Ph / 2>/dev/null); then
        log "CT $ctid ($name): could not read disk usage"
        return 0
    fi

    # Columns: filesystem, size, used, available, use%, mount point.
    last=${df_out##*$'\n'}
    read -r _ total used _ usage _ <<< "$last" || true
    usage=${usage%\%}

    if [[ ! $usage =~ ^[0-9]+$ ]]; then
        log "CT $ctid ($name): unexpected df output, skipping disk check"
        return 0
    fi

    # 10# keeps a value with a leading zero from being read as octal.
    if (( 10#$usage > DISK_CRIT_PCT )); then
        notify critical "Container Disk Critical" "🔴 CRITICAL: Container $name (CT $ctid) disk at ${usage}% (Used: $used/$total)" "cd,skull,package"
    elif (( 10#$usage > DISK_WARN_PCT )); then
        notify warning "Container Disk Warning" "🟡 WARNING: Container $name (CT $ctid) disk at ${usage}% (Used: $used/$total)" "cd,warning,package"
    fi
}

# Run all checks for one container: check_container <ctid> <name>.
# Always returns 0 so the caller's loop continues with the next container.
check_container() {
    local ctid=$1 name=$2 status

    # A single `pct status` call answers both "does it exist?" and
    # "is it running?", so the container cannot vanish between two calls.
    if ! status=$(container_status "$ctid"); then
        log "CT $ctid ($name) does not exist, skipping"
        return 0
    fi

    if [[ $status != "running" ]]; then
        notify critical "Container Down" "🔴 CRITICAL: Container $name (CT $ctid) is $status (expected: running)" "skull,error,package"
        return 0
    fi

    check_disk "$ctid" "$name"
}

# Entry point: check every configured critical container, then log completion.
main() {
    local entry ctid name
    for entry in "${CRITICAL_CONTAINERS[@]}"; do
        IFS=':' read -r ctid name <<< "$entry"
        check_container "$ctid" "$name"
    done

    log "Container check completed"
}

main