#!/bin/bash
# Monitor PVE host itself (disk, cpu, ram, services)
#
# Checks performed, in order:
#   1. Root partition usage       (warning > 80%, critical > 90%)
#   2. /mnt/ssd0 usage, if mounted (warning > 80%, critical > 90%)
#   3. RAM usage                  (warning > 90%)
#   4. Critical PVE systemd services are active
# NOTE: despite the header line, no CPU check is implemented in this script.
#
# Alerts are sent through the external helper $SEND_NTFY, invoked as:
#   $SEND_NTFY <priority> <title> <message> <tags>
# A summary line is written to syslog via logger(1) at the end of the run.
#
# Exit status: 0 when every check ran and every notification was delivered;
# non-zero when a metric could not be collected/parsed or a notification
# could not be sent.
set -euo pipefail

# NOTE: this deliberately overrides bash's own HOSTNAME variable with a fixed
# label that is interpolated into the alert texts.
HOSTNAME="pve"
SEND_NTFY="/usr/local/bin/send-ntfy.sh"

readonly DISK_WARN_PCT=80
readonly DISK_CRIT_PCT=90
readonly MEM_WARN_PCT=90

# Number of notifications that could not be delivered during this run.
notify_failures=0

# Print an error message to stderr and abort the script.
die() {
  printf 'pve-monitor: %s\n' "$*" >&2
  exit 1
}

# Send one notification through $SEND_NTFY.
# A delivery failure is recorded instead of aborting (set -e would otherwise
# stop the script here), so the remaining checks still run.
# Arguments: $1 priority, $2 title, $3 message, $4 tags
notify() {
  if ! "$SEND_NTFY" "$@"; then
    printf 'pve-monitor: failed to send notification: %s\n' "${2:-}" >&2
    notify_failures=$((notify_failures + 1))
  fi
}

# Print "<use%> <used> <size> <avail>" (percent sign stripped) for the
# filesystem containing path $1.
# -P keeps each filesystem on a single line so the columns cannot shift;
# LC_ALL=C keeps the output format locale-independent.
disk_stats() {
  LC_ALL=C df -hP -- "$1" \
    | awk 'NR == 2 { sub(/%$/, "", $5); print $5, $3, $2, $4 }'
}

# Abort unless $2 is a plain non-negative decimal integer.
# Without this, a parse failure would make the numeric comparisons below
# evaluate to false and silently suppress alerts.
# Arguments: $1 human-readable metric name, $2 value to validate
require_int() {
  [[ "$2" =~ ^(0|[1-9][0-9]*)$ ]] \
    || die "could not parse $1 (got '$2')"
}

# Check root partition
root_stats=$(disk_stats /)
read -r root_pct root_used root_total root_free <<<"$root_stats"
require_int "root partition usage" "$root_pct"

if ((root_pct > DISK_CRIT_PCT)); then
  notify critical "PVE Host - Disk Critical" "🔴 CRITICAL: $HOSTNAME root partition at ${root_pct}% (Used: $root_used/$root_total, Free: $root_free)" "cd,skull"
elif ((root_pct > DISK_WARN_PCT)); then
  notify warning "PVE Host - Disk Warning" "🟡 WARNING: $HOSTNAME root partition at ${root_pct}% (Used: $root_used/$root_total, Free: $root_free)" "cd,warning"
fi

# Check /mnt/ssd0 (local SSD storage); skipped entirely when not mounted
if mountpoint -q /mnt/ssd0; then
  ssd_stats=$(disk_stats /mnt/ssd0)
  read -r ssd_pct ssd_used ssd_total _ <<<"$ssd_stats"
  require_int "/mnt/ssd0 usage" "$ssd_pct"

  if ((ssd_pct > DISK_CRIT_PCT)); then
    notify critical "PVE Host - SSD0 Critical" "🔴 CRITICAL: /mnt/ssd0 at ${ssd_pct}% (Used: $ssd_used/$ssd_total)" "cd,skull"
  elif ((ssd_pct > DISK_WARN_PCT)); then
    notify warning "PVE Host - SSD0 Warning" "🟡 WARNING: /mnt/ssd0 at ${ssd_pct}% (Used: $ssd_used/$ssd_total)" "cd,warning"
  fi
fi

# Check RAM usage
# Human-readable figures are used for the message text; the percentage is
# computed from the raw (non -h) numbers as used/total.
mem_human=$(LC_ALL=C free -h | awk '/^Mem:/ {print $2, $3}')
read -r mem_total mem_used <<<"$mem_human"
mem_pct=$(LC_ALL=C free | awk '/^Mem:/ {printf "%.0f", $3/$2 * 100}')
require_int "RAM usage" "$mem_pct"

if ((mem_pct > MEM_WARN_PCT)); then
  notify warning "PVE Host - High RAM" "🟡 WARNING: $HOSTNAME RAM at ${mem_pct}% (Used: $mem_used/$mem_total)" "warning"
fi

# Check critical PVE services
CRITICAL_SERVICES=("pveproxy" "pvedaemon" "pve-cluster" "pvestatd")

for service in "${CRITICAL_SERVICES[@]}"; do
  if ! systemctl is-active --quiet "$service"; then
    notify critical "PVE Host - Service Down" "🔴 CRITICAL: $HOSTNAME service '$service' is DOWN!" "skull,error"
  fi
done

logger -t pve-monitor "PVE host check completed: Root ${root_pct}%, RAM ${mem_pct}%"

# Surface undelivered notifications to the caller (e.g. cron) only after
# every check has been performed.
if ((notify_failures > 0)); then
  exit 1
fi