#!/bin/bash
#
# ceph-monitor: check Ceph cluster health and report problems through the
# ntfy helper script and syslog.
#
# Effectively DISABLED on hosts where Ceph is not used for storage
# (NFS/CIFS/LVM instead): Ceph services may be running but unconfigured
# (0 OSDs), which causes false warnings. The script therefore exits early
# unless `pvesm status` lists a storage entry mentioning Ceph.
#
# Exit status:
#   0  Ceph not installed / not in use, or all checks ran and every alert
#      that had to be sent was delivered
#   1  the cluster status could not be obtained, or a notification failed

set -euo pipefail

readonly LOG_TAG="ceph-monitor"
readonly SEND_NTFY="/usr/local/bin/send-ntfy.sh"
readonly CEPH_TIMEOUT=10 # seconds allowed for each ceph CLI call

# Set to 1 when any notification could not be delivered; used as the final
# exit status so the caller still sees the failure.
notify_failed=0

#######################################
# Send one alert through the ntfy helper without aborting the script.
# A failed delivery must not suppress the remaining checks or the final
# syslog entry, so the failure is recorded instead of tripping `set -e`.
# Globals:   SEND_NTFY, LOG_TAG (read), notify_failed (written)
# Arguments: $1 - priority, $2 - title, $3 - message, $4 - tags
#            (passed to the helper unchanged)
#######################################
notify() {
  if ! "$SEND_NTFY" "$@"; then
    logger -t "$LOG_TAG" "Failed to send notification: ${2:-}" || true
    notify_failed=1
  fi
}

#######################################
# Print the first three lines of `ceph health detail`.
# Prints nothing if the command fails or exceeds the timeout.
# Globals:   CEPH_TIMEOUT (read)
# Outputs:   up to three lines on stdout
#######################################
health_detail() {
  # `|| true`: best effort only; the alert is still sent without details.
  timeout "$CEPH_TIMEOUT" ceph health detail 2>/dev/null | head -3 || true
}

# Check if Ceph is installed
if ! command -v ceph &>/dev/null; then
  logger -t "$LOG_TAG" "Ceph not installed, skipping"
  exit 0
fi

# Check if Ceph storage is actually being used.
# A missing or failing pvesm is deliberately treated as "not in use".
storage_list=$(pvesm status 2>/dev/null) || true
if ! grep -qi ceph <<<"$storage_list"; then
  # Ceph is not in use, skip monitoring
  logger -t "$LOG_TAG" "Ceph not in use, monitoring disabled"
  exit 0
fi

# Get Ceph status. Test the exit status directly: a sentinel string appended
# to the output would be missed whenever ceph prints something before
# failing or timing out.
if ! ceph_status=$(timeout "$CEPH_TIMEOUT" ceph -s 2>/dev/null); then
  notify critical "Ceph Check Failed" "🔴 CRITICAL: Unable to get Ceph cluster status!" "skull,error"
  exit 1
fi

# Check overall health: first "health: <WORD>" occurrence in the status text.
health="UNKNOWN"
health_re='health: ([[:alnum:]_]+)'
if [[ "$ceph_status" =~ $health_re ]]; then
  health="${BASH_REMATCH[1]}"
fi

case "$health" in
  HEALTH_ERR)
    notify critical "Ceph Health Error" "🔴 CRITICAL: Ceph cluster is in HEALTH_ERR state!\n$(health_detail)" "skull,error,cd"
    ;;
  HEALTH_WARN)
    notify warning "Ceph Health Warning" "🟡 WARNING: Ceph cluster is in HEALTH_WARN state\n$(health_detail)" "warning,cd"
    ;;
esac

# Check for degraded PGs (any status line mentioning "degraded").
degraded=$(grep -i degraded <<<"$ceph_status" || true)
if [[ -n "$degraded" ]]; then
  notify warning "Ceph PGs Degraded" "🟡 WARNING: Ceph has degraded placement groups\n$degraded" "warning,cd"
fi

logger -t "$LOG_TAG" "Ceph health: $health"

exit "$notify_failed"