#!/bin/bash
#
# Monitor Ceph cluster health.
#
# Runs `ceph -s` and pushes a notification through the send-ntfy helper when
#   * the cluster status cannot be read,
#   * the reported health is HEALTH_ERR or HEALTH_WARN, or
#   * the status output mentions degraded placement groups.
# The observed health value is written to syslog under the "ceph-monitor" tag.
#
# Environment:
#   SEND_NTFY  Path of the notification helper
#              (default: /usr/local/bin/send-ntfy.sh). It is invoked as
#              <helper> <priority> <title> <message> <tags>.
#
# Exit status:
#   0  checks completed (or Ceph is not installed)
#   1  cluster status could not be read, or a notification could not be sent

set -euo pipefail

SEND_NTFY="${SEND_NTFY:-/usr/local/bin/send-ntfy.sh}"

# Upper bound, in seconds, applied to every individual ceph invocation so a
# hung cluster cannot hang the monitor itself.
readonly CEPH_TIMEOUT=10

#######################################
# Write one message to syslog under the ceph-monitor tag.
# Best-effort on purpose: a syslog problem must not abort the health check.
# Arguments: $1 - message text
#######################################
log() {
  logger -t ceph-monitor "$1" || true
}

#######################################
# Send a notification through the helper named by SEND_NTFY.
# Globals:   SEND_NTFY (read)
# Arguments: $1 - priority, $2 - title, $3 - message, $4 - tags
# Returns:   0 on success, 1 if the helper failed (the failure is logged)
#######################################
notify() {
  if ! "$SEND_NTFY" "$@"; then
    log "Failed to send notification: $2"
    return 1
  fi
}

#######################################
# Extract the health keyword from `ceph -s` output.
# Arguments: $1 - full status text
# Outputs:   the word following the first "health: ", or UNKNOWN if absent
#######################################
parse_health() {
  local pattern='health: ([[:alnum:]_]+)'
  if [[ "$1" =~ $pattern ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
  else
    printf 'UNKNOWN\n'
  fi
}

#######################################
# Print the first three lines of `ceph health detail`.
# Globals:   CEPH_TIMEOUT (read)
# Outputs:   up to three lines; nothing if the command fails or times out
#######################################
health_detail() {
  # The detail text only enriches the alert, so a failure here is tolerated.
  timeout "$CEPH_TIMEOUT" ceph health detail 2>/dev/null | head -n 3 || true
}

#######################################
# Run all checks once.
# Globals:   CEPH_TIMEOUT (read)
# Returns:   0 on success, 1 on status-read or notification failure
#######################################
main() {
  local status health detail degraded rc=0

  # Hosts without Ceph are not an error; just record that nothing was checked.
  if ! command -v ceph &>/dev/null; then
    log "Ceph not installed, skipping"
    return 0
  fi

  # Judge success by exit status rather than by a sentinel string, so that
  # partial output followed by a failure/timeout is still treated as failure.
  if ! status=$(timeout "$CEPH_TIMEOUT" ceph -s 2>/dev/null); then
    notify critical "Ceph Check Failed" \
      "🔴 CRITICAL: Unable to get Ceph cluster status!" "skull,error" || true
    return 1
  fi

  health=$(parse_health "$status")

  # NOTE(review): the "\n" in the messages below is passed to the helper as a
  # literal backslash-n; presumably the helper expands it -- confirm.
  case "$health" in
    HEALTH_ERR)
      detail=$(health_detail)
      notify critical "Ceph Health Error" \
        "🔴 CRITICAL: Ceph cluster is in HEALTH_ERR state!\n${detail}" \
        "skull,error,cd" || rc=1
      ;;
    HEALTH_WARN)
      detail=$(health_detail)
      notify warning "Ceph Health Warning" \
        "🟡 WARNING: Ceph cluster is in HEALTH_WARN state\n${detail}" \
        "warning,cd" || rc=1
      ;;
  esac

  # Any status line mentioning "degraded" (case-insensitive) triggers a
  # separate warning; grep's "no match" exit status is expected here.
  degraded=$(grep -i degraded <<<"$status" || true)
  if [[ -n "$degraded" ]]; then
    notify warning "Ceph PGs Degraded" \
      "🟡 WARNING: Ceph has degraded placement groups\n${degraded}" \
      "warning,cd" || rc=1
  fi

  log "Ceph health: $health"
  return "$rc"
}

main "$@"