Initial backup: 18 monitoring scripts + timers + docs

- 18 comprehensive monitoring checks
- 5 systemd timers (5min, 15min, hourly, daily, weekly)
- Complete documentation
- NTFY secure notification system
- Fixed debianvm disk space (91% to 57%)
- Fixed CloudReve integration
- Date: 2026-01-07
This commit is contained in:
PVE Monitoring System
2026-01-07 16:30:34 +08:00
commit 3a14fd2736
34 changed files with 1067 additions and 0 deletions

44
scripts/check-thin-pools.sh Executable file
View File

@@ -0,0 +1,44 @@
#!/bin/bash
# Monitor LVM thin pools - improved to avoid false positives
#
# For every LVM thin pool, compare the integer part of its data and metadata
# usage against two thresholds and notify through the script in $SEND_NTFY:
#   > 90%  -> "critical" notification
#   > 80%  -> "warning" notification
# Every pool that reports a data usage figure is also written to syslog under
# the tag "thin-pool-monitor".
#
# The notification helper is invoked as:
#   $SEND_NTFY <level> <title> <message> <tags>
set -euo pipefail

SEND_NTFY="/usr/local/bin/send-ntfy.sh"

# Check thin pool OVERALL usage (not individual VM disks).
#
# One lvs call reports every LV together with its attributes and usage.
#   * LC_ALL=C keeps "." as the decimal separator so the integer part can be
#     cut off reliably.
#   * An explicit column separator keeps an empty column (for example an LV
#     without data_percent) from shifting the columns that follow it.
#   * stderr stays suppressed, but a failing lvs is reported instead of
#     silently resulting in "no pools found".
if ! lvs_report=$(LC_ALL=C lvs --noheadings --separator '|' \
  -o vg_name,lv_name,lv_attr,data_percent,metadata_percent 2>/dev/null); then
  logger -s -t thin-pool-monitor "ERROR: lvs failed; thin pool usage could not be checked"
  exit 1
fi

while IFS='|' read -r vg_name lv_name lv_attr data_raw meta_raw; do
  # lvs pads its rows with blanks; none of these fields can contain blanks
  # legitimately, so strip all whitespace.
  vg_name=${vg_name//[[:space:]]/}
  lv_name=${lv_name//[[:space:]]/}
  lv_attr=${lv_attr//[[:space:]]/}
  data_raw=${data_raw//[[:space:]]/}
  meta_raw=${meta_raw//[[:space:]]/}

  # The first lv_attr character is the volume type; "t" marks a thin pool.
  # Matching a "t" anywhere in the row would also select thin volumes
  # (attr "V......t..") and any LV whose name merely contains the letter.
  [[ "$lv_attr" == t* ]] || continue

  # Keep only the integer part of the percentages (e.g. "91.37" -> "91").
  data_percent=${data_raw%%.*}
  meta_percent=${meta_raw%%.*}

  # Skip pools without a usable data figure (empty or not a plain integer).
  [[ "$data_percent" =~ ^[0-9]+$ ]] || continue

  # Same label the notifications have always used: "<vg>--<lv>".
  pool_name="${vg_name}--${lv_name}"

  # Alert on POOL usage, not individual VM disks.
  if [ "$data_percent" -gt 90 ]; then
    "$SEND_NTFY" critical "Thin Pool CRITICAL" "🔴 CRITICAL: Thin pool $pool_name DATA at ${data_percent}%! ALL VMs on this pool will FREEZE if full!" "skull,error,cd"
  elif [ "$data_percent" -gt 80 ]; then
    "$SEND_NTFY" warning "Thin Pool Warning" "🟡 WARNING: Thin pool $pool_name DATA at ${data_percent}% - take action before 90%" "warning,cd"
  fi

  # Metadata is checked only when lvs reported a plain integer for it.
  if [[ "$meta_percent" =~ ^[0-9]+$ ]]; then
    if [ "$meta_percent" -gt 90 ]; then
      "$SEND_NTFY" critical "Thin Pool Metadata CRITICAL" "🔴 CRITICAL: Thin pool $pool_name METADATA at ${meta_percent}%!" "skull,error,cd"
    elif [ "$meta_percent" -gt 80 ]; then
      "$SEND_NTFY" warning "Thin Pool Metadata Warning" "🟡 WARNING: Thin pool $pool_name METADATA at ${meta_percent}%" "warning,cd"
    fi
  fi

  logger -t thin-pool-monitor "$pool_name: Data ${data_percent}%, Metadata ${meta_percent}%"
done <<< "$lvs_report"
# Separately check for INDIVIDUAL VM disks that are dangerously full.
# This is INFO level since the VM can be expanded.
#
# The lvs output is captured first and filtered with awk alone. With
# "set -o pipefail" active, a "grep" stage that finds no "vm-" volume returns 1
# and would abort the script with a failure on hosts that simply have no VM
# disks. An lvs failure is still reported and treated as fatal.
# LC_ALL=C keeps "." as the decimal separator so awk compares numbers.
if ! vm_disk_report=$(LC_ALL=C lvs --noheadings -o lv_name,data_percent 2>/dev/null); then
  logger -s -t thin-pool-monitor "ERROR: lvs failed; VM disk usage could not be checked"
  exit 1
fi

# Select LVs whose name contains "vm-" and whose usage is above 95%.
# "$2 + 0" forces a numeric comparison; rows without a usage column are skipped.
FULL_DISKS=$(awk 'NF >= 2 && $1 ~ /vm-/ && $2 + 0 > 95 { print $1 " at " $2 "%" }' <<< "$vm_disk_report")

if [ -n "$FULL_DISKS" ]; then
  "$SEND_NTFY" info "VM Disks Nearly Full" " INFO: Some VM disks are >95% full. These can be expanded if needed:\n$FULL_DISKS" "info,cd"
fi