#!/bin/bash
#
# Self-Healing System Test Suite
#
# Tests all 4 levels of the self-healing system.
#
# NOTE: the shebang must be the very first line of the file; anything placed
# above it is executed as a command instead of selecting the interpreter.

# Strict mode: abort on unhandled command failures (-e), on expansion of
# unset variables (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Colors for output. These are backslash escape sequences stored literally;
# they only become real ANSI codes when printed through `echo -e`.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # No Color

# Test results: global counters incremented by pass() and fail() and read by
# main() to build the summary and the exit status.
TESTS_PASSED=0
TESTS_FAILED=0
|
|
|
|
# ============================================
|
|
# Helper Functions
|
|
# ============================================
|
|
|
|
# Print a section banner on stdout: an empty line, the title ($1) framed by
# two rule lines, and a trailing empty line. Everything is wrapped in the
# YELLOW/NC color sequences, and backslash escapes are expanded.
print_header() {
  local rule="${YELLOW}========================================${NC}"

  # One printf per line so each line is expanded independently.
  printf '%b\n' "\n${rule}"
  printf '%b\n' "${YELLOW}$1${NC}"
  printf '%b\n' "${rule}\n"
}
|
|
|
|
# Record a passing check: print a green "PASS" line for the message in $1 and
# add one to the global TESTS_PASSED counter.
pass() {
  echo -e "${GREEN}✅ PASS:${NC} $1"

  # Do not use `((TESTS_PASSED++))` here: the post-increment expression
  # evaluates to the old value, so when the counter is 0 the `(( ))` command
  # returns status 1 and `set -e` aborts the whole script on the first pass.
  # A plain arithmetic assignment always succeeds.
  TESTS_PASSED=$((TESTS_PASSED + 1))
}
|
|
|
|
# Record a failing check: print a red "FAIL" line for the message in $1 and
# add one to the global TESTS_FAILED counter.
fail() {
  echo -e "${RED}❌ FAIL:${NC} $1"

  # Do not use `((TESTS_FAILED++))` here: the post-increment expression
  # evaluates to the old value, so when the counter is 0 the `(( ))` command
  # returns status 1 and `set -e` aborts the whole script on the first
  # failure, before any summary is printed. A plain arithmetic assignment
  # always succeeds.
  TESTS_FAILED=$((TESTS_FAILED + 1))
}
|
|
|
|
# Print a yellow "WARN" line for the message in $1. Warnings are purely
# informational: neither result counter is changed.
warn() {
  local message=$1

  printf '%b\n' "${YELLOW}⚠️ WARN:${NC} ${message}"
}
|
|
|
|
# Print the message in $1 verbatim on its own line (no color, no counters).
info() {
  # printf instead of echo: `echo "$1"` would treat a message such as "-n" or
  # "-e" as an option and print nothing.
  printf '%s\n' "$1"
}
|
|
|
|
# Report whether a process whose command line matches "openclaw-gateway"
# exists.
# Returns: 0 if pgrep finds a match, 1 otherwise (any pgrep failure status is
# normalized to 1).
check_gateway_running() {
  pgrep -f "openclaw-gateway" > /dev/null || return 1
  return 0
}
|
|
|
|
# Probe the gateway's HTTP endpoint on localhost:18789.
# Returns: 0 only when curl succeeds and the response status is exactly 200;
# 1 otherwise (connection failure, timeout, curl missing, any other status).
check_gateway_http() {
  local http_code

  # --max-time bounds the whole request. Without it a gateway that accepts
  # the connection but never answers would hang this check, and with it the
  # entire test suite, indefinitely.
  # A failing curl is treated as "not healthy" directly instead of appending a
  # fallback string to whatever -w had already printed.
  http_code=$(curl -s -o /dev/null --max-time 5 -w "%{http_code}" \
    http://localhost:18789/ 2> /dev/null) || return 1

  if [ "$http_code" = "200" ]; then
    return 0
  fi
  return 1
}
|
|
|
|
# ============================================
|
|
# Test Cases
|
|
# ============================================
|
|
|
|
# Environment checks: optional .env file, presence of the three self-healing
# scripts under $HOME/openclaw/scripts, and the executable bit on the health
# check script. Results are reported through pass/fail/warn.
test_environment() {
  print_header "Test: Environment Setup"

  local scripts_dir="$HOME/openclaw/scripts"
  local script_name

  # The .env file may live in either location; its absence is only a warning.
  if [ ! -f "$HOME/.openclaw/.env" ] && [ ! -f "$HOME/openclaw/.env" ]; then
    warn "Environment file not found (optional)"
  else
    pass "Environment file exists"
  fi

  # Every script must exist as a regular file.
  for script_name in \
    gateway-healthcheck.sh \
    emergency-recovery.sh \
    emergency-recovery-monitor.sh; do
    if [ -f "$scripts_dir/$script_name" ]; then
      pass "$script_name exists"
    else
      fail "$script_name not found"
    fi
  done

  # Only the health check script is checked for the executable bit.
  if [ ! -x "$scripts_dir/gateway-healthcheck.sh" ]; then
    fail "gateway-healthcheck.sh is not executable (run: chmod +x)"
  else
    pass "gateway-healthcheck.sh is executable"
  fi
}
|
|
|
|
# Dependency checks: verify that tmux, claude, openclaw and curl can be
# resolved as commands. A missing claude is only a warning; the other three
# count as failures.
test_dependencies() {
  print_header "Test: Dependencies"

  # tmux
  if ! command -v tmux > /dev/null 2>&1; then
    fail "tmux is not installed (run: brew install tmux)"
  else
    pass "tmux is installed"
  fi

  # Claude CLI (warning only)
  if ! command -v claude > /dev/null 2>&1; then
    warn "Claude CLI is not installed (Level 3 will fail)"
  else
    pass "Claude CLI is installed"
  fi

  # OpenClaw CLI
  if ! command -v openclaw > /dev/null 2>&1; then
    fail "OpenClaw CLI is not installed"
  else
    pass "OpenClaw CLI is installed"
  fi

  # curl
  if ! command -v curl > /dev/null 2>&1; then
    fail "curl is not installed"
  else
    pass "curl is installed"
  fi
}
|
|
|
|
# Gateway status checks: process presence, HTTP 200 on the gateway endpoint,
# and whether anything holds port 18789. A negative result is reported as a
# warning, never as a failure.
test_gateway_status() {
  print_header "Test: Gateway Status"

  # Process
  if ! check_gateway_running; then
    warn "Gateway process is not running"
  else
    pass "Gateway process is running"
  fi

  # HTTP
  if ! check_gateway_http; then
    warn "Gateway HTTP does not respond 200"
  else
    pass "Gateway HTTP responds 200"
  fi

  # Port (lsof output is discarded; only its exit status matters)
  if ! lsof -i :18789 > /dev/null 2>&1; then
    warn "Port 18789 is not in use"
  else
    pass "Port 18789 is in use (Gateway)"
  fi
}
|
|
|
|
# Level 1 checks: the watchdog LaunchAgent plist exists under
# $HOME/Library/LaunchAgents and its label shows up in `launchctl list`.
# Both conditions are reported as warnings when not met.
test_level1_watchdog() {
  print_header "Test: Level 1 - Watchdog"

  local loaded_agents

  # Check LaunchAgent exists
  if [ -f "$HOME/Library/LaunchAgents/ai.openclaw.watchdog.plist" ]; then
    pass "Watchdog LaunchAgent plist exists"
  else
    warn "Watchdog LaunchAgent plist not found (OpenClaw may not auto-restart)"
  fi

  # Check LaunchAgent is loaded.
  # The listing is captured first instead of piping into `grep -q`: grep -q
  # exits on the first match, the producer can then die from SIGPIPE, and with
  # `pipefail` that turns a real match into a false "not loaded".
  # A failing launchctl yields an empty listing, i.e. "not loaded".
  loaded_agents=$(launchctl list) || loaded_agents=""

  # -F: the label is a literal string, so its dots must not act as wildcards.
  if grep -qF "ai.openclaw.watchdog" <<< "$loaded_agents"; then
    pass "Watchdog LaunchAgent is loaded"
  else
    warn "Watchdog LaunchAgent is not loaded"
  fi
}
|
|
|
|
# Level 2 checks: the health check LaunchAgent plist exists and is loaded,
# a health check log for today's date exists, and the health check script
# exits successfully when run by hand.
test_level2_healthcheck() {
  print_header "Test: Level 2 - Health Check"

  local loaded_agents
  local today

  # Check LaunchAgent exists
  if [ -f "$HOME/Library/LaunchAgents/com.openclaw.healthcheck.plist" ]; then
    pass "Health Check LaunchAgent plist exists"
  else
    fail "Health Check LaunchAgent plist not found"
  fi

  # Check LaunchAgent is loaded.
  # The listing is captured first instead of piping into `grep -q`: grep -q
  # exits on the first match, the producer can then die from SIGPIPE, and with
  # `pipefail` that turns a real match into a false "not loaded".
  # A failing launchctl yields an empty listing, i.e. "not loaded".
  loaded_agents=$(launchctl list) || loaded_agents=""

  # -F: the label is a literal string, so its dots must not act as wildcards.
  if grep -qF "com.openclaw.healthcheck" <<< "$loaded_agents"; then
    pass "Health Check LaunchAgent is loaded"
  else
    warn "Health Check LaunchAgent is not loaded (run: launchctl load)"
  fi

  # Check logs exist (log files are named after the current date)
  today=$(date +%Y-%m-%d)

  if [ -f "$HOME/openclaw/memory/healthcheck-$today.log" ]; then
    pass "Health Check log exists (ran today)"
  else
    warn "Health Check log not found (may not have run yet)"
  fi

  # Test health check script manually; its stdout and stderr are appended to
  # /tmp/healthcheck-test.log so a failure can be inspected afterwards.
  info "Testing Health Check script manually..."
  if bash "$HOME/openclaw/scripts/gateway-healthcheck.sh" >> /tmp/healthcheck-test.log 2>&1; then
    pass "Health Check script executed successfully"
  else
    fail "Health Check script failed (check: /tmp/healthcheck-test.log)"
  fi
}
|
|
|
|
# Level 3 checks: tmux and the Claude CLI are resolvable as commands, and the
# emergency recovery script parses cleanly. The script is only syntax-checked
# with `bash -n`; it is never executed here.
test_level3_emergency_recovery() {
  print_header "Test: Level 3 - Emergency Recovery"

  local recovery_script="$HOME/openclaw/scripts/emergency-recovery.sh"

  # tmux
  if ! command -v tmux > /dev/null 2>&1; then
    fail "tmux is not available (Level 3 will fail)"
  else
    pass "tmux is available for Level 3"
  fi

  # Claude CLI (warning only)
  if ! command -v claude > /dev/null 2>&1; then
    warn "Claude CLI is not available (Level 3 will fail)"
  else
    pass "Claude CLI is available for Level 3"
  fi

  # Syntax check only
  if ! bash -n "$recovery_script"; then
    fail "Emergency Recovery script has syntax errors"
  else
    pass "Emergency Recovery script syntax is valid"
  fi
}
|
|
|
|
# Level 4 checks: the emergency monitor script parses cleanly (`bash -n`,
# never executed) and `openclaw cron list` mentions an "Emergency Recovery"
# job. A missing cron job is only a warning.
test_level4_monitor() {
  print_header "Test: Level 4 - Emergency Monitor"

  local cron_jobs

  # Test script syntax
  if bash -n "$HOME/openclaw/scripts/emergency-recovery-monitor.sh"; then
    pass "Emergency Monitor script syntax is valid"
  else
    fail "Emergency Monitor script has syntax errors"
  fi

  # Check cron job exists.
  # The listing is captured first instead of piping into `grep -q`: grep -q
  # exits on the first match, the producer can then die from SIGPIPE, and with
  # `pipefail` that turns a real match into a false "not found".
  # A failing or missing openclaw yields an empty listing, i.e. "not found";
  # its stderr is discarded on purpose.
  cron_jobs=$(openclaw cron list 2> /dev/null) || cron_jobs=""

  if grep -qF "Emergency Recovery" <<< "$cron_jobs"; then
    pass "Emergency Monitor cron job exists"
  else
    warn "Emergency Monitor cron job not found (alerts disabled)"
  fi
}
|
|
|
|
# Metrics checks: look for the health check and emergency recovery metrics
# files under $HOME/openclaw/memory. When the health check metrics file
# exists, its last three lines are echoed for a quick look. Missing files are
# only warnings.
test_metrics() {
  print_header "Test: Metrics Collection"

  local memory_dir="$HOME/openclaw/memory"
  local healthcheck_metrics="$memory_dir/.healthcheck-metrics.json"
  local recovery_metrics="$memory_dir/.emergency-recovery-metrics.json"

  if [ ! -f "$healthcheck_metrics" ]; then
    warn "Health Check metrics file not found (will be created on first run)"
  else
    pass "Health Check metrics file exists"

    # Best-effort preview: a tail failure must not abort the suite.
    info "Last 3 metrics:"
    tail -n 3 "$healthcheck_metrics" 2> /dev/null || true
  fi

  if [ ! -f "$recovery_metrics" ]; then
    warn "Emergency Recovery metrics file not found (will be created on first run)"
  else
    pass "Emergency Recovery metrics file exists"
  fi
}
|
|
|
|
# Print how many entries below directory $1 have a name matching the glob $2.
# Prints 0 when the directory is missing or unreadable.
_count_matching_files() {
  local dir=$1
  local pattern=$2
  local count

  # `|| true`: find exits non-zero when the directory does not exist; with
  # `pipefail` that fails the whole assignment and `set -e` would abort the
  # script. wc has still printed a usable count at that point.
  count=$(find "$dir" -name "$pattern" 2> /dev/null | wc -l) || true

  # Some wc implementations (e.g. BSD/macOS) pad the number with blanks;
  # strip them so messages read "(3 files)" rather than "(       3 files)".
  count=${count//[[:space:]]/}
  printf '%s\n' "${count:-0}"
}

# Log rotation checks: count health check and emergency recovery log files
# under $HOME/openclaw/memory and warn when there are more than 20 or 10 of
# them respectively, which suggests rotation is not happening.
test_log_rotation() {
  print_header "Test: Log Rotation"

  local healthcheck_logs
  local recovery_logs

  # Count log files
  healthcheck_logs=$(_count_matching_files "$HOME/openclaw/memory" "healthcheck-*.log")

  info "Health Check log files: $healthcheck_logs"

  if [ "$healthcheck_logs" -gt 20 ]; then
    warn "Many Health Check logs ($healthcheck_logs files). Log rotation may not be working."
  else
    pass "Health Check log count is reasonable ($healthcheck_logs files)"
  fi

  recovery_logs=$(_count_matching_files "$HOME/openclaw/memory" "emergency-recovery-*.log")

  info "Emergency Recovery log files: $recovery_logs"

  if [ "$recovery_logs" -gt 10 ]; then
    warn "Many Emergency Recovery logs ($recovery_logs files). Log rotation may not be working."
  else
    pass "Emergency Recovery log count is reasonable ($recovery_logs files)"
  fi
}
|
|
|
|
# ============================================
|
|
# Main
|
|
# ============================================
|
|
|
|
# Entry point: print the banner, run every test group in order, then print
# the pass/fail totals and terminate the script.
# Exits: 0 when TESTS_FAILED is 0, 1 otherwise. Warnings are not counted.
main() {
  local suite
  local total_tests
  local verdict
  local exit_code

  echo
  echo "╔════════════════════════════════════════╗"
  echo "║ Self-Healing System Test Suite ║"
  echo "╚════════════════════════════════════════╝"
  echo

  # Run all tests, in this fixed order.
  for suite in \
    test_environment \
    test_dependencies \
    test_gateway_status \
    test_level1_watchdog \
    test_level2_healthcheck \
    test_level3_emergency_recovery \
    test_level4_monitor \
    test_metrics \
    test_log_rotation; do
    "$suite"
  done

  # Summary
  print_header "Test Summary"

  total_tests=$((TESTS_PASSED + TESTS_FAILED))

  printf '%b\n' "Total tests: $total_tests"
  printf '%b\n' "${GREEN}Passed: $TESTS_PASSED${NC}"
  printf '%b\n' "${RED}Failed: $TESTS_FAILED${NC}"
  echo

  # Pick the closing message and the process exit status together.
  if [ "$TESTS_FAILED" -eq 0 ]; then
    verdict="${GREEN}🎉 All tests passed!${NC}"
    exit_code=0
  else
    verdict="${RED}❌ Some tests failed. Please fix the issues above.${NC}"
    exit_code=1
  fi

  printf '%b\n' "$verdict"
  echo
  exit "$exit_code"
}
|
|
|
|
# Run main, forwarding the script's command-line arguments so the entry point
# receives them if it ever needs any.
main "$@"
|