#!/bin/bash
# openclaw-backups/skills/openclaw-self-healing/scripts/test-self-healing.sh
#
# Self-Healing System Test Suite
# Tests all 4 levels of the self-healing system
#
# NOTE: the path above used to be a bare (uncommented) first line, which the
# shell would try to execute as a command; it is kept here as a comment and
# the shebang now sits on line 1 where the kernel expects it.

set -euo pipefail

# Colors for output. The values are backslash escapes stored literally; they
# are expanded when printed by the output helpers.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # No Color

# Test results (updated by pass() and fail(), summarised by main())
TESTS_PASSED=0
TESTS_FAILED=0

# ============================================
# Helper Functions
# ============================================

# Print a section banner: a blank line, then the title framed above and
# below by a rule of '=' characters (all wrapped in YELLOW/NC), then
# another blank line.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - banner title (backslash escapes in it are expanded)
# Outputs:   the banner on stdout
print_header() {
  local rule="========================================"

  printf '%b\n' "\n${YELLOW}${rule}${NC}"
  printf '%b\n' "${YELLOW}$1${NC}"
  printf '%b\n' "${YELLOW}${rule}${NC}\n"
}

# Report a passing check and count it.
# Globals:   GREEN, NC (read); TESTS_PASSED (incremented)
# Arguments: $1 - description of the check that passed
# Outputs:   one "PASS" line on stdout
pass() {
  echo -e "${GREEN}✅ PASS:${NC} $1"
  # Use an assignment rather than ((TESTS_PASSED++)): the arithmetic command
  # returns status 1 when the pre-increment value is 0, which under `set -e`
  # would abort the whole script on the very first passing check.
  TESTS_PASSED=$((TESTS_PASSED + 1))
}

# Report a failing check and count it.
# Globals:   RED, NC (read); TESTS_FAILED (incremented)
# Arguments: $1 - description of the check that failed
# Outputs:   one "FAIL" line on stdout
fail() {
  echo -e "${RED}❌ FAIL:${NC} $1"
  # Use an assignment rather than ((TESTS_FAILED++)): the arithmetic command
  # returns status 1 when the pre-increment value is 0, which under `set -e`
  # would abort the script at the first failure instead of recording it.
  TESTS_FAILED=$((TESTS_FAILED + 1))
}

# Print a warning line. Warnings do not touch the pass/fail counters.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - warning text (backslash escapes in it are expanded)
# Outputs:   one "WARN" line on stdout
warn() {
  printf '%b\n' "${YELLOW}⚠️  WARN:${NC} $1"
}

# Print an informational message verbatim (no colour, no counter change).
# Arguments: $1 - text to print
# Outputs:   the text followed by a newline on stdout
info() {
  local message="$1"

  echo "${message}"
}

# Tell whether any process whose full command line matches
# "openclaw-gateway" exists, according to pgrep.
# Returns: 0 if pgrep found a match, 1 otherwise (any pgrep failure is
#          normalised to 1)
check_gateway_running() {
  pgrep -f "openclaw-gateway" > /dev/null || return 1
  return 0
}

# Probe the gateway over HTTP and report whether it answers with 200.
# Outputs: nothing (curl's body and diagnostics are discarded)
# Returns: 0 if GET http://localhost:18789/ yields HTTP 200, 1 otherwise
check_gateway_http() {
  local -r url="http://localhost:18789/"
  local -r connect_timeout=2
  local -r max_time=5
  local http_code

  # Bound the request: without a timeout a gateway that accepts the
  # connection but never replies would hang the whole test suite.
  # On any curl failure fall back to "000" by assignment, so the fallback
  # replaces (rather than gets appended to) whatever curl already printed.
  http_code=$(curl -s -o /dev/null -w "%{http_code}" \
    --connect-timeout "$connect_timeout" --max-time "$max_time" \
    "$url" 2>/dev/null) || http_code="000"

  [ "$http_code" = "200" ]
}

# ============================================
# Test Cases
# ============================================

# Environment checks: an optional .env file, the presence of the three
# self-healing scripts under $HOME/openclaw/scripts, and the executable
# bit on gateway-healthcheck.sh.
# Globals: HOME (read)
# Outputs: one pass/fail/warn line per check
test_environment() {
  local scripts_dir="$HOME/openclaw/scripts"
  local script

  print_header "Test: Environment Setup"

  # The .env file may live in either location; missing is only a warning.
  if [ ! -f "$HOME/.openclaw/.env" ] && [ ! -f "$HOME/openclaw/.env" ]; then
    warn "Environment file not found (optional)"
  else
    pass "Environment file exists"
  fi

  # Each required script must exist as a regular file.
  for script in \
    gateway-healthcheck.sh \
    emergency-recovery.sh \
    emergency-recovery-monitor.sh; do
    if [ -f "$scripts_dir/$script" ]; then
      pass "$script exists"
    else
      fail "$script not found"
    fi
  done

  # Only the health-check script is checked for the executable bit.
  if [ ! -x "$scripts_dir/gateway-healthcheck.sh" ]; then
    fail "gateway-healthcheck.sh is not executable (run: chmod +x)"
  else
    pass "gateway-healthcheck.sh is executable"
  fi
}

# Dependency checks: look up tmux, claude, openclaw and curl with
# `command -v`. A missing claude is reported as a warning; the other
# three are reported as failures.
# Outputs: one pass/fail/warn line per tool
test_dependencies() {
  print_header "Test: Dependencies"

  # tmux
  if ! command -v tmux > /dev/null 2>&1; then
    fail "tmux is not installed (run: brew install tmux)"
  else
    pass "tmux is installed"
  fi

  # Claude CLI
  if ! command -v claude > /dev/null 2>&1; then
    warn "Claude CLI is not installed (Level 3 will fail)"
  else
    pass "Claude CLI is installed"
  fi

  # OpenClaw CLI
  if ! command -v openclaw > /dev/null 2>&1; then
    fail "OpenClaw CLI is not installed"
  else
    pass "OpenClaw CLI is installed"
  fi

  # curl
  if ! command -v curl > /dev/null 2>&1; then
    fail "curl is not installed"
  else
    pass "curl is installed"
  fi
}

# Gateway liveness checks: process present, HTTP answering 200, and
# something holding port 18789 (per lsof). A negative result is only
# warned about; none of these checks is counted as a failure.
# Outputs: one pass/warn line per check
test_gateway_status() {
  print_header "Test: Gateway Status"

  # Process
  if ! check_gateway_running; then
    warn "Gateway process is not running"
  else
    pass "Gateway process is running"
  fi

  # HTTP
  if ! check_gateway_http; then
    warn "Gateway HTTP does not respond 200"
  else
    pass "Gateway HTTP responds 200"
  fi

  # Port
  if ! lsof -i :18789 > /dev/null 2>&1; then
    warn "Port 18789 is not in use"
  else
    pass "Port 18789 is in use (Gateway)"
  fi
}

# Level 1 checks: the watchdog LaunchAgent plist exists under
# $HOME/Library/LaunchAgents and its label shows up in `launchctl list`.
# Both conditions are advisory: a negative result is a warning.
# Globals: HOME (read)
# Outputs: one pass/warn line per check
test_level1_watchdog() {
  local -r label="ai.openclaw.watchdog"
  local loaded_agents

  print_header "Test: Level 1 - Watchdog"

  # Check LaunchAgent exists
  if [ -f "$HOME/Library/LaunchAgents/${label}.plist" ]; then
    pass "Watchdog LaunchAgent plist exists"
  else
    warn "Watchdog LaunchAgent plist not found (OpenClaw may not auto-restart)"
  fi

  # Check LaunchAgent is loaded.
  # Capture the listing first instead of piping it into `grep -q`: with
  # `pipefail`, grep exiting at the first match can kill the producer with
  # SIGPIPE and make the pipeline fail even though the label was found.
  # A failing or missing launchctl is treated as "nothing loaded".
  loaded_agents=$(launchctl list) || loaded_agents=""

  # -F: the label contains dots, which must match literally.
  if grep -qF -- "$label" <<< "$loaded_agents"; then
    pass "Watchdog LaunchAgent is loaded"
  else
    warn "Watchdog LaunchAgent is not loaded"
  fi
}

# Level 2 checks: the health-check LaunchAgent plist exists and is loaded,
# a health-check log for today's date exists, and the health-check script
# runs to completion with exit status 0.
# Globals: HOME (read)
# Outputs: one pass/fail/warn line per check; the script's own output is
#          appended to /tmp/healthcheck-test.log
test_level2_healthcheck() {
  local -r label="com.openclaw.healthcheck"
  local loaded_agents
  local today

  print_header "Test: Level 2 - Health Check"

  # Check LaunchAgent exists
  if [ -f "$HOME/Library/LaunchAgents/${label}.plist" ]; then
    pass "Health Check LaunchAgent plist exists"
  else
    fail "Health Check LaunchAgent plist not found"
  fi

  # Check LaunchAgent is loaded.
  # Capture the listing first instead of piping it into `grep -q`: with
  # `pipefail`, grep exiting at the first match can kill the producer with
  # SIGPIPE and make the pipeline fail even though the label was found.
  # A failing or missing launchctl is treated as "nothing loaded".
  loaded_agents=$(launchctl list) || loaded_agents=""

  # -F: the label contains dots, which must match literally.
  if grep -qF -- "$label" <<< "$loaded_agents"; then
    pass "Health Check LaunchAgent is loaded"
  else
    warn "Health Check LaunchAgent is not loaded (run: launchctl load)"
  fi

  # Check logs exist (log files are named after the current date)
  today=$(date +%Y-%m-%d)

  if [ -f "$HOME/openclaw/memory/healthcheck-$today.log" ]; then
    pass "Health Check log exists (ran today)"
  else
    warn "Health Check log not found (may not have run yet)"
  fi

  # Test health check script manually; its exit status decides pass/fail.
  info "Testing Health Check script manually..."
  if bash "$HOME/openclaw/scripts/gateway-healthcheck.sh" >> /tmp/healthcheck-test.log 2>&1; then
    pass "Health Check script executed successfully"
  else
    fail "Health Check script failed (check: /tmp/healthcheck-test.log)"
  fi
}

# Level 3 checks: tmux and claude can be found with `command -v`, and
# emergency-recovery.sh parses cleanly under `bash -n` (the script is
# syntax-checked only, never executed).
# Globals: HOME (read)
# Outputs: one pass/fail/warn line per check
test_level3_emergency_recovery() {
  local recovery_script="$HOME/openclaw/scripts/emergency-recovery.sh"

  print_header "Test: Level 3 - Emergency Recovery"

  # tmux
  if ! command -v tmux > /dev/null 2>&1; then
    fail "tmux is not available (Level 3 will fail)"
  else
    pass "tmux is available for Level 3"
  fi

  # Claude CLI (absence is a warning, not a failure)
  if ! command -v claude > /dev/null 2>&1; then
    warn "Claude CLI is not available (Level 3 will fail)"
  else
    pass "Claude CLI is available for Level 3"
  fi

  # Parse-only check of the recovery script
  if ! bash -n "$recovery_script"; then
    fail "Emergency Recovery script has syntax errors"
  else
    pass "Emergency Recovery script syntax is valid"
  fi
}

# Level 4 checks: emergency-recovery-monitor.sh parses cleanly under
# `bash -n`, and `openclaw cron list` mentions "Emergency Recovery".
# Globals: HOME (read)
# Outputs: one pass/fail/warn line per check
test_level4_monitor() {
  local cron_jobs

  print_header "Test: Level 4 - Emergency Monitor"

  # Test script syntax (parse only; the monitor is not executed)
  if bash -n "$HOME/openclaw/scripts/emergency-recovery-monitor.sh"; then
    pass "Emergency Monitor script syntax is valid"
  else
    fail "Emergency Monitor script has syntax errors"
  fi

  # Check cron job exists.
  # Capture the listing first instead of piping it into `grep -q`: with
  # `pipefail`, grep exiting at the first match can kill the producer with
  # SIGPIPE and make the pipeline fail even though the job is listed.
  # A failing or missing openclaw CLI is treated as "no jobs".
  cron_jobs=$(openclaw cron list 2>/dev/null) || cron_jobs=""

  if grep -qF -- "Emergency Recovery" <<< "$cron_jobs"; then
    pass "Emergency Monitor cron job exists"
  else
    warn "Emergency Monitor cron job not found (alerts disabled)"
  fi
}

# Metrics checks: look for the health-check and emergency-recovery
# metrics files under $HOME/openclaw/memory. When the health-check file
# exists, its last three lines are echoed. Missing files only warn.
# Globals: HOME (read)
# Outputs: one pass/warn line per file, plus the metrics tail
test_metrics() {
  local memory_dir="$HOME/openclaw/memory"
  local healthcheck_metrics="$memory_dir/.healthcheck-metrics.json"
  local recovery_metrics="$memory_dir/.emergency-recovery-metrics.json"

  print_header "Test: Metrics Collection"

  if [ ! -f "$healthcheck_metrics" ]; then
    warn "Health Check metrics file not found (will be created on first run)"
  else
    pass "Health Check metrics file exists"

    # Best-effort preview: a tail failure must not abort the suite.
    info "Last 3 metrics:"
    tail -n 3 "$healthcheck_metrics" 2>/dev/null || true
  fi

  if [ ! -f "$recovery_metrics" ]; then
    warn "Emergency Recovery metrics file not found (will be created on first run)"
  else
    pass "Emergency Recovery metrics file exists"
  fi
}

# Count entries under $HOME/openclaw/memory whose name matches a pattern.
# Arguments: $1 - `find -name` pattern
# Outputs:   the count as a bare integer (0 when the directory is missing)
_count_log_files() {
  local pattern=$1
  local count

  # `|| true` keeps a failing find (e.g. missing directory) from failing
  # the pipeline under `pipefail`, which would abort the script via `set -e`.
  count=$({ find "$HOME/openclaw/memory" -name "$pattern" 2>/dev/null || true; } | wc -l)

  # Some `wc` implementations pad the number with spaces; strip them.
  echo "${count//[[:space:]]/}"
}

# Log rotation checks: warn when more than 20 health-check logs or more
# than 10 emergency-recovery logs have accumulated under
# $HOME/openclaw/memory; otherwise record a pass.
# Globals: HOME (read)
# Outputs: the two counts plus one pass/warn line each
test_log_rotation() {
  local healthcheck_logs
  local recovery_logs

  print_header "Test: Log Rotation"

  # Count log files
  healthcheck_logs=$(_count_log_files "healthcheck-*.log")

  info "Health Check log files: $healthcheck_logs"

  if [ "$healthcheck_logs" -gt 20 ]; then
    warn "Many Health Check logs ($healthcheck_logs files). Log rotation may not be working."
  else
    pass "Health Check log count is reasonable ($healthcheck_logs files)"
  fi

  recovery_logs=$(_count_log_files "emergency-recovery-*.log")

  info "Emergency Recovery log files: $recovery_logs"

  if [ "$recovery_logs" -gt 10 ]; then
    warn "Many Emergency Recovery logs ($recovery_logs files). Log rotation may not be working."
  else
    pass "Emergency Recovery log count is reasonable ($recovery_logs files)"
  fi
}

# ============================================
# Main
# ============================================

# Entry point: print the title box, run every test group in a fixed
# order, then print the totals.
# Globals: TESTS_PASSED, TESTS_FAILED, GREEN, RED, NC (read)
# Outputs: the full report on stdout
# Exits:   0 when TESTS_FAILED is zero, 1 otherwise
main() {
  local suite
  local total_tests

  printf '%s\n' \
    "" \
    "╔════════════════════════════════════════╗" \
    "║  Self-Healing System Test Suite       ║" \
    "╚════════════════════════════════════════╝" \
    ""

  # Run all tests, in this order
  for suite in \
    test_environment \
    test_dependencies \
    test_gateway_status \
    test_level1_watchdog \
    test_level2_healthcheck \
    test_level3_emergency_recovery \
    test_level4_monitor \
    test_metrics \
    test_log_rotation; do
    "$suite"
  done

  # Summary (warnings are not part of the totals)
  print_header "Test Summary"

  total_tests=$((TESTS_PASSED + TESTS_FAILED))

  echo -e "Total tests: $total_tests"
  echo -e "${GREEN}Passed: $TESTS_PASSED${NC}"
  echo -e "${RED}Failed: $TESTS_FAILED${NC}"
  echo ""

  if [ "$TESTS_FAILED" -ne 0 ]; then
    echo -e "${RED}❌ Some tests failed. Please fix the issues above.${NC}"
    echo ""
    exit 1
  fi

  echo -e "${GREEN}🎉 All tests passed!${NC}"
  echo ""
  exit 0
}

# Run main, forwarding any command-line arguments (currently unused by main)
main "$@"