openclaw-backups/skills/openclaw-self-healing/lib/parameter-optimizer.js

#!/usr/bin/env node
/**
 * Level 2: Parameter Optimizer (Semi-Automatic)
 *
 * 로그 분석 결과를 기반으로 파라미터 조정 제안 생성
 * - 통계적 검증 포함
 * - 안전 범위 체크
 * - 파라미터 의존성 고려
 * - 수동 승인 필요 (자동 적용 안 함)
 */

// ============================================================================
// Configuration
// ============================================================================

/**
 * Inclusive [min, max] bounds for each tunable parameter.
 *
 * ParameterOptimizer clamps maxRetries and timeout proposals to these bounds
 * and reports a proposal as `safe` only when it lies inside them.
 * `timeout` and `backoffBase` are in milliseconds (they are compared against a
 * millisecond cron interval in checkCombinedEffect); `maxRetries` is a count.
 * `description` is human-readable rationale only; no code in this file reads it.
 */
const SAFETY_RULES = {
  // Number of retry attempts.
  maxRetries: {
    min: 2,
    max: 5,
    description: 'Too few retries = high failure rate, too many = slow'
  },
  // Per-attempt timeout, in milliseconds.
  timeout: {
    min: 10000,
    max: 30000,
    description: 'Must stay within cron interval'
  },
  // First delay of the exponential backoff series, in milliseconds.
  backoffBase: {
    min: 1000,
    max: 5000,
    description: 'Base delay for exponential backoff'
  }
};

// Minimum number of logged runs required before a cron is tuned, keyed by
// cron cadence. Each value is (runs per day) * (days of history).
// NOTE(review): hasSufficientSamples() picks the key from the sample count
// itself rather than from the cron's real schedule — confirm that is intended.
const MIN_SAMPLE_SIZES = {
  '15min_cron': 96 * 3,   // 3 days for 15-minute cron (96/day)
  'hourly_cron': 24 * 7,  // 7 days for hourly cron
  'daily_cron': 7         // 7 days for daily cron
};

// ============================================================================
// Parameter Optimizer Class
// ============================================================================

/**
 * Turns log-analysis patterns into parameter-change proposals
 * (maxRetries, timeout, backoffBase).
 *
 * The class only builds recommendation objects; nothing here applies them.
 * Every proposal is checked against SAFETY_RULES and, when several proposals
 * target the same cron, against their combined worst-case run time.
 */
class ParameterOptimizer {
  /**
   * @param {Object} [options] - Optimizer settings; unknown keys are kept as-is.
   * @param {string} [options.aggressiveness='conservative'] - One of
   *   'conservative', 'moderate', 'aggressive'. Stored only; no method in this
   *   class reads it.
   * @param {boolean} [options.requireStatisticalSignificance=true] - Stored
   *   only; no method in this class reads it. Anything other than `false`
   *   is normalized to `true`.
   */
  constructor(options = {}) {
    // A default parameter does not cover an explicit `null`.
    const provided = options ?? {};

    this.options = {
      // Spread first: the normalized values below must win, otherwise an
      // explicitly passed `undefined`/falsy value would overwrite the default.
      ...provided,
      aggressiveness: provided.aggressiveness || 'conservative',
      requireStatisticalSignificance: provided.requireStatisticalSignificance !== false
    };
  }

  /**
   * Generate optimization recommendations from patterns.
   *
   * Crons with no statistics entry, or with too few samples, are skipped with
   * a console message instead of raising.
   *
   * @param {Array} patterns - Detected patterns; each needs `cron` and `type`.
   * @param {Object} stats - Statistics; per-cron entries live in `stats.byCron`.
   * @param {Object} [trends] - Per-cron trend objects keyed by cron name.
   * @returns {Array} Recommendations, validated for combined effect and sorted
   *   by severity, then confidence, then safety.
   */
  generateRecommendations(patterns, stats, trends) {
    const recommendations = [];
    const byCron = this.groupPatternsByCron(patterns);

    for (const [cron, cronPatterns] of Object.entries(byCron)) {
      const cronStats = stats?.byCron?.[cron];
      const cronTrend = trends?.[cron];

      // Missing statistics are treated the same as an insufficient sample.
      if (!cronStats || !this.hasSufficientSamples(cronStats)) {
        console.log(`⚠️  ${cron}: Insufficient samples (${cronStats?.total ?? 0}), skipping`);
        continue;
      }

      for (const pattern of cronPatterns) {
        const rec = this.createRecommendation(pattern, cronStats, cronTrend, cron);
        if (rec) {
          recommendations.push(rec);
        }
      }
    }

    // Cross-check proposals that target the same cron, then order them.
    return this.prioritize(this.validateCombinations(recommendations));
  }

  /**
   * Group patterns by cron name.
   *
   * @param {Array} patterns - Patterns; entries without a truthy `cron` are dropped.
   * @returns {Object} Plain object mapping cron name to its array of patterns.
   */
  groupPatternsByCron(patterns) {
    const byCron = {};
    for (const pattern of patterns ?? []) {
      const cron = pattern?.cron;
      if (!cron) {
        continue;
      }
      if (!byCron[cron]) {
        byCron[cron] = [];
      }
      byCron[cron].push(pattern);
    }
    return byCron;
  }

  /**
   * Check if a cron has sufficient samples for reliable tuning.
   *
   * NOTE(review): the cadence is inferred from the sample count itself, so the
   * selected minimum is always <= total in the first two branches. In effect
   * this returns `total >= MIN_SAMPLE_SIZES['daily_cron']`. Confirm whether
   * the real schedule should be used instead.
   *
   * @param {Object} cronStats - Per-cron statistics with a numeric `total`.
   * @returns {boolean} True when enough runs were observed.
   */
  hasSufficientSamples(cronStats) {
    // Comparisons against undefined are false, so a missing total yields false.
    const total = cronStats?.total;

    let minRequired;
    if (total >= 96 * 3) {
      minRequired = MIN_SAMPLE_SIZES['15min_cron'];
    } else if (total >= 24 * 7) {
      minRequired = MIN_SAMPLE_SIZES['hourly_cron'];
    } else {
      minRequired = MIN_SAMPLE_SIZES['daily_cron'];
    }

    return total >= minRequired;
  }

  /**
   * Dispatch a pattern to the recommender for its type.
   *
   * @param {Object} pattern - Pattern with `type` (and, for recurring errors,
   *   `category` and `topErrorType`).
   * @param {Object} cronStats - Statistics for the pattern's cron.
   * @param {Object} [cronTrend] - Trend data for the pattern's cron.
   * @param {string} cron - Cron name.
   * @returns {Object|null} A recommendation, or null when the pattern type is
   *   not handled or no change is warranted.
   */
  createRecommendation(pattern, cronStats, cronTrend, cron) {
    switch (pattern.type) {
      case 'high_retry_rate':
      case 'high_failure_rate':
        return this.recommendMaxRetries(pattern, cronStats, cronTrend, cron);

      case 'slow_response':
        return this.recommendTimeout(pattern, cronStats, cronTrend, cron);

      case 'recurring_error':
        // Only rate limiting (HTTP 429) maps to a tunable parameter.
        if (pattern.category === 'http' && pattern.topErrorType === 'HTTP 429') {
          return this.recommendBackoff(pattern, cronStats, cronTrend, cron);
        }
        return null;

      default:
        return null;
    }
  }

  /**
   * Recommend a maxRetries adjustment.
   *
   * @param {Object} pattern - Triggering pattern (`type`, `severity`).
   * @param {Object} cronStats - Needs `retryRate`, `failureRate` (fractions) and `total`.
   * @param {Object} [cronTrend] - Optional; `retryRate.trend` / `retryRate.change` are used when present.
   * @param {string} cron - Cron name.
   * @returns {Object|null} Recommendation, or null when the proposal equals the current value.
   */
  recommendMaxRetries(pattern, cronStats, cronTrend, cron) {
    const current = 3; // Current default
    const { retryRate, failureRate } = cronStats;
    const { min, max } = SAFETY_RULES.maxRetries;
    const retryTrend = cronTrend?.retryRate?.trend;

    // Severe (5%+ failures) or high (20%+ retries) -> +2 capped at max;
    // everything else -> +1.
    let proposed = (failureRate > 0.05 || retryRate > 0.20)
      ? Math.min(current + 2, max)
      : current + 1;

    if (retryTrend === 'decreasing') {
      // Already improving: never go beyond a single step.
      proposed = Math.min(proposed, current + 1);
    } else if (retryTrend === 'increasing') {
      // Degrading: take one extra step, still capped.
      proposed = Math.min(proposed + 1, max);
    }

    // Ensure within safety bounds
    proposed = Math.max(min, Math.min(proposed, max));

    if (proposed === current) {
      return null; // No change needed
    }

    return {
      cron,
      param: 'maxRetries',
      current,
      proposed,
      reason: `Retry rate ${(retryRate * 100).toFixed(1)}% (threshold: 10%), Failure rate ${(failureRate * 100).toFixed(2)}%`,
      expectedImprovement: this.estimateRetryImprovement(current, proposed, failureRate),
      pattern: pattern.type,
      severity: pattern.severity,
      safe: this.isSafe('maxRetries', proposed),
      confidence: this.calculateConfidence(cronStats, cronTrend),
      metadata: {
        retryRate,
        failureRate,
        trend: retryTrend || 'unknown',
        sampleSize: cronStats.total
      }
    };
  }

  /**
   * Recommend a timeout adjustment based on the P95 of observed durations.
   *
   * @param {Object} pattern - Triggering pattern (`type`, `severity`).
   * @param {Object} cronStats - Needs `durations` (array of ms), `avgDuration` and `total`.
   * @param {Object} [cronTrend] - Optional; `avgDuration.trend` gates any decrease.
   * @param {string} cron - Cron name.
   * @returns {Object|null} Recommendation, or null when there is no usable
   *   duration data, no change is needed, or a decrease lacks a clearly
   *   decreasing duration trend.
   */
  recommendTimeout(pattern, cronStats, cronTrend, cron) {
    const current = 15000; // Current default
    const avgDuration = cronStats.avgDuration;
    const p95Duration = this.calculateP95(cronStats.durations);

    // Without a real P95 the arithmetic below would collapse to the minimum
    // timeout and could recommend a decrease backed by no data at all.
    if (!(Number.isFinite(p95Duration) && p95Duration > 0)) {
      return null;
    }

    // Use P95 instead of average to account for outliers; add a 50% buffer.
    const targetTimeout = Math.ceil(p95Duration * 1.5);

    // Round to nearest 5 seconds for cleaner values, then clamp to the bounds.
    const rounded = Math.round(targetTimeout / 5000) * 5000;
    const bounded = Math.max(
      SAFETY_RULES.timeout.min,
      Math.min(rounded, SAFETY_RULES.timeout.max)
    );

    if (bounded === current) {
      return null; // No change needed
    }

    const durationTrend = cronTrend?.avgDuration?.trend;

    // Decreasing the timeout is risky - require a clearly improving trend.
    if (bounded < current && durationTrend !== 'decreasing') {
      return null;
    }

    return {
      cron,
      param: 'timeout',
      current,
      proposed: bounded,
      reason: `P95 response ${Math.round(p95Duration)}ms, avg ${Math.round(avgDuration)}ms (current timeout: ${current}ms)`,
      expectedImprovement: bounded > current
        ? 'Timeout errors eliminated'
        : 'Faster failure detection',
      pattern: pattern.type,
      severity: pattern.severity,
      safe: this.isSafe('timeout', bounded),
      confidence: this.calculateConfidence(cronStats, cronTrend),
      metadata: {
        avgDuration,
        p95Duration,
        trend: durationTrend || 'unknown',
        sampleSize: cronStats.total
      }
    };
  }

  /**
   * Recommend a backoff adjustment (for rate limiting).
   *
   * @param {Object} pattern - Triggering pattern; `value` is reported as the error count.
   * @param {Object} cronStats - Unused; kept for a uniform recommender signature.
   * @param {Object} [cronTrend] - Unused; kept for a uniform recommender signature.
   * @param {string} cron - Cron name.
   * @returns {Object|null} Recommendation, or null when doubling would exceed the maximum.
   */
  recommendBackoff(pattern, cronStats, cronTrend, cron) {
    const current = 1000; // Current default base
    const proposed = current * 2; // Double the backoff

    if (proposed > SAFETY_RULES.backoffBase.max) {
      return null; // Already at max
    }

    return {
      cron,
      param: 'backoffBase',
      current,
      proposed,
      reason: `HTTP 429 (Rate Limit) errors: ${pattern.value} times`,
      expectedImprovement: 'Rate limit errors reduced',
      pattern: pattern.type,
      severity: pattern.severity,
      safe: this.isSafe('backoffBase', proposed),
      confidence: 'medium', // Rate limiting is clear
      metadata: {
        errorCount: pattern.value,
        errorType: pattern.topErrorType
      }
    };
  }

  /**
   * Calculate the 95th percentile using the nearest-rank method.
   *
   * @param {number[]} durations - Samples; the input array is not modified.
   * @returns {number} The P95 sample, or 0 when there are no samples.
   */
  calculateP95(durations) {
    if (!durations || durations.length === 0) return 0;
    const sorted = [...durations].sort((a, b) => a - b);
    const index = Math.ceil(0.95 * sorted.length) - 1;
    return sorted[Math.max(0, index)];
  }

  /**
   * Estimate the change in final failure rate from a retry-count change.
   *
   * Model: each retry recovers ~70% of the remaining failures.
   *
   * @param {number} current - Current retry count.
   * @param {number} proposed - Proposed retry count.
   * @param {number} failureRate - Unused by the model; kept for interface compatibility.
   * @returns {string} e.g. 'Final failure rate -70%'; a '+' sign is used when
   *   the proposal would increase the failure rate.
   */
  estimateRetryImprovement(current, proposed, failureRate) {
    const recoveryRate = 0.70;
    const currentRecovery = 1 - Math.pow(1 - recoveryRate, current);
    const proposedRecovery = 1 - Math.pow(1 - recoveryRate, proposed);

    const improvement = (proposedRecovery - currentRecovery) / (1 - currentRecovery);

    // A negative improvement means more failures; show it as '+N%' rather
    // than the doubled minus sign a naive template would produce.
    const sign = improvement < 0 ? '+' : '-';
    return `Final failure rate ${sign}${(Math.abs(improvement) * 100).toFixed(0)}%`;
  }

  /**
   * Check if a proposed value is within the safety bounds (inclusive).
   *
   * @param {string} param - Key of SAFETY_RULES.
   * @param {number} value - Proposed value.
   * @returns {boolean} False for unknown parameters or out-of-range values.
   */
  isSafe(param, value) {
    const rule = SAFETY_RULES[param];
    if (!rule) return false;
    return value >= rule.min && value <= rule.max;
  }

  /**
   * Calculate the confidence level for a recommendation.
   *
   * Score = sample-size points (0-3) + retry-rate trend-strength points (0-2).
   *
   * @param {Object} cronStats - Needs numeric `total`.
   * @param {Object} [cronTrend] - Optional; `retryRate.change` is used when present.
   * @returns {'high'|'medium'|'low'} 'high' for score >= 4, 'medium' for >= 2.
   */
  calculateConfidence(cronStats, cronTrend) {
    let score = 0;

    // Sample size
    const total = cronStats?.total;
    if (total >= 500) score += 3;
    else if (total >= 200) score += 2;
    else if (total >= 100) score += 1;

    // Trend strength; a missing change becomes NaN and adds nothing.
    const change = Math.abs(cronTrend?.retryRate?.change);
    if (change > 50) score += 2; // Strong trend
    else if (change > 20) score += 1; // Weak trend

    if (score >= 4) return 'high';
    if (score >= 2) return 'medium';
    return 'low';
  }

  /**
   * Validate parameter combinations for dependencies.
   *
   * When a cron has more than one recommendation and their combined effect is
   * unsafe, every recommendation for that cron is mutated in place:
   * `safe` becomes false and `warning` / `recommendation` are added.
   *
   * @param {Array} recommendations - Recommendation objects with `cron`.
   * @returns {Array} The same objects, grouped by cron in first-seen order.
   */
  validateCombinations(recommendations) {
    // A Map keeps first-seen order and is immune to cron names that collide
    // with Object.prototype members.
    const byCron = new Map();
    for (const rec of recommendations) {
      if (!byCron.has(rec.cron)) {
        byCron.set(rec.cron, []);
      }
      byCron.get(rec.cron).push(rec);
    }

    const validated = [];
    for (const [cron, recs] of byCron) {
      if (recs.length > 1) {
        const combined = this.checkCombinedEffect(recs, cron);
        if (!combined.safe) {
          // Mark all as requiring manual review
          for (const rec of recs) {
            rec.safe = false;
            rec.warning = combined.warning;
            rec.recommendation = 'Apply one at a time, verify each before next';
          }
        }
      }
      validated.push(...recs);
    }

    return validated;
  }

  /**
   * Check the combined effect of multiple parameter changes.
   *
   * Builds a hypothetical config from the defaults plus every proposal and
   * compares its worst-case run time with 80% of a 15-minute cron interval.
   * The interval is assumed, not derived from the cron.
   *
   * @param {Array} recommendations - Recommendations with `param` and `proposed`.
   * @param {string} cron - Unused; kept for interface compatibility.
   * @returns {{safe: boolean, warning?: string}} Verdict, with a warning when unsafe.
   */
  checkCombinedEffect(recommendations, cron) {
    const config = {
      maxRetries: 3,
      timeout: 15000,
      backoffBase: 1000
    };

    for (const rec of recommendations) {
      config[rec.param] = rec.proposed;
    }

    // Exponential backoff: base * (2^0 + 2^1 + ... + 2^(n-1)) = base * (2^n - 1)
    const maxBackoffTime = config.backoffBase * (Math.pow(2, config.maxRetries) - 1);
    const maxTotalTime = config.timeout * config.maxRetries + maxBackoffTime;

    const cronInterval = 900000; // 15 minutes in ms

    if (maxTotalTime > cronInterval * 0.8) {
      return {
        safe: false,
        warning: `Combined params may exceed cron interval: ${Math.round(maxTotalTime / 1000)}s > ${Math.round(cronInterval * 0.8 / 1000)}s`
      };
    }

    return { safe: true };
  }

  /**
   * Prioritize recommendations: severity, then confidence, then safe-first.
   *
   * Sorts the given array in place and returns it. Unrecognized severity or
   * confidence values rank after 'low' so the comparator stays consistent.
   *
   * @param {Array} recommendations - Recommendation objects.
   * @returns {Array} The same array, sorted.
   */
  prioritize(recommendations) {
    const levels = new Map([['high', 0], ['medium', 1], ['low', 2]]);
    const rank = (level) => levels.get(level) ?? levels.size;

    return recommendations.sort((a, b) => {
      const bySeverity = rank(a.severity) - rank(b.severity);
      if (bySeverity !== 0) {
        return bySeverity;
      }

      const byConfidence = rank(a.confidence) - rank(b.confidence);
      if (byConfidence !== 0) {
        return byConfidence;
      }

      if (a.safe !== b.safe) {
        return b.safe ? 1 : -1; // Safe first
      }
      return 0;
    });
  }
}

// ============================================================================
// Export
// ============================================================================

// CommonJS export of the optimizer class together with the two configuration
// tables it reads.
module.exports = { ParameterOptimizer, SAFETY_RULES, MIN_SAMPLE_SIZES };