Files
openclaw-backups/skills/openclaw-self-healing/lib/parameter-optimizer.js

454 lines
13 KiB
JavaScript

#!/usr/bin/env node
/**
* Level 2: Parameter Optimizer (Semi-Automatic)
*
* 로그 분석 결과를 기반으로 파라미터 조정 제안 생성
* - 통계적 검증 포함
* - 안전 범위 체크
* - 파라미터 의존성 고려
* - 수동 승인 필요 (자동 적용 안 함)
*/
// ============================================================================
// Configuration
// ============================================================================
const SAFETY_RULES = {
maxRetries: {
min: 2,
max: 5,
description: 'Too few retries = high failure rate, too many = slow'
},
timeout: {
min: 10000,
max: 30000,
description: 'Must stay within cron interval'
},
backoffBase: {
min: 1000,
max: 5000,
description: 'Base delay for exponential backoff'
}
};
// Minimum sample size for reliable decisions
const MIN_SAMPLE_SIZES = {
'15min_cron': 96 * 3, // 3 days for 15-minute cron (96/day)
'hourly_cron': 24 * 7, // 7 days for hourly cron
'daily_cron': 7 // 7 days for daily cron
};
// ============================================================================
// Parameter Optimizer Class
// ============================================================================
class ParameterOptimizer {
constructor(options = {}) {
this.options = {
aggressiveness: options.aggressiveness || 'conservative', // conservative, moderate, aggressive
requireStatisticalSignificance: options.requireStatisticalSignificance !== false,
...options
};
}
/**
* Generate optimization recommendations from patterns
* @param {Array} patterns - Detected patterns from LogAnalyzer
* @param {Object} stats - Full statistics from LogAnalyzer
* @param {Object} trends - Trend analysis from LogAnalyzer
* @returns {Array} Array of recommendations
*/
generateRecommendations(patterns, stats, trends) {
const recommendations = [];
// Group patterns by cron
const byCron = this.groupPatternsByCron(patterns);
for (const [cron, cronPatterns] of Object.entries(byCron)) {
const cronStats = stats.byCron[cron];
const cronTrend = trends[cron];
// Check sample size first
if (!this.hasSufficientSamples(cronStats)) {
console.log(`⚠️ ${cron}: Insufficient samples (${cronStats.total}), skipping`);
continue;
}
// Generate recommendations for each pattern
for (const pattern of cronPatterns) {
const rec = this.createRecommendation(pattern, cronStats, cronTrend, cron);
if (rec) {
recommendations.push(rec);
}
}
}
// Validate combinations (check parameter dependencies)
const validated = this.validateCombinations(recommendations);
// Sort by priority
return this.prioritize(validated);
}
/**
* Group patterns by cron name
*/
groupPatternsByCron(patterns) {
const byCron = {};
for (const pattern of patterns) {
if (pattern.cron) {
if (!byCron[pattern.cron]) {
byCron[pattern.cron] = [];
}
byCron[pattern.cron].push(pattern);
}
}
return byCron;
}
/**
* Check if cron has sufficient samples for reliable tuning
*/
hasSufficientSamples(cronStats) {
const total = cronStats.total;
// Heuristic: determine cron frequency from sample count
let minRequired;
if (total >= 96 * 3) {
minRequired = MIN_SAMPLE_SIZES['15min_cron'];
} else if (total >= 24 * 7) {
minRequired = MIN_SAMPLE_SIZES['hourly_cron'];
} else {
minRequired = MIN_SAMPLE_SIZES['daily_cron'];
}
return total >= minRequired;
}
/**
* Create recommendation for a specific pattern
*/
createRecommendation(pattern, cronStats, cronTrend, cron) {
switch (pattern.type) {
case 'high_retry_rate':
return this.recommendMaxRetries(pattern, cronStats, cronTrend, cron);
case 'high_failure_rate':
return this.recommendMaxRetries(pattern, cronStats, cronTrend, cron);
case 'slow_response':
return this.recommendTimeout(pattern, cronStats, cronTrend, cron);
case 'recurring_error':
if (pattern.category === 'http' && pattern.topErrorType === 'HTTP 429') {
return this.recommendBackoff(pattern, cronStats, cronTrend, cron);
}
return null;
default:
return null;
}
}
/**
* Recommend maxRetries adjustment
*/
recommendMaxRetries(pattern, cronStats, cronTrend, cron) {
const current = 3; // Current default
const retryRate = cronStats.retryRate;
const failureRate = cronStats.failureRate;
// Dynamic calculation based on severity
let proposed;
if (failureRate > 0.05) {
// Severe: 5%+ failure rate
proposed = Math.min(current + 2, SAFETY_RULES.maxRetries.max);
} else if (retryRate > 0.20) {
// High: 20%+ retry rate
proposed = Math.min(current + 2, SAFETY_RULES.maxRetries.max);
} else if (retryRate > 0.10) {
// Medium: 10%+ retry rate
proposed = current + 1;
} else {
// Mild: under 10%
proposed = current + 1;
}
// Check if trend is improving or degrading
if (cronTrend) {
if (cronTrend.retryRate.trend === 'decreasing') {
// Improving - be conservative
proposed = Math.min(proposed, current + 1);
} else if (cronTrend.retryRate.trend === 'increasing') {
// Degrading - be more aggressive
proposed = Math.min(proposed + 1, SAFETY_RULES.maxRetries.max);
}
}
// Ensure within safety bounds
proposed = Math.max(SAFETY_RULES.maxRetries.min, Math.min(proposed, SAFETY_RULES.maxRetries.max));
if (proposed === current) {
return null; // No change needed
}
// Calculate expected improvement
const expectedImprovement = this.estimateRetryImprovement(current, proposed, failureRate);
return {
cron,
param: 'maxRetries',
current,
proposed,
reason: `Retry rate ${(retryRate * 100).toFixed(1)}% (threshold: 10%), Failure rate ${(failureRate * 100).toFixed(2)}%`,
expectedImprovement,
pattern: pattern.type,
severity: pattern.severity,
safe: this.isSafe('maxRetries', proposed),
confidence: this.calculateConfidence(cronStats, cronTrend),
metadata: {
retryRate,
failureRate,
trend: cronTrend?.retryRate.trend || 'unknown',
sampleSize: cronStats.total
}
};
}
/**
* Recommend timeout adjustment
*/
recommendTimeout(pattern, cronStats, cronTrend, cron) {
const current = 15000; // Current default
const avgDuration = cronStats.avgDuration;
const p95Duration = this.calculateP95(cronStats.durations);
// Use P95 instead of average to account for outliers
const targetTimeout = Math.ceil(p95Duration * 1.5); // 50% buffer
// Round to nearest 5 seconds for cleaner values
const proposed = Math.round(targetTimeout / 5000) * 5000;
// Ensure within safety bounds
const bounded = Math.max(
SAFETY_RULES.timeout.min,
Math.min(proposed, SAFETY_RULES.timeout.max)
);
if (bounded === current) {
return null; // No change needed
}
// Check if we're increasing or decreasing
if (bounded < current) {
// Decreasing timeout is risky - require strong evidence
if (!cronTrend || cronTrend.avgDuration.trend !== 'decreasing') {
return null; // Don't decrease unless clear improving trend
}
}
return {
cron,
param: 'timeout',
current,
proposed: bounded,
reason: `P95 response ${Math.round(p95Duration)}ms, avg ${Math.round(avgDuration)}ms (current timeout: ${current}ms)`,
expectedImprovement: bounded > current
? 'Timeout errors eliminated'
: 'Faster failure detection',
pattern: pattern.type,
severity: pattern.severity,
safe: this.isSafe('timeout', bounded),
confidence: this.calculateConfidence(cronStats, cronTrend),
metadata: {
avgDuration,
p95Duration,
trend: cronTrend?.avgDuration.trend || 'unknown',
sampleSize: cronStats.total
}
};
}
/**
* Recommend backoff adjustment (for rate limiting)
*/
recommendBackoff(pattern, cronStats, cronTrend, cron) {
const current = 1000; // Current default base
const proposed = current * 2; // Double the backoff
if (proposed > SAFETY_RULES.backoffBase.max) {
return null; // Already at max
}
return {
cron,
param: 'backoffBase',
current,
proposed,
reason: `HTTP 429 (Rate Limit) errors: ${pattern.value} times`,
expectedImprovement: 'Rate limit errors reduced',
pattern: pattern.type,
severity: pattern.severity,
safe: this.isSafe('backoffBase', proposed),
confidence: 'medium', // Rate limiting is clear
metadata: {
errorCount: pattern.value,
errorType: pattern.topErrorType
}
};
}
/**
* Calculate P95 percentile
*/
calculateP95(durations) {
if (!durations || durations.length === 0) return 0;
const sorted = [...durations].sort((a, b) => a - b);
const index = Math.ceil(0.95 * sorted.length) - 1;
return sorted[Math.max(0, index)];
}
/**
* Estimate improvement from retry increase
*/
estimateRetryImprovement(current, proposed, failureRate) {
// Simple model: each retry recovers ~70% of remaining failures
const recoveryRate = 0.70;
const currentRecovery = 1 - Math.pow(1 - recoveryRate, current);
const proposedRecovery = 1 - Math.pow(1 - recoveryRate, proposed);
const improvement = (proposedRecovery - currentRecovery) / (1 - currentRecovery);
return `Final failure rate -${(improvement * 100).toFixed(0)}%`;
}
/**
* Check if proposed value is within safety bounds
*/
isSafe(param, value) {
const rule = SAFETY_RULES[param];
if (!rule) return false;
return value >= rule.min && value <= rule.max;
}
/**
* Calculate confidence level for recommendation
*/
calculateConfidence(cronStats, cronTrend) {
let score = 0;
// Sample size
if (cronStats.total >= 500) score += 3;
else if (cronStats.total >= 200) score += 2;
else if (cronStats.total >= 100) score += 1;
// Clear trend
if (cronTrend) {
if (Math.abs(cronTrend.retryRate.change) > 50) score += 2; // Strong trend
else if (Math.abs(cronTrend.retryRate.change) > 20) score += 1; // Weak trend
}
// Map to confidence level
if (score >= 4) return 'high';
if (score >= 2) return 'medium';
return 'low';
}
/**
* Validate parameter combinations for dependencies
*/
validateCombinations(recommendations) {
const validated = [];
const byCron = {};
// Group by cron
for (const rec of recommendations) {
if (!byCron[rec.cron]) {
byCron[rec.cron] = [];
}
byCron[rec.cron].push(rec);
}
// Check each cron's recommendations
for (const [cron, recs] of Object.entries(byCron)) {
// If multiple params for same cron, check combined effect
if (recs.length > 1) {
const combined = this.checkCombinedEffect(recs, cron);
if (!combined.safe) {
// Mark all as requiring manual review
for (const rec of recs) {
rec.safe = false;
rec.warning = combined.warning;
rec.recommendation = 'Apply one at a time, verify each before next';
}
}
}
validated.push(...recs);
}
return validated;
}
/**
* Check combined effect of multiple parameter changes
*/
checkCombinedEffect(recommendations, cron) {
// Build hypothetical config
const config = {
maxRetries: 3,
timeout: 15000,
backoffBase: 1000
};
for (const rec of recommendations) {
config[rec.param] = rec.proposed;
}
// Calculate worst-case total wait time
// Exponential backoff: base * (2^0 + 2^1 + ... + 2^(n-1))
const maxBackoffTime = config.backoffBase * (Math.pow(2, config.maxRetries) - 1);
const maxTotalTime = config.timeout * config.maxRetries + maxBackoffTime;
// Assume 15-minute cron interval (900s)
const cronInterval = 900000; // 15 minutes in ms
if (maxTotalTime > cronInterval * 0.8) {
return {
safe: false,
warning: `Combined params may exceed cron interval: ${Math.round(maxTotalTime / 1000)}s > ${Math.round(cronInterval * 0.8 / 1000)}s`
};
}
return { safe: true };
}
/**
* Prioritize recommendations
*/
prioritize(recommendations) {
const severityOrder = { high: 0, medium: 1, low: 2 };
const confidenceOrder = { high: 0, medium: 1, low: 2 };
return recommendations.sort((a, b) => {
// First by severity
if (severityOrder[a.severity] !== severityOrder[b.severity]) {
return severityOrder[a.severity] - severityOrder[b.severity];
}
// Then by confidence
if (confidenceOrder[a.confidence] !== confidenceOrder[b.confidence]) {
return confidenceOrder[a.confidence] - confidenceOrder[b.confidence];
}
// Then by safety
if (a.safe !== b.safe) {
return b.safe ? 1 : -1; // Safe first
}
return 0;
});
}
}
// ============================================================================
// Export
// ============================================================================
module.exports = { ParameterOptimizer, SAFETY_RULES, MIN_SAMPLE_SIZES };