AI Newsletter Digest improvements: fixed QP soft line break decoding, URL extraction, and content cleaning
This commit is contained in:
434
skills/openclaw-self-healing/lib/auto-retry.js
Normal file
434
skills/openclaw-self-healing/lib/auto-retry.js
Normal file
@@ -0,0 +1,434 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* Auto-Retry System (Level 1 Self-Improvement)
|
||||
*
|
||||
* Purpose: Automatically retry failed operations with intelligent backoff
|
||||
* Closes the loop: Failure → Analyze → Retry → Success (automatic!)
|
||||
*
|
||||
* Features:
|
||||
* - Verifiable outcomes (exit code, HTTP status, errors)
|
||||
* - Exponential/linear backoff
|
||||
* - Error analysis and classification
|
||||
* - Automatic logging
|
||||
* - Discord notifications (optional)
|
||||
*/
|
||||
|
||||
const fs = require('fs');
const os = require('os');
const path = require('path');
|
||||
|
||||
// ============================================================================
|
||||
// Configuration
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Module-wide defaults for the auto-retry engine: retry policy, the error
 * codes / HTTP statuses that are treated as retryable, and the JSONL log
 * location.
 */
const CONFIG = {
  // Default retry settings
  DEFAULT_MAX_RETRIES: 3,
  DEFAULT_BACKOFF: 'exponential',
  DEFAULT_BASE_DELAY: 1000, // 1 second

  // Retry decision: error codes classified as retryable network failures
  RETRYABLE_ERROR_CODES: [
    'ETIMEDOUT',
    'ECONNRESET',
    'ENOTFOUND',
    'EAI_AGAIN',
    'ECONNREFUSED'
  ],

  // Retry decision: HTTP statuses classified as retryable
  RETRYABLE_HTTP_STATUS: [408, 429, 500, 502, 503, 504],

  // Logging
  // HOME is not guaranteed to be set (e.g. on Windows); path.join(undefined, ...)
  // throws at module load, so fall back to the OS-reported home directory.
  LOG_DIR: path.join(process.env.HOME || os.homedir(), 'openclaw', 'logs'),
  LOG_FILE: 'auto-retry.jsonl',
};
|
||||
|
||||
// ============================================================================
|
||||
// Core: Auto-Retry Engine
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Retry engine: runs an async operation, classifies each failure, waits with
 * a configurable backoff and tries again until the operation succeeds, the
 * error is classified as non-retryable, or the attempt budget is exhausted.
 * Every final outcome is appended to the JSONL log described by CONFIG.
 */
class AutoRetry {
  /**
   * @param {Object} [options]
   * @param {number} [options.maxRetries] - Maximum number of attempts,
   *   including the first one. Must be a positive integer (numeric strings are
   *   accepted); anything else falls back to CONFIG.DEFAULT_MAX_RETRIES.
   * @param {string} [options.backoff] - 'exponential', 'linear', or any other
   *   value for a fixed delay. Defaults to CONFIG.DEFAULT_BACKOFF.
   * @param {number} [options.baseDelay] - Base delay in ms (0 is allowed).
   *   Defaults to CONFIG.DEFAULT_BASE_DELAY.
   * @param {Function} [options.onRetry] - (attempt, error, analysis, delay)
   * @param {Function} [options.onSuccess] - (attempt, result, attempts)
   * @param {Function} [options.onFinalFailure] - (attempts, analysis)
   */
  constructor(options = {}) {
    // A non-integer or non-positive budget would let the retry loop fall
    // through without ever returning or throwing, so normalize it here.
    const requestedRetries = Number(options.maxRetries);
    this.maxRetries = Number.isInteger(requestedRetries) && requestedRetries > 0
      ? requestedRetries
      : CONFIG.DEFAULT_MAX_RETRIES;

    this.backoff = options.backoff || CONFIG.DEFAULT_BACKOFF;

    // `||` would silently turn an explicit 0 ms delay into the default.
    const hasDelay = options.baseDelay !== undefined
      && options.baseDelay !== null
      && options.baseDelay !== '';
    const requestedDelay = hasDelay ? Number(options.baseDelay) : NaN;
    this.baseDelay = Number.isFinite(requestedDelay) && requestedDelay >= 0
      ? requestedDelay
      : CONFIG.DEFAULT_BASE_DELAY;

    this.onRetry = options.onRetry || null;
    this.onSuccess = options.onSuccess || null;
    this.onFinalFailure = options.onFinalFailure || null;
  }

  /**
   * Execute function with automatic retry.
   *
   * @param {Function} fn - Operation to run; may return a value or a promise.
   * @param {Object} [context] - Arbitrary metadata copied into the log entry.
   * @returns {Promise<{success: boolean, result: *, attempts: number, totalDuration: number}>}
   * @throws Rethrows the operation's own error after the last attempt or on
   *   the first error classified as non-retryable. Errors thrown by the
   *   callbacks propagate to the caller as well.
   */
  async execute(fn, context = {}) {
    const startTime = Date.now();
    const attempts = [];

    for (let attempt = 1; attempt <= this.maxRetries; attempt++) {
      const attemptStart = Date.now();
      let result;

      try {
        // Only the operation itself is guarded. Logging and the success hook
        // run outside this try so that a failing hook can never cause an
        // operation that already succeeded to be executed again.
        result = await fn();
      } catch (error) {
        const analysis = this.analyzeError(error);

        attempts.push({
          attempt,
          success: false,
          duration: Date.now() - attemptStart,
          error: analysis
        });

        const isLastAttempt = attempt === this.maxRetries;

        if (isLastAttempt || !analysis.retryable) {
          const failure = {
            context,
            attempts,
            totalDuration: Date.now() - startTime,
            finalError: analysis
          };
          if (!isLastAttempt) {
            failure.reason = 'Non-retryable error';
          }
          await this.logFailure(failure);

          // Giving up early on a non-retryable error is a final failure too,
          // so the hook fires on both paths.
          if (this.onFinalFailure) {
            await this.onFinalFailure(attempts, analysis);
          }

          throw error;
        }

        const delay = this.calculateBackoff(attempt);

        if (this.onRetry) {
          await this.onRetry(attempt, error, analysis, delay);
        }

        await this.sleep(delay);
        continue;
      }

      // Success
      const duration = Date.now() - attemptStart;
      const totalDuration = Date.now() - startTime;

      attempts.push({
        attempt,
        success: true,
        duration
      });

      await this.logSuccess({
        context,
        attempts,
        totalDuration,
        result
      });

      if (this.onSuccess) {
        await this.onSuccess(attempt, result, attempts);
      }

      return {
        success: true,
        result,
        attempts: attempt,
        totalDuration
      };
    }
  }

  /**
   * Analyze error to determine if retryable.
   *
   * Checks run in order (network code, HTTP status, timeout message); when
   * several match, the category and suggestion of the last match win.
   *
   * @param {*} error - Whatever the operation threw.
   * @returns {{type: string, message: *, statusCode: *, retryable: boolean, category: string, suggestedFix: string}}
   */
  analyzeError(error) {
    // `throw null` / `throw undefined` must not crash the analyzer itself.
    const source = error === null || error === undefined ? {} : error;
    const statusCode = source.statusCode || source.status;
    const message = source.message;

    const analysis = {
      type: source.code || source.name || 'Unknown',
      message,
      statusCode,
      retryable: false,
      category: 'unknown',
      suggestedFix: 'Unknown error'
    };

    // Network errors
    if (CONFIG.RETRYABLE_ERROR_CODES.includes(source.code)) {
      analysis.retryable = true;
      analysis.category = 'network';
      analysis.suggestedFix = this.suggestNetworkFix(source.code);
    }

    // HTTP errors: honor both `statusCode` and `status`, and tolerate a
    // numeric string such as '503'.
    const numericStatus = Number(statusCode);
    if (CONFIG.RETRYABLE_HTTP_STATUS.includes(numericStatus)) {
      analysis.retryable = true;
      analysis.category = 'http';
      analysis.suggestedFix = this.suggestHTTPFix(numericStatus);
    }

    // Timeout (case-insensitive; also matches "timed out")
    if (typeof message === 'string' && /timeout|timed out/i.test(message)) {
      analysis.retryable = true;
      analysis.category = 'timeout';
      analysis.suggestedFix = 'Increase timeout or check network';
    }

    return analysis;
  }

  /**
   * Map a network error code to a human-readable hint.
   * @param {string} code
   * @returns {string}
   */
  suggestNetworkFix(code) {
    const fixes = {
      'ETIMEDOUT': 'Network timeout - check connection or increase timeout',
      'ECONNRESET': 'Connection reset - server may be restarting',
      'ENOTFOUND': 'DNS lookup failed - check hostname',
      'EAI_AGAIN': 'DNS temporary failure - retry should work',
      'ECONNREFUSED': 'Connection refused - check if service is running'
    };
    return fixes[code] || 'Network error';
  }

  /**
   * Map an HTTP status to a human-readable hint.
   * @param {number} status
   * @returns {string}
   */
  suggestHTTPFix(status) {
    const fixes = {
      408: 'Request timeout - increase timeout',
      429: 'Rate limit exceeded - increase backoff delay',
      500: 'Internal server error - temporary, retry should work',
      502: 'Bad gateway - upstream server issue',
      503: 'Service unavailable - server overloaded',
      504: 'Gateway timeout - upstream server timeout'
    };
    return fixes[status] || 'HTTP error';
  }

  /**
   * Calculate backoff delay.
   * @param {number} attempt - 1-based number of the attempt that just failed.
   * @returns {number} Delay in ms before the next attempt.
   */
  calculateBackoff(attempt) {
    if (this.backoff === 'exponential') {
      // base, 2x base, 4x base, ...
      return this.baseDelay * Math.pow(2, attempt - 1);
    }
    if (this.backoff === 'linear') {
      // base, 2x base, 3x base, ...
      return this.baseDelay * attempt;
    }
    // Fixed delay
    return this.baseDelay;
  }

  /**
   * Sleep utility.
   * @param {number} ms
   * @returns {Promise<void>}
   */
  sleep(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /**
   * Log success.
   * @param {Object} data - Fields merged into the log entry.
   */
  async logSuccess(data) {
    await this.writeLog({
      timestamp: new Date().toISOString(),
      type: 'success',
      ...data
    });
  }

  /**
   * Log failure.
   * @param {Object} data - Fields merged into the log entry.
   */
  async logFailure(data) {
    await this.writeLog({
      timestamp: new Date().toISOString(),
      type: 'failure',
      ...data
    });
  }

  /**
   * Write log to file (JSONL format). Best-effort: any failure (directory
   * creation, serialization, append) is reported on stderr and swallowed so
   * logging can never change the outcome of the retried operation.
   * @param {Object} entry
   */
  async writeLog(entry) {
    try {
      // `recursive: true` is a no-op when the directory already exists.
      fs.mkdirSync(CONFIG.LOG_DIR, { recursive: true });

      const logFile = path.join(CONFIG.LOG_DIR, CONFIG.LOG_FILE);
      const line = JSON.stringify(entry) + '\n';

      fs.appendFileSync(logFile, line);
    } catch (e) {
      console.error('Failed to write log:', e.message);
    }
  }
}
|
||||
|
||||
// ============================================================================
|
||||
// Convenience Functions
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Convenience wrapper: builds an AutoRetry from `options` and runs `fn`
 * through it, using `options.context` (or an empty object) as log context.
 *
 * @param {Function} fn - Operation to execute.
 * @param {Object} [options] - AutoRetry options plus an optional `context`.
 * @returns {Promise<Object>} Whatever AutoRetry#execute resolves with.
 */
async function executeWithRetry(fn, options = {}) {
  const runner = new AutoRetry(options);
  const logContext = options.context || {};
  const outcome = await runner.execute(fn, logContext);
  return outcome;
}
|
||||
|
||||
/**
 * Retry with Discord notifications.
 *
 * Wraps executeWithRetry with hooks that post a Discord embed on every retry,
 * on success after at least one retry, and on final failure, and that mirror
 * each event on the console. Notifications are best-effort: a failed webhook
 * call is reported on stderr and never changes the outcome of the operation.
 * Caller-supplied `onRetry` / `onSuccess` / `onFinalFailure` hooks are invoked
 * after the built-in handling instead of being discarded.
 *
 * @param {Function} fn - Operation to execute.
 * @param {Object} [options] - executeWithRetry options, plus:
 * @param {string} [options.discordWebhook] - Webhook URL; omit to log to console only.
 * @param {string} [options.taskName] - Label shown in the notifications.
 * @returns {Promise<Object>} Whatever executeWithRetry resolves with.
 */
async function executeWithNotifications(fn, options = {}) {
  const { discordWebhook, taskName = 'task' } = options;
  const maxAttempts = options.maxRetries || CONFIG.DEFAULT_MAX_RETRIES;

  // A webhook outage must not abort the retry loop or mask the real result.
  const notify = async (embed) => {
    if (!discordWebhook) {
      return;
    }
    try {
      await sendDiscordNotification(discordWebhook, embed);
    } catch (notifyError) {
      console.error(`Discord notification failed: ${notifyError.message}`);
    }
  };

  // Discord rejects embeds whose field values are empty or exceed 1024 chars.
  const fieldText = (value, fallback) => String(value || fallback).slice(0, 1024);

  return await executeWithRetry(fn, {
    ...options,
    onRetry: async (attempt, error, analysis, delay) => {
      await notify({
        title: '🔄 재시도 중',
        description: `**${taskName}** (시도 ${attempt}/${maxAttempts})`,
        color: 0xFFA500,
        fields: [
          { name: '에러', value: fieldText(error.message, 'Unknown error'), inline: false },
          { name: '카테고리', value: fieldText(analysis.category, 'unknown'), inline: true },
          { name: '다음 시도', value: `${delay}ms 후`, inline: true }
        ]
      });

      // Console log
      console.log(`⚠️ Retry ${attempt}: ${error.message} (waiting ${delay}ms)`);

      if (typeof options.onRetry === 'function') {
        await options.onRetry(attempt, error, analysis, delay);
      }
    },
    onSuccess: async (attempt, result, attempts) => {
      // A first-try success is not worth a notification.
      if (attempt > 1) {
        await notify({
          title: '✅ 재시도 성공',
          description: `**${taskName}** (${attempt}번째 시도에서 성공)`,
          color: 0x00FF00
        });
      }

      console.log(`✅ Success after ${attempt} attempt(s)`);

      if (typeof options.onSuccess === 'function') {
        await options.onSuccess(attempt, result, attempts);
      }
    },
    onFinalFailure: async (attempts, analysis) => {
      await notify({
        title: '❌ 최종 실패',
        description: `**${taskName}** (${attempts.length}회 시도 후 실패)`,
        color: 0xFF0000,
        fields: [
          { name: '제안', value: fieldText(analysis.suggestedFix, 'Unknown error'), inline: false }
        ]
      });

      console.error(`❌ Failed after ${attempts.length} attempts`);

      if (typeof options.onFinalFailure === 'function') {
        await options.onFinalFailure(attempts, analysis);
      }
    }
  });
}
|
||||
|
||||
/**
 * Send Discord notification.
 *
 * POSTs a single embed to a Discord webhook. Resolves on any 2xx response and
 * rejects on any other status, on a network error, or when the request stays
 * idle for longer than 10 seconds.
 *
 * @param {string} webhookUrl - Full webhook URL (query string and port are kept).
 * @param {{title: string, description: string, color: number, fields?: Array}} embed
 * @returns {Promise<void>}
 */
async function sendDiscordNotification(webhookUrl, embed) {
  const https = require('https');
  const url = new URL(webhookUrl);

  const payload = JSON.stringify({
    embeds: [{
      title: embed.title,
      description: embed.description,
      color: embed.color,
      fields: embed.fields || [],
      footer: { text: 'Auto-Retry System' },
      timestamp: new Date().toISOString()
    }]
  });

  return new Promise((resolve, reject) => {
    const requestOptions = {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        // Content-Length is a byte count; the payload contains multi-byte
        // UTF-8 (Korean text, emoji), so the string length would be too small.
        'Content-Length': Buffer.byteLength(payload)
      }
    };

    // Passing the URL object keeps host, port, path and query string intact.
    const req = https.request(url, requestOptions, (res) => {
      // Drain the body so the socket is released.
      res.resume();

      // 204 is the usual reply; `?wait=true` makes Discord answer 200 instead.
      if (res.statusCode >= 200 && res.statusCode < 300) {
        resolve();
      } else {
        reject(new Error(`Discord returned ${res.statusCode}`));
      }
    });

    // Without a timeout a stalled connection would block the caller forever.
    req.setTimeout(10000, () => {
      req.destroy(new Error('Discord webhook request timed out'));
    });

    req.on('error', reject);
    req.write(payload);
    req.end();
  });
}
|
||||
|
||||
// ============================================================================
|
||||
// Exports
|
||||
// ============================================================================
|
||||
|
||||
module.exports = {
|
||||
AutoRetry,
|
||||
executeWithRetry,
|
||||
executeWithNotifications
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// CLI Usage (for testing)
|
||||
// ============================================================================
|
||||
|
||||
// Manual smoke test: runs only when this file is executed directly.
if (require.main === module) {
  // Demo operation that fails about 70% of the time with a retryable
  // ETIMEDOUT error and otherwise resolves with a small payload.
  const testFn = async () => {
    const shouldFail = Math.random() < 0.7;
    if (!shouldFail) {
      return { data: 'Success!' };
    }

    const error = new Error('Simulated network timeout');
    error.code = 'ETIMEDOUT';
    throw error;
  };

  const reportRetry = (attempt, error, analysis, delay) => {
    console.log(` Retry ${attempt}: ${error.message} (waiting ${delay}ms)`);
  };

  const reportResult = (result) => {
    console.log('\n✅ Final result:', result);
  };

  const reportFailure = (err) => {
    console.error('\n❌ Final failure:', err.message);
  };

  const demoOptions = {
    maxRetries: 5,
    backoff: 'exponential',
    context: { task: 'test' },
    onRetry: reportRetry
  };

  console.log('🧪 Testing Auto-Retry System...\n');

  executeWithRetry(testFn, demoOptions)
    .then(reportResult)
    .catch(reportFailure);
}
|
||||
647
skills/openclaw-self-healing/lib/log-analyzer.js
Normal file
647
skills/openclaw-self-healing/lib/log-analyzer.js
Normal file
@@ -0,0 +1,647 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* Level 2: Log Analyzer
|
||||
*
|
||||
* Auto-Retry 로그를 분석하여 패턴을 감지하고 최적화 제안을 생성
|
||||
*/
|
||||
|
||||
const fs = require('fs');
|
||||
const readline = require('readline');
|
||||
const path = require('path');
|
||||
|
||||
// ============================================================================
|
||||
// Log Analyzer Class
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Analyzes auto-retry JSONL logs: aggregates overall and per-cron statistics,
 * flags patterns that need attention, and compares the first and second half
 * of the analyzed days to report trends.
 */
class LogAnalyzer {
  /**
   * @param {Object} [options]
   * @param {number} [options.timeWindow] - Look-back window in ms (default: 7 days).
   * @param {number} [options.minSampleSize] - Minimum executions per cron
   *   before per-cron patterns are reported (default: 5).
   * @param {number} [options.timeout] - Operation timeout in ms used by the
   *   slow-response check (default: 15000).
   */
  constructor(options = {}) {
    // Spread first so an explicitly-undefined option cannot clobber a default.
    this.options = {
      ...options,
      timeWindow: options.timeWindow ?? 7 * 24 * 3600 * 1000,
      minSampleSize: options.minSampleSize ?? 5,
      timeout: options.timeout ?? 15000
    };
  }

  /**
   * Analyze auto-retry logs
   * @param {string} logPath - Path to auto-retry.jsonl
   * @returns {Promise<Object>} Analysis results
   */
  async analyze(logPath) {
    const entries = await this.readLog(logPath);
    const filteredEntries = this.filterByTimeWindow(entries);
    const stats = this.calculateStats(filteredEntries);
    const patterns = this.detectPatterns(stats);
    const trends = this.analyzeTrends(filteredEntries);

    return {
      summary: this.generateSummary(stats),
      stats,
      patterns,
      trends,
      metadata: {
        totalEntries: entries.length,
        analyzedEntries: filteredEntries.length,
        timeWindow: this.options.timeWindow,
        analyzedAt: new Date().toISOString()
      }
    };
  }

  /**
   * Read JSONL log file. Blank lines are skipped; lines that are not valid
   * JSON are reported on stderr and skipped.
   * @param {string} logPath - Path to log file
   * @returns {Promise<Array>} Array of log entries
   */
  async readLog(logPath) {
    const entries = [];
    const fileStream = fs.createReadStream(logPath);
    const rl = readline.createInterface({
      input: fileStream,
      crlfDelay: Infinity
    });

    for await (const line of rl) {
      if (!line.trim()) continue;

      try {
        const entry = JSON.parse(line);
        entries.push(entry);
      } catch (error) {
        console.error(`Invalid JSON line: ${line.substring(0, 50)}...`);
      }
    }

    return entries;
  }

  /**
   * Filter entries by time window. Entries whose timestamp cannot be parsed
   * compare as NaN and are therefore dropped.
   * @param {Array} entries - All log entries
   * @returns {Array} Filtered entries
   */
  filterByTimeWindow(entries) {
    const cutoff = Date.now() - this.options.timeWindow;

    return entries.filter(entry => {
      const timestamp = new Date(entry.timestamp).getTime();
      return timestamp >= cutoff;
    });
  }

  /**
   * Calculate statistics from entries. Only entries carrying `context.cron`
   * feed the overall / per-cron / per-error numbers; all other entries are
   * counted separately under `tests`.
   * @param {Array} entries - Filtered log entries
   * @returns {Object} Statistics
   */
  calculateStats(entries) {
    const createBucket = (extra = {}) => ({
      total: 0,
      success: 0,
      failure: 0,
      retries: 0,
      totalAttempts: 0,
      retryRate: 0,
      failureRate: 0,
      avgAttempts: 0,
      avgDuration: 0,
      durations: [],
      ...extra
    });

    const stats = {
      overall: createBucket(),
      byCron: {},
      byError: {},
      performance: {}
    };

    // Separate actual cron executions from tests
    const cronEntries = entries.filter(e => e.context?.cron);
    const testEntries = entries.filter(e => !e.context?.cron);

    // Analyze cron entries
    for (const entry of cronEntries) {
      const cron = entry.context.cron;

      // Initialize cron stats
      if (!stats.byCron[cron]) {
        stats.byCron[cron] = createBucket({ errors: [] });
      }

      const cronStats = stats.byCron[cron];
      // A malformed entry without an attempts array must not abort the run.
      const attemptList = Array.isArray(entry.attempts) ? entry.attempts : [];
      const attempts = attemptList.length;
      const duration = entry.totalDuration || 0;

      // Update counts
      stats.overall.total++;
      cronStats.total++;

      // Every logged execution ran at least once, even if its attempt list
      // is missing or empty.
      const attemptCount = Math.max(attempts, 1);
      stats.overall.totalAttempts += attemptCount;
      cronStats.totalAttempts += attemptCount;

      if (entry.type === 'success') {
        stats.overall.success++;
        cronStats.success++;
      } else {
        stats.overall.failure++;
        cronStats.failure++;
      }

      if (attempts > 1) {
        stats.overall.retries++;
        cronStats.retries++;
      }

      // Track durations
      stats.overall.durations.push(duration);
      cronStats.durations.push(duration);

      // Track errors
      if (entry.type === 'failure' || attempts > 1) {
        for (const attempt of attemptList) {
          const attemptError = attempt?.error;
          if (!attemptError) continue;

          const errorCategory = attemptError.category || 'unknown';
          const errorType = attemptError.type || 'unknown';

          // By error category
          if (!stats.byError[errorCategory]) {
            stats.byError[errorCategory] = {
              count: 0,
              types: {}
            };
          }
          stats.byError[errorCategory].count++;

          // By error type
          if (!stats.byError[errorCategory].types[errorType]) {
            stats.byError[errorCategory].types[errorType] = 0;
          }
          stats.byError[errorCategory].types[errorType]++;

          // Track in cron stats
          cronStats.errors.push({
            category: errorCategory,
            type: errorType,
            message: attemptError.message
          });
        }
      }
    }

    // Calculate averages and rates
    this.calculateAverages(stats.overall);
    for (const cronStats of Object.values(stats.byCron)) {
      this.calculateAverages(cronStats);
    }

    // Calculate performance metrics (percentiles)
    stats.performance = this.calculatePerformanceMetrics(stats.overall.durations);

    // Add test stats separately
    stats.tests = {
      total: testEntries.length,
      success: testEntries.filter(e => e.type === 'success').length,
      failure: testEntries.filter(e => e.type === 'failure').length
    };

    return stats;
  }

  /**
   * Calculate averages and rates for a stats object (mutates it in place).
   * @param {Object} statsObj - Stats object to update
   */
  calculateAverages(statsObj) {
    if (statsObj.total > 0) {
      statsObj.retryRate = statsObj.retries / statsObj.total;
      statsObj.failureRate = statsObj.failure / statsObj.total;

      // Use the real attempt count when it was tracked. The fallback keeps
      // the previous estimate (one extra attempt per retried execution) for
      // stats objects that lack `totalAttempts`.
      const attemptCount = Number.isFinite(statsObj.totalAttempts) && statsObj.totalAttempts > 0
        ? statsObj.totalAttempts
        : statsObj.total + statsObj.retries;
      statsObj.avgAttempts = attemptCount / statsObj.total;
    }

    if (statsObj.durations.length > 0) {
      statsObj.avgDuration = statsObj.durations.reduce((a, b) => a + b, 0) / statsObj.durations.length;
    }
  }

  /**
   * Calculate performance metrics (percentiles)
   * @param {Array} durations - Array of durations
   * @returns {Object} Performance metrics
   */
  calculatePerformanceMetrics(durations) {
    if (durations.length === 0) {
      return { p50: 0, p95: 0, p99: 0, min: 0, max: 0 };
    }

    const sorted = [...durations].sort((a, b) => a - b);
    // Nearest-rank percentile.
    const percentile = (p) => {
      const index = Math.ceil((p / 100) * sorted.length) - 1;
      return sorted[Math.max(0, index)];
    };

    return {
      p50: percentile(50),
      p95: percentile(95),
      p99: percentile(99),
      min: sorted[0],
      max: sorted[sorted.length - 1]
    };
  }

  /**
   * Detect patterns that need attention
   * @param {Object} stats - Statistics object
   * @returns {Array} Array of detected patterns, most severe first
   */
  detectPatterns(stats) {
    const patterns = [];
    const timeout = this.options.timeout ?? 15000;

    // Pattern detection for each cron
    for (const [cron, data] of Object.entries(stats.byCron)) {
      // Skip if insufficient data
      if (data.total < this.options.minSampleSize) {
        continue;
      }

      // Pattern 1: High retry rate
      if (data.retryRate > 0.10) {
        patterns.push({
          type: 'high_retry_rate',
          severity: data.retryRate > 0.20 ? 'high' : 'medium',
          cron,
          value: data.retryRate,
          threshold: 0.10,
          description: `${(data.retryRate * 100).toFixed(1)}% of executions needed retry (threshold: 10%)`,
          suggestion: 'increase maxRetries',
          affectedExecutions: data.retries
        });
      }

      // Pattern 2: High failure rate
      if (data.failureRate > 0.01) {
        patterns.push({
          type: 'high_failure_rate',
          severity: data.failureRate > 0.05 ? 'high' : 'medium',
          cron,
          value: data.failureRate,
          threshold: 0.01,
          description: `${(data.failureRate * 100).toFixed(1)}% final failure rate (threshold: 1%)`,
          suggestion: 'increase maxRetries or investigate root cause',
          affectedExecutions: data.failure
        });
      }

      // Pattern 3: Slow response (approaching timeout)
      const timeoutThreshold = timeout * 0.8; // 80% of the timeout
      if (data.avgDuration > timeoutThreshold) {
        const thresholdSeconds = Number((timeoutThreshold / 1000).toFixed(1));
        patterns.push({
          type: 'slow_response',
          severity: data.avgDuration > timeout * 0.9 ? 'high' : 'medium',
          cron,
          value: data.avgDuration,
          threshold: timeoutThreshold,
          description: `Avg response ${Math.round(data.avgDuration)}ms > 80% of timeout (${thresholdSeconds}s)`,
          suggestion: 'increase timeout',
          currentTimeout: timeout,
          recommendedTimeout: Math.ceil(data.avgDuration * 1.5)
        });
      }

      // Pattern 4: High P95/P99 (inconsistent performance)
      const performance = this.calculatePerformanceMetrics(data.durations);
      // A zero median would turn the ratio into Infinity/NaN, so the check
      // only applies when the median is positive.
      if (performance.p50 > 0 && performance.p95 > performance.p50 * 2) {
        const ratio = performance.p95 / performance.p50;
        patterns.push({
          type: 'inconsistent_performance',
          severity: 'low',
          cron,
          value: ratio,
          description: `P95 (${Math.round(performance.p95)}ms) is ${ratio.toFixed(1)}x higher than median`,
          suggestion: 'investigate outliers',
          metrics: performance
        });
      }
    }

    // Pattern 5: Specific error categories
    for (const [category, data] of Object.entries(stats.byError)) {
      if (data.count > 3) {
        const topType = Object.entries(data.types)
          .sort((a, b) => b[1] - a[1])[0];

        patterns.push({
          type: 'recurring_error',
          severity: data.count > 10 ? 'high' : 'medium',
          category,
          value: data.count,
          description: `${category} errors occurred ${data.count} times`,
          topErrorType: topType ? topType[0] : 'unknown',
          suggestion: this.getSuggestionForError(category, topType ? topType[0] : null)
        });
      }
    }

    // Sort by severity
    const severityOrder = { high: 0, medium: 1, low: 2 };
    patterns.sort((a, b) => severityOrder[a.severity] - severityOrder[b.severity]);

    return patterns;
  }

  /**
   * Get suggestion for specific error
   * @param {string} category - Error category
   * @param {string} type - Error type
   * @returns {string} Suggestion
   */
  getSuggestionForError(category, type) {
    // Accept any type label that mentions 429 (e.g. 'HTTP 429', '429').
    const isRateLimited = /429/.test(String(type));

    const suggestions = {
      'timeout': 'Increase timeout or check network latency',
      'http': isRateLimited
        ? 'Increase backoff delay to avoid rate limits'
        : 'Check API status and retry logic',
      'network': 'Check network connectivity and DNS resolution',
      'unknown': 'Investigate error logs for root cause'
    };

    return suggestions[category] || suggestions['unknown'];
  }

  /**
   * Analyze trends over time. Cron entries are grouped per UTC day; for each
   * cron with at least two days of data, the average of the first half of
   * the days is compared with the average of the second half.
   * @param {Array} entries - Filtered log entries
   * @returns {Object} Trend analysis keyed by cron
   */
  analyzeTrends(entries) {
    // Group by day
    const byDay = {};

    for (const entry of entries) {
      if (!entry.context?.cron) continue;

      const date = new Date(entry.timestamp);
      // toISOString() throws on an invalid date; skip such entries instead.
      if (Number.isNaN(date.getTime())) continue;

      const day = date.toISOString().split('T')[0];
      const cron = entry.context.cron;

      if (!byDay[day]) {
        byDay[day] = {};
      }

      if (!byDay[day][cron]) {
        byDay[day][cron] = {
          total: 0,
          success: 0,
          retries: 0,
          durations: []
        };
      }

      const bucket = byDay[day][cron];
      bucket.total++;
      if (entry.type === 'success') {
        bucket.success++;
      }
      if (Array.isArray(entry.attempts) && entry.attempts.length > 1) {
        bucket.retries++;
      }
      bucket.durations.push(entry.totalDuration || 0);
    }

    // Calculate daily averages
    const dailyStats = {};
    for (const [day, crons] of Object.entries(byDay)) {
      for (const [cron, data] of Object.entries(crons)) {
        if (!dailyStats[cron]) {
          dailyStats[cron] = [];
        }

        dailyStats[cron].push({
          date: day,
          retryRate: data.retries / data.total,
          avgDuration: data.durations.reduce((a, b) => a + b, 0) / data.durations.length,
          successRate: data.success / data.total
        });
      }
    }

    const average = (list, key) => list.reduce((sum, item) => sum + item[key], 0) / list.length;

    // Detect trends (improving/degrading)
    const trends = {};
    for (const [cron, days] of Object.entries(dailyStats)) {
      if (days.length < 2) continue;

      // Sort by date
      days.sort((a, b) => a.date.localeCompare(b.date));

      // Simple linear trend (first half vs second half)
      const mid = Math.floor(days.length / 2);
      const firstHalf = days.slice(0, mid);
      const secondHalf = days.slice(mid);

      const avgFirst = {
        retryRate: average(firstHalf, 'retryRate'),
        avgDuration: average(firstHalf, 'avgDuration')
      };

      const avgSecond = {
        retryRate: average(secondHalf, 'retryRate'),
        avgDuration: average(secondHalf, 'avgDuration')
      };

      trends[cron] = {
        retryRate: {
          trend: avgSecond.retryRate > avgFirst.retryRate ? 'increasing' : 'decreasing',
          change: ((avgSecond.retryRate - avgFirst.retryRate) / (avgFirst.retryRate || 1)) * 100,
          firstHalf: avgFirst.retryRate,
          secondHalf: avgSecond.retryRate
        },
        avgDuration: {
          trend: avgSecond.avgDuration > avgFirst.avgDuration ? 'increasing' : 'decreasing',
          // Same zero-baseline guard as retryRate, so the change is never
          // NaN or Infinity.
          change: ((avgSecond.avgDuration - avgFirst.avgDuration) / (avgFirst.avgDuration || 1)) * 100,
          firstHalf: avgFirst.avgDuration,
          secondHalf: avgSecond.avgDuration
        }
      };
    }

    return trends;
  }

  /**
   * Generate human-readable summary
   * @param {Object} stats - Statistics object
   * @returns {Object} Summary
   */
  generateSummary(stats) {
    // With no executions the ratio is undefined; report 0.0% instead of NaN%.
    const percentOf = (part, whole) => (
      whole > 0 ? `${((part / whole) * 100).toFixed(1)}%` : '0.0%'
    );

    const summary = {
      overall: {
        totalExecutions: stats.overall.total,
        successRate: percentOf(stats.overall.success, stats.overall.total),
        retryRate: `${(stats.overall.retryRate * 100).toFixed(1)}%`,
        failureRate: `${(stats.overall.failureRate * 100).toFixed(1)}%`,
        avgAttempts: stats.overall.avgAttempts.toFixed(2),
        avgDuration: `${Math.round(stats.overall.avgDuration)}ms`
      },
      crons: {},
      topErrors: []
    };

    // Summarize each cron
    for (const [cron, data] of Object.entries(stats.byCron)) {
      summary.crons[cron] = {
        executions: data.total,
        successRate: percentOf(data.success, data.total),
        retryRate: `${(data.retryRate * 100).toFixed(1)}%`,
        avgDuration: `${Math.round(data.avgDuration)}ms`
      };
    }

    // Top errors (five most frequent categories)
    const errorList = Object.entries(stats.byError)
      .map(([category, data]) => ({
        category,
        count: data.count,
        topType: Object.entries(data.types).sort((a, b) => b[1] - a[1])[0]
      }))
      .sort((a, b) => b.count - a.count)
      .slice(0, 5);

    summary.topErrors = errorList.map(e => ({
      category: e.category,
      count: e.count,
      topType: e.topType ? e.topType[0] : 'unknown'
    }));

    return summary;
  }

  /**
   * Print analysis results to console
   * @param {Object} analysis - Analysis results
   */
  printResults(analysis) {
    console.log('\n' + '='.repeat(60));
    console.log('📊 Level 2: Auto-Retry Log Analysis');
    console.log('='.repeat(60) + '\n');

    // Summary
    console.log('📈 Overall Summary:');
    console.log(` Total Executions: ${analysis.summary.overall.totalExecutions}`);
    console.log(` Success Rate: ${analysis.summary.overall.successRate}`);
    console.log(` Retry Rate: ${analysis.summary.overall.retryRate}`);
    console.log(` Failure Rate: ${analysis.summary.overall.failureRate}`);
    console.log(` Avg Attempts: ${analysis.summary.overall.avgAttempts}`);
    console.log(` Avg Duration: ${analysis.summary.overall.avgDuration}`);

    // Performance metrics
    if (analysis.stats.performance) {
      const perf = analysis.stats.performance;
      console.log('\n⚡ Performance Metrics:');
      console.log(` P50 (median): ${Math.round(perf.p50)}ms`);
      console.log(` P95: ${Math.round(perf.p95)}ms`);
      console.log(` P99: ${Math.round(perf.p99)}ms`);
      console.log(` Min: ${Math.round(perf.min)}ms`);
      console.log(` Max: ${Math.round(perf.max)}ms`);
    }

    // By Cron
    console.log('\n📋 By Cron:');
    for (const [cron, data] of Object.entries(analysis.summary.crons)) {
      console.log(`\n ${cron}:`);
      console.log(` Executions: ${data.executions}`);
      console.log(` Success: ${data.successRate}`);
      console.log(` Retry: ${data.retryRate}`);
      console.log(` Avg Duration: ${data.avgDuration}`);
    }

    // Patterns
    if (analysis.patterns.length > 0) {
      console.log('\n⚠️ Detected Patterns:');
      for (const pattern of analysis.patterns) {
        const icon = pattern.severity === 'high' ? '🔴' : pattern.severity === 'medium' ? '🟡' : '🟢';
        console.log(`\n ${icon} ${pattern.type} (${pattern.severity})`);
        console.log(` ${pattern.description}`);
        console.log(` 💡 Suggestion: ${pattern.suggestion}`);
      }
    } else {
      console.log('\n✅ No patterns detected - all metrics within normal range');
    }

    // Trends
    if (Object.keys(analysis.trends).length > 0) {
      console.log('\n📈 Trends:');
      for (const [cron, trend] of Object.entries(analysis.trends)) {
        console.log(`\n ${cron}:`);

        const retryIcon = trend.retryRate.trend === 'increasing' ? '📈' : '📉';
        const retryColor = trend.retryRate.trend === 'increasing' ? '⚠️' : '✅';
        console.log(` ${retryIcon} Retry Rate: ${retryColor} ${trend.retryRate.trend} (${trend.retryRate.change > 0 ? '+' : ''}${trend.retryRate.change.toFixed(1)}%)`);

        const durationIcon = trend.avgDuration.trend === 'increasing' ? '📈' : '📉';
        const durationColor = trend.avgDuration.trend === 'increasing' ? '⚠️' : '✅';
        console.log(` ${durationIcon} Avg Duration: ${durationColor} ${trend.avgDuration.trend} (${trend.avgDuration.change > 0 ? '+' : ''}${trend.avgDuration.change.toFixed(1)}%)`);
      }
    }

    // Top errors
    if (analysis.summary.topErrors.length > 0) {
      console.log('\n🚨 Top Errors:');
      for (const error of analysis.summary.topErrors) {
        console.log(` - ${error.category} (${error.count}x) - ${error.topType}`);
      }
    }

    // Metadata
    console.log('\n' + '='.repeat(60));
    console.log(`Analyzed: ${analysis.metadata.analyzedEntries} entries (${analysis.metadata.totalEntries} total)`);
    console.log(`Time Window: ${Math.round(analysis.metadata.timeWindow / (24 * 3600 * 1000))} days`);
    console.log(`Analyzed At: ${new Date(analysis.metadata.analyzedAt).toLocaleString()}`);
    console.log('='.repeat(60) + '\n');
  }
}
|
||||
|
||||
// ============================================================================
|
||||
// CLI Interface
|
||||
// ============================================================================
|
||||
|
||||
/**
 * CLI entry point: analyze a retry log, print the report and save it as JSON.
 *
 * Positional arguments (read from process.argv):
 *   [2] logPath         - log file to analyze; defaults to
 *                         <home>/openclaw/logs/auto-retry.jsonl
 *   [3] timeWindowDays  - look-back window in days; defaults to 7. Values that
 *                         are not a positive integer fall back to the default.
 *
 * The full analysis object is written to `log-analysis.json` in the same
 * directory as the log file. Any failure is reported on stderr and the
 * process exits with code 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const DEFAULT_WINDOW_DAYS = 7;
  const MS_PER_DAY = 24 * 3600 * 1000;

  try {
    // HOME may be unset (e.g. minimal service environments); path.join would
    // then throw a TypeError, so fall back to the OS-reported home directory.
    const homeDir = process.env.HOME || require('os').homedir();
    const logPath = process.argv[2] || path.join(homeDir, 'openclaw/logs/auto-retry.jsonl');

    // Explicit radix; NaN, zero and negative values all fall back to the default
    // because a non-positive window cannot select any entries.
    const requestedDays = Number.parseInt(process.argv[3], 10);
    const timeWindow = requestedDays > 0 ? requestedDays : DEFAULT_WINDOW_DAYS;

    console.log(`Analyzing: ${logPath}`);
    console.log(`Time Window: ${timeWindow} days\n`);

    const analyzer = new LogAnalyzer({
      timeWindow: timeWindow * MS_PER_DAY,
      minSampleSize: 5
    });

    const analysis = await analyzer.analyze(logPath);
    analyzer.printResults(analysis);

    // Save results to file, next to the analyzed log
    const outputPath = path.join(path.dirname(logPath), 'log-analysis.json');
    fs.writeFileSync(outputPath, JSON.stringify(analysis, null, 2));
    console.log(`📁 Full analysis saved to: ${outputPath}\n`);
  } catch (error) {
    console.error('Error analyzing logs:', error.message);
    process.exit(1);
  }
}
|
||||
|
||||
// ============================================================================
|
||||
// Export
|
||||
// ============================================================================
|
||||
|
||||
// Run the CLI only when this file is executed directly, not when it is required.
if (require.main === module) {
  // main() is async: attach a rejection handler so that a failure which escapes
  // its internal try/catch is reported instead of becoming an unhandled rejection.
  main().catch((error) => {
    console.error('Error analyzing logs:', error && error.message ? error.message : error);
    process.exit(1);
  });
}

module.exports = { LogAnalyzer };
|
||||
453
skills/openclaw-self-healing/lib/parameter-optimizer.js
Normal file
453
skills/openclaw-self-healing/lib/parameter-optimizer.js
Normal file
@@ -0,0 +1,453 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* Level 2: Parameter Optimizer (Semi-Automatic)
|
||||
*
|
||||
* 로그 분석 결과를 기반으로 파라미터 조정 제안 생성
|
||||
* - 통계적 검증 포함
|
||||
* - 안전 범위 체크
|
||||
* - 파라미터 의존성 고려
|
||||
* - 수동 승인 필요 (자동 적용 안 함)
|
||||
*/
|
||||
|
||||
// ============================================================================
|
||||
// Configuration
|
||||
// ============================================================================
|
||||
|
||||
// Allowed [min, max] range for each tunable parameter, with the rationale
// for the bound. Built through a tiny local factory so every rule has the
// same { min, max, description } shape.
const SAFETY_RULES = (() => {
  const rule = (min, max, description) => ({ min, max, description });

  return {
    maxRetries: rule(2, 5, 'Too few retries = high failure rate, too many = slow'),
    timeout: rule(10000, 30000, 'Must stay within cron interval'),
    backoffBase: rule(1000, 5000, 'Base delay for exponential backoff'),
  };
})();
|
||||
|
||||
// Fewest logged executions needed before a tuning decision is considered
// reliable, keyed by cron cadence.
const MIN_SAMPLE_SIZES = {
  '15min_cron': 3 * 96, // 3 days at 96 runs/day
  hourly_cron: 7 * 24, // 7 days at 24 runs/day
  daily_cron: 7, // 7 days at 1 run/day
};
|
||||
|
||||
// ============================================================================
|
||||
// Parameter Optimizer Class
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Level 2 parameter optimizer (semi-automatic).
 *
 * Converts the patterns, statistics and trends produced by the log analyzer
 * into suggested changes for the retry parameters `maxRetries`, `timeout` and
 * `backoffBase`. The class only builds recommendation objects; nothing here
 * applies them.
 */
class ParameterOptimizer {
  /**
   * @param {Object} [options]
   * @param {string} [options.aggressiveness='conservative'] - 'conservative', 'moderate' or 'aggressive'.
   * @param {boolean} [options.requireStatisticalSignificance=true] - Only an explicit `false` turns it off.
   */
  constructor(options = {}) {
    this.options = {
      // Spread first: the normalized values below must win, otherwise a key that
      // is present but undefined/empty would overwrite the computed default.
      ...options,
      aggressiveness: options.aggressiveness || 'conservative', // conservative, moderate, aggressive
      requireStatisticalSignificance: options.requireStatisticalSignificance !== false,
    };
  }

  /**
   * Generate optimization recommendations from patterns.
   *
   * Crons without a statistics entry, or with too few samples, are skipped
   * with a console message instead of aborting the whole run.
   *
   * @param {Array} patterns - Detected patterns from LogAnalyzer
   * @param {Object} stats - Full statistics from LogAnalyzer (reads `stats.byCron`)
   * @param {Object} [trends] - Trend analysis from LogAnalyzer, keyed by cron
   * @returns {Array} Recommendations, validated for combined effect and sorted by priority
   */
  generateRecommendations(patterns, stats, trends) {
    const recommendations = [];
    const byCron = this.groupPatternsByCron(patterns);

    for (const [cron, cronPatterns] of Object.entries(byCron)) {
      const cronStats = stats?.byCron?.[cron];
      const cronTrend = trends?.[cron];

      // A pattern can reference a cron that has no statistics entry; there is
      // nothing to base a recommendation on in that case.
      if (!cronStats) {
        console.log(`⚠️ ${cron}: No statistics available, skipping`);
        continue;
      }

      // Check sample size first
      if (!this.hasSufficientSamples(cronStats)) {
        console.log(`⚠️ ${cron}: Insufficient samples (${cronStats.total}), skipping`);
        continue;
      }

      for (const pattern of cronPatterns) {
        const rec = this.createRecommendation(pattern, cronStats, cronTrend, cron);
        if (rec) {
          recommendations.push(rec);
        }
      }
    }

    // Validate combinations (parameter dependencies), then sort by priority
    return this.prioritize(this.validateCombinations(recommendations));
  }

  /**
   * Group patterns by cron name. Patterns without a `cron` property are dropped.
   *
   * @param {Array} patterns
   * @returns {Object} Map of cron name -> array of patterns, in input order
   */
  groupPatternsByCron(patterns) {
    const byCron = {};
    for (const pattern of patterns ?? []) {
      if (!pattern.cron) continue;
      if (!byCron[pattern.cron]) {
        byCron[pattern.cron] = [];
      }
      byCron[pattern.cron].push(pattern);
    }
    return byCron;
  }

  /**
   * Check if a cron has sufficient samples for reliable tuning.
   *
   * NOTE(review): the cron cadence is guessed from the sample count itself, so
   * whenever a higher tier is selected its minimum is already met. As long as
   * the MIN_SAMPLE_SIZES values equal the tier thresholds used here, the check
   * reduces to `total >= MIN_SAMPLE_SIZES.daily_cron`. A real cadence input
   * would be needed to make the higher tiers meaningful.
   *
   * @param {Object} cronStats - Per-cron statistics; reads `total`
   * @returns {boolean}
   */
  hasSufficientSamples(cronStats) {
    const total = cronStats?.total;
    if (typeof total !== 'number') return false;

    // Heuristic: determine cron frequency from sample count
    let minRequired;
    if (total >= 96 * 3) {
      minRequired = MIN_SAMPLE_SIZES['15min_cron'];
    } else if (total >= 24 * 7) {
      minRequired = MIN_SAMPLE_SIZES['hourly_cron'];
    } else {
      minRequired = MIN_SAMPLE_SIZES['daily_cron'];
    }

    return total >= minRequired;
  }

  /**
   * Dispatch a pattern to the matching recommendation builder.
   *
   * @returns {Object|null} A recommendation, or null when the pattern type is
   *   not handled or the builder decides no change is needed
   */
  createRecommendation(pattern, cronStats, cronTrend, cron) {
    switch (pattern.type) {
      case 'high_retry_rate':
      case 'high_failure_rate':
        return this.recommendMaxRetries(pattern, cronStats, cronTrend, cron);

      case 'slow_response':
        return this.recommendTimeout(pattern, cronStats, cronTrend, cron);

      case 'recurring_error':
        // Only rate-limit errors are addressed by a backoff change
        if (pattern.category === 'http' && pattern.topErrorType === 'HTTP 429') {
          return this.recommendBackoff(pattern, cronStats, cronTrend, cron);
        }
        return null;

      default:
        return null;
    }
  }

  /**
   * Recommend a maxRetries adjustment.
   *
   * Starts from the hard-coded current default (3), raises it by 1 or 2
   * depending on failure/retry rate, nudges it by the retry-rate trend and
   * clamps the result to SAFETY_RULES.maxRetries.
   *
   * @returns {Object|null} Recommendation, or null when the value would not change
   */
  recommendMaxRetries(pattern, cronStats, cronTrend, cron) {
    const current = 3; // Current default
    const { min, max } = SAFETY_RULES.maxRetries;
    const { retryRate, failureRate } = cronStats;
    const retryTrend = cronTrend?.retryRate?.trend;

    // Severe (5%+ failures) or high (20%+ retries): +2; otherwise: +1
    const isSevere = failureRate > 0.05 || retryRate > 0.20;
    let proposed = isSevere ? Math.min(current + 2, max) : current + 1;

    if (retryTrend === 'decreasing') {
      // Improving - be conservative
      proposed = Math.min(proposed, current + 1);
    } else if (retryTrend === 'increasing') {
      // Degrading - be more aggressive
      proposed = Math.min(proposed + 1, max);
    }

    // Ensure within safety bounds
    proposed = Math.max(min, Math.min(proposed, max));

    if (proposed === current) {
      return null; // No change needed
    }

    return {
      cron,
      param: 'maxRetries',
      current,
      proposed,
      reason: `Retry rate ${(retryRate * 100).toFixed(1)}% (threshold: 10%), Failure rate ${(failureRate * 100).toFixed(2)}%`,
      expectedImprovement: this.estimateRetryImprovement(current, proposed, failureRate),
      pattern: pattern.type,
      severity: pattern.severity,
      safe: this.isSafe('maxRetries', proposed),
      confidence: this.calculateConfidence(cronStats, cronTrend),
      metadata: {
        retryRate,
        failureRate,
        trend: retryTrend || 'unknown',
        sampleSize: cronStats.total
      }
    };
  }

  /**
   * Recommend a timeout adjustment.
   *
   * Target is P95 duration plus a 50% buffer, rounded to the nearest 5 s and
   * clamped to SAFETY_RULES.timeout. A decrease is only proposed when the
   * average-duration trend is 'decreasing'.
   *
   * @returns {Object|null} Recommendation, or null when there is no duration
   *   data, no change is needed, or a decrease lacks a supporting trend
   */
  recommendTimeout(pattern, cronStats, cronTrend, cron) {
    const current = 15000; // Current default
    const { avgDuration, durations } = cronStats;

    // Without samples the P95 is 0, which would clamp to the minimum and could
    // suggest *lowering* the timeout with no evidence at all.
    if (!durations || durations.length === 0) {
      return null;
    }

    // Use P95 instead of average to account for outliers
    const p95Duration = this.calculateP95(durations);
    const targetTimeout = Math.ceil(p95Duration * 1.5); // 50% buffer

    // Round to nearest 5 seconds for cleaner values
    const rounded = Math.round(targetTimeout / 5000) * 5000;

    // Ensure within safety bounds
    const { min, max } = SAFETY_RULES.timeout;
    const bounded = Math.max(min, Math.min(rounded, max));

    if (bounded === current) {
      return null; // No change needed
    }

    const durationTrend = cronTrend?.avgDuration?.trend;

    // Decreasing timeout is risky - require a clear improving trend
    if (bounded < current && durationTrend !== 'decreasing') {
      return null;
    }

    return {
      cron,
      param: 'timeout',
      current,
      proposed: bounded,
      reason: `P95 response ${Math.round(p95Duration)}ms, avg ${Math.round(avgDuration)}ms (current timeout: ${current}ms)`,
      expectedImprovement: bounded > current
        ? 'Timeout errors eliminated'
        : 'Faster failure detection',
      pattern: pattern.type,
      severity: pattern.severity,
      safe: this.isSafe('timeout', bounded),
      confidence: this.calculateConfidence(cronStats, cronTrend),
      metadata: {
        avgDuration,
        p95Duration,
        trend: durationTrend || 'unknown',
        sampleSize: cronStats.total
      }
    };
  }

  /**
   * Recommend a backoff adjustment (for rate limiting): doubles the hard-coded
   * current base delay.
   *
   * @returns {Object|null} Recommendation, or null when the doubled value
   *   would exceed SAFETY_RULES.backoffBase.max
   */
  recommendBackoff(pattern, cronStats, cronTrend, cron) {
    const current = 1000; // Current default base
    const proposed = current * 2; // Double the backoff

    if (proposed > SAFETY_RULES.backoffBase.max) {
      return null; // Already at max
    }

    return {
      cron,
      param: 'backoffBase',
      current,
      proposed,
      reason: `HTTP 429 (Rate Limit) errors: ${pattern.value} times`,
      expectedImprovement: 'Rate limit errors reduced',
      pattern: pattern.type,
      severity: pattern.severity,
      safe: this.isSafe('backoffBase', proposed),
      confidence: 'medium', // Rate limiting is clear
      metadata: {
        errorCount: pattern.value,
        errorType: pattern.topErrorType
      }
    };
  }

  /**
   * Calculate the 95th percentile (nearest-rank) of a list of durations.
   *
   * @param {number[]} durations
   * @returns {number} The P95 value, or 0 for a missing/empty list
   */
  calculateP95(durations) {
    if (!durations || durations.length === 0) return 0;
    const sorted = [...durations].sort((a, b) => a - b); // copy: sort mutates
    const index = Math.ceil(0.95 * sorted.length) - 1;
    return sorted[Math.max(0, index)];
  }

  /**
   * Estimate the improvement from a retry increase.
   *
   * Simple model: each retry recovers ~70% of the remaining failures.
   *
   * @param {number} current - Current maxRetries
   * @param {number} proposed - Proposed maxRetries
   * @param {number} failureRate - Accepted for interface compatibility; not used by the model
   * @returns {string} Human-readable relative reduction, e.g. "Final failure rate -91%"
   */
  estimateRetryImprovement(current, proposed, failureRate) {
    const recoveryRate = 0.70;
    const currentRecovery = 1 - Math.pow(1 - recoveryRate, current);
    const proposedRecovery = 1 - Math.pow(1 - recoveryRate, proposed);

    const improvement = (proposedRecovery - currentRecovery) / (1 - currentRecovery);
    return `Final failure rate -${(improvement * 100).toFixed(0)}%`;
  }

  /**
   * Check if a proposed value is within the safety bounds of its parameter.
   *
   * @param {string} param - Key of SAFETY_RULES
   * @param {number} value
   * @returns {boolean} false for unknown parameters
   */
  isSafe(param, value) {
    const rule = SAFETY_RULES[param];
    if (!rule) return false;
    return value >= rule.min && value <= rule.max;
  }

  /**
   * Calculate the confidence level for a recommendation from sample size
   * (up to 3 points) and strength of the retry-rate trend (up to 2 points).
   *
   * @param {Object} cronStats - Reads `total`
   * @param {Object} [cronTrend] - Reads `retryRate.change` when present
   * @returns {'high'|'medium'|'low'}
   */
  calculateConfidence(cronStats, cronTrend) {
    let score = 0;

    // Sample size
    if (cronStats.total >= 500) score += 3;
    else if (cronStats.total >= 200) score += 2;
    else if (cronStats.total >= 100) score += 1;

    // Clear trend (a missing retryRate entry simply adds nothing)
    const change = cronTrend?.retryRate?.change;
    if (typeof change === 'number') {
      const magnitude = Math.abs(change);
      if (magnitude > 50) score += 2; // Strong trend
      else if (magnitude > 20) score += 1; // Weak trend
    }

    // Map to confidence level
    if (score >= 4) return 'high';
    if (score >= 2) return 'medium';
    return 'low';
  }

  /**
   * Validate parameter combinations for dependencies.
   *
   * When a cron has more than one recommendation and their combined effect is
   * unsafe, every recommendation for that cron is mutated in place: `safe`
   * becomes false and `warning` / `recommendation` fields are added.
   *
   * @param {Array} recommendations
   * @returns {Array} The same recommendation objects, grouped by cron
   */
  validateCombinations(recommendations) {
    const validated = [];
    const byCron = {};

    // Group by cron
    for (const rec of recommendations) {
      if (!byCron[rec.cron]) {
        byCron[rec.cron] = [];
      }
      byCron[rec.cron].push(rec);
    }

    for (const [cron, recs] of Object.entries(byCron)) {
      // If multiple params for same cron, check combined effect
      if (recs.length > 1) {
        const combined = this.checkCombinedEffect(recs, cron);
        if (!combined.safe) {
          // Mark all as requiring manual review
          for (const rec of recs) {
            rec.safe = false;
            rec.warning = combined.warning;
            rec.recommendation = 'Apply one at a time, verify each before next';
          }
        }
      }
      validated.push(...recs);
    }

    return validated;
  }

  /**
   * Check the combined effect of multiple parameter changes by computing the
   * worst-case total wait time of a hypothetical config.
   *
   * @param {Array} recommendations - Recommendations for a single cron
   * @param {string} cron - Cron name (currently not used in the calculation)
   * @returns {{safe: boolean, warning?: string}}
   */
  checkCombinedEffect(recommendations, cron) {
    // Build hypothetical config: hard-coded defaults overridden by proposals
    const config = {
      maxRetries: 3,
      timeout: 15000,
      backoffBase: 1000
    };
    for (const rec of recommendations) {
      config[rec.param] = rec.proposed;
    }

    // Exponential backoff: base * (2^0 + 2^1 + ... + 2^(n-1)) = base * (2^n - 1)
    const maxBackoffTime = config.backoffBase * (Math.pow(2, config.maxRetries) - 1);
    const maxTotalTime = config.timeout * config.maxRetries + maxBackoffTime;

    // Assume 15-minute cron interval (900s); allow at most 80% of it
    const cronInterval = 900000; // 15 minutes in ms
    const budget = cronInterval * 0.8;

    if (maxTotalTime > budget) {
      return {
        safe: false,
        warning: `Combined params may exceed cron interval: ${Math.round(maxTotalTime / 1000)}s > ${Math.round(budget / 1000)}s`
      };
    }

    return { safe: true };
  }

  /**
   * Prioritize recommendations: by severity, then confidence (high before
   * low), then safe before unsafe. Unknown severity/confidence labels sort
   * after 'low' instead of producing a NaN comparison.
   *
   * Sorts the given array in place and returns it.
   *
   * @param {Array} recommendations
   * @returns {Array} The same array, sorted
   */
  prioritize(recommendations) {
    const levelRank = new Map([['high', 0], ['medium', 1], ['low', 2]]);
    const rankOf = (level) => levelRank.get(level) ?? levelRank.size;

    return recommendations.sort((a, b) => {
      // First by severity
      const bySeverity = rankOf(a.severity) - rankOf(b.severity);
      if (bySeverity !== 0) return bySeverity;

      // Then by confidence
      const byConfidence = rankOf(a.confidence) - rankOf(b.confidence);
      if (byConfidence !== 0) return byConfidence;

      // Then by safety
      if (a.safe !== b.safe) {
        return b.safe ? 1 : -1; // Safe first
      }
      return 0;
    });
  }
}
|
||||
|
||||
// ============================================================================
|
||||
// Export
|
||||
// ============================================================================
|
||||
|
||||
// Public API: the optimizer class together with the safety bounds and the
// minimum sample sizes defined in this module.
module.exports = { ParameterOptimizer, SAFETY_RULES, MIN_SAMPLE_SIZES };
|
||||
38
skills/openclaw-self-healing/lib/self-review-lib.sh
Normal file
38
skills/openclaw-self-healing/lib/self-review-lib.sh
Normal file
@@ -0,0 +1,38 @@
|
||||
#!/bin/bash
|
||||
# self-review-lib.sh
|
||||
# Version: 5.0.1
|
||||
# Common library for cron self-review (AOP pattern)
|
||||
|
||||
# Environment check: the library builds paths from $HOME, so stop loading
# (with a non-zero status for the sourcing script) when it is empty or unset.
[[ -n "$HOME" ]] || {
    echo "ERROR: HOME environment variable not set" >&2
    return 1
}
|
||||
|
||||
# Main self-review function (namespace prefix: sr_).
#
# Forwards exactly eight positional arguments to
# $HOME/openclaw/scripts/self-review-logger.sh:
#   $1 cron_name       $2 duration      $3 input_tokens   $4 output_tokens
#   $5 review_status   $6 what_went_wrong   $7 why        $8 next_action
#
# The logger's stderr is merged into stdout. Always returns 0: a logger
# failure only emits a warning on stderr so the calling cron keeps running.
sr_log_review() {
    local cron_name="$1" duration="$2"
    local input_tokens="$3" output_tokens="$4"
    local review_status="$5"
    local what_went_wrong="$6" why="$7" next_action="$8"

    # Call self-review-logger.sh; a failure must not abort the cron.
    if ! "$HOME/openclaw/scripts/self-review-logger.sh" \
        "$cron_name" "$duration" "$input_tokens" "$output_tokens" "$review_status" \
        "$what_went_wrong" "$why" "$next_action" 2>&1
    then
        echo "WARN: Self-review logging failed (continuing cron execution)" >&2
        return 0
    fi
}
|
||||
|
||||
# Print the library name and version on stdout.
sr_version() {
    printf '%s\n' "self-review-lib.sh v5.0.1"
}
|
||||
|
||||
# Load banner: written to stderr each time this file is sourced.
printf '%s\n' "[self-review-lib] Loaded v5.0.1" >&2
|
||||
Reference in New Issue
Block a user