AI Newsletter Digest improvements: fixed QP soft line break decoding, URL extraction, and content cleaning

This commit is contained in:
Krilly
2026-03-04 13:29:22 +00:00
parent 29a98137a7
commit 57dd294675
13706 changed files with 2114953 additions and 237629 deletions

View File

@@ -0,0 +1,647 @@
#!/usr/bin/env node
/**
* Level 2: Log Analyzer
*
* Auto-Retry 로그를 분석하여 패턴을 감지하고 최적화 제안을 생성
*/
const fs = require('fs');
const readline = require('readline');
const path = require('path');
// ============================================================================
// Log Analyzer Class
// ============================================================================
class LogAnalyzer {
constructor(options = {}) {
this.options = {
timeWindow: options.timeWindow || 7 * 24 * 3600 * 1000, // 7 days default
minSampleSize: options.minSampleSize || 5, // Minimum samples for pattern detection
...options
};
}
/**
* Analyze auto-retry logs
* @param {string} logPath - Path to auto-retry.jsonl
* @returns {Promise<Object>} Analysis results
*/
async analyze(logPath) {
const entries = await this.readLog(logPath);
const filteredEntries = this.filterByTimeWindow(entries);
const stats = this.calculateStats(filteredEntries);
const patterns = this.detectPatterns(stats);
const trends = this.analyzeTrends(filteredEntries);
return {
summary: this.generateSummary(stats),
stats,
patterns,
trends,
metadata: {
totalEntries: entries.length,
analyzedEntries: filteredEntries.length,
timeWindow: this.options.timeWindow,
analyzedAt: new Date().toISOString()
}
};
}
/**
* Read JSONL log file
* @param {string} logPath - Path to log file
* @returns {Promise<Array>} Array of log entries
*/
async readLog(logPath) {
const entries = [];
const fileStream = fs.createReadStream(logPath);
const rl = readline.createInterface({
input: fileStream,
crlfDelay: Infinity
});
for await (const line of rl) {
if (!line.trim()) continue;
try {
const entry = JSON.parse(line);
entries.push(entry);
} catch (error) {
console.error(`Invalid JSON line: ${line.substring(0, 50)}...`);
}
}
return entries;
}
/**
* Filter entries by time window
* @param {Array} entries - All log entries
* @returns {Array} Filtered entries
*/
filterByTimeWindow(entries) {
const cutoff = Date.now() - this.options.timeWindow;
return entries.filter(entry => {
const timestamp = new Date(entry.timestamp).getTime();
return timestamp >= cutoff;
});
}
/**
* Calculate statistics from entries
* @param {Array} entries - Filtered log entries
* @returns {Object} Statistics
*/
calculateStats(entries) {
const stats = {
overall: {
total: 0,
success: 0,
failure: 0,
retries: 0,
retryRate: 0,
failureRate: 0,
avgAttempts: 0,
avgDuration: 0,
durations: []
},
byCron: {},
byError: {},
performance: {}
};
// Separate actual cron executions from tests
const cronEntries = entries.filter(e => e.context?.cron);
const testEntries = entries.filter(e => !e.context?.cron);
// Analyze cron entries
for (const entry of cronEntries) {
const cron = entry.context.cron;
// Initialize cron stats
if (!stats.byCron[cron]) {
stats.byCron[cron] = {
total: 0,
success: 0,
failure: 0,
retries: 0,
retryRate: 0,
failureRate: 0,
avgAttempts: 0,
avgDuration: 0,
durations: [],
errors: []
};
}
const cronStats = stats.byCron[cron];
const attempts = entry.attempts.length;
const duration = entry.totalDuration || 0;
// Update counts
stats.overall.total++;
cronStats.total++;
if (entry.type === 'success') {
stats.overall.success++;
cronStats.success++;
} else {
stats.overall.failure++;
cronStats.failure++;
}
if (attempts > 1) {
stats.overall.retries++;
cronStats.retries++;
}
// Track durations
stats.overall.durations.push(duration);
cronStats.durations.push(duration);
// Track errors
if (entry.type === 'failure' || attempts > 1) {
for (const attempt of entry.attempts) {
if (attempt.error) {
const errorCategory = attempt.error.category || 'unknown';
const errorType = attempt.error.type || 'unknown';
// By error category
if (!stats.byError[errorCategory]) {
stats.byError[errorCategory] = {
count: 0,
types: {}
};
}
stats.byError[errorCategory].count++;
// By error type
if (!stats.byError[errorCategory].types[errorType]) {
stats.byError[errorCategory].types[errorType] = 0;
}
stats.byError[errorCategory].types[errorType]++;
// Track in cron stats
cronStats.errors.push({
category: errorCategory,
type: errorType,
message: attempt.error.message
});
}
}
}
}
// Calculate averages and rates
this.calculateAverages(stats.overall);
for (const cron in stats.byCron) {
this.calculateAverages(stats.byCron[cron]);
}
// Calculate performance metrics (percentiles)
stats.performance = this.calculatePerformanceMetrics(stats.overall.durations);
// Add test stats separately
stats.tests = {
total: testEntries.length,
success: testEntries.filter(e => e.type === 'success').length,
failure: testEntries.filter(e => e.type === 'failure').length
};
return stats;
}
/**
* Calculate averages and rates for a stats object
* @param {Object} statsObj - Stats object to update
*/
calculateAverages(statsObj) {
if (statsObj.total > 0) {
statsObj.retryRate = statsObj.retries / statsObj.total;
statsObj.failureRate = statsObj.failure / statsObj.total;
statsObj.avgAttempts = statsObj.total > 0
? (statsObj.total + statsObj.retries) / statsObj.total
: 0;
}
if (statsObj.durations.length > 0) {
statsObj.avgDuration = statsObj.durations.reduce((a, b) => a + b, 0) / statsObj.durations.length;
}
}
/**
* Calculate performance metrics (percentiles)
* @param {Array} durations - Array of durations
* @returns {Object} Performance metrics
*/
calculatePerformanceMetrics(durations) {
if (durations.length === 0) {
return { p50: 0, p95: 0, p99: 0, min: 0, max: 0 };
}
const sorted = [...durations].sort((a, b) => a - b);
const percentile = (p) => {
const index = Math.ceil((p / 100) * sorted.length) - 1;
return sorted[Math.max(0, index)];
};
return {
p50: percentile(50),
p95: percentile(95),
p99: percentile(99),
min: sorted[0],
max: sorted[sorted.length - 1]
};
}
/**
* Detect patterns that need attention
* @param {Object} stats - Statistics object
* @returns {Array} Array of detected patterns
*/
detectPatterns(stats) {
const patterns = [];
// Pattern detection for each cron
for (const [cron, data] of Object.entries(stats.byCron)) {
// Skip if insufficient data
if (data.total < this.options.minSampleSize) {
continue;
}
// Pattern 1: High retry rate
if (data.retryRate > 0.10) {
patterns.push({
type: 'high_retry_rate',
severity: data.retryRate > 0.20 ? 'high' : 'medium',
cron,
value: data.retryRate,
threshold: 0.10,
description: `${(data.retryRate * 100).toFixed(1)}% of executions needed retry (threshold: 10%)`,
suggestion: 'increase maxRetries',
affectedExecutions: data.retries
});
}
// Pattern 2: High failure rate
if (data.failureRate > 0.01) {
patterns.push({
type: 'high_failure_rate',
severity: data.failureRate > 0.05 ? 'high' : 'medium',
cron,
value: data.failureRate,
threshold: 0.01,
description: `${(data.failureRate * 100).toFixed(1)}% final failure rate (threshold: 1%)`,
suggestion: 'increase maxRetries or investigate root cause',
affectedExecutions: data.failure
});
}
// Pattern 3: Slow response (approaching timeout)
const timeoutThreshold = 15000 * 0.8; // 80% of 15s timeout
if (data.avgDuration > timeoutThreshold) {
patterns.push({
type: 'slow_response',
severity: data.avgDuration > 15000 * 0.9 ? 'high' : 'medium',
cron,
value: data.avgDuration,
threshold: timeoutThreshold,
description: `Avg response ${Math.round(data.avgDuration)}ms > 80% of timeout (12s)`,
suggestion: 'increase timeout',
currentTimeout: 15000,
recommendedTimeout: Math.ceil(data.avgDuration * 1.5)
});
}
// Pattern 4: High P95/P99 (inconsistent performance)
const performance = this.calculatePerformanceMetrics(data.durations);
if (performance.p95 > performance.p50 * 2) {
patterns.push({
type: 'inconsistent_performance',
severity: 'low',
cron,
value: performance.p95 / performance.p50,
description: `P95 (${Math.round(performance.p95)}ms) is ${(performance.p95 / performance.p50).toFixed(1)}x higher than median`,
suggestion: 'investigate outliers',
metrics: performance
});
}
}
// Pattern 5: Specific error categories
for (const [category, data] of Object.entries(stats.byError)) {
if (data.count > 3) {
const topType = Object.entries(data.types)
.sort((a, b) => b[1] - a[1])[0];
patterns.push({
type: 'recurring_error',
severity: data.count > 10 ? 'high' : 'medium',
category,
value: data.count,
description: `${category} errors occurred ${data.count} times`,
topErrorType: topType ? topType[0] : 'unknown',
suggestion: this.getSuggestionForError(category, topType ? topType[0] : null)
});
}
}
// Sort by severity
const severityOrder = { high: 0, medium: 1, low: 2 };
patterns.sort((a, b) => severityOrder[a.severity] - severityOrder[b.severity]);
return patterns;
}
/**
* Get suggestion for specific error
* @param {string} category - Error category
* @param {string} type - Error type
* @returns {string} Suggestion
*/
getSuggestionForError(category, type) {
const suggestions = {
'timeout': 'Increase timeout or check network latency',
'http': type === 'HTTP 429'
? 'Increase backoff delay to avoid rate limits'
: 'Check API status and retry logic',
'network': 'Check network connectivity and DNS resolution',
'unknown': 'Investigate error logs for root cause'
};
return suggestions[category] || suggestions['unknown'];
}
/**
* Analyze trends over time
* @param {Array} entries - Filtered log entries
* @returns {Object} Trend analysis
*/
analyzeTrends(entries) {
// Group by day
const byDay = {};
for (const entry of entries) {
if (!entry.context?.cron) continue;
const date = new Date(entry.timestamp);
const day = date.toISOString().split('T')[0];
const cron = entry.context.cron;
if (!byDay[day]) {
byDay[day] = {};
}
if (!byDay[day][cron]) {
byDay[day][cron] = {
total: 0,
success: 0,
retries: 0,
durations: []
};
}
byDay[day][cron].total++;
if (entry.type === 'success') {
byDay[day][cron].success++;
}
if (entry.attempts.length > 1) {
byDay[day][cron].retries++;
}
byDay[day][cron].durations.push(entry.totalDuration || 0);
}
// Calculate daily averages
const dailyStats = {};
for (const [day, crons] of Object.entries(byDay)) {
for (const [cron, data] of Object.entries(crons)) {
if (!dailyStats[cron]) {
dailyStats[cron] = [];
}
dailyStats[cron].push({
date: day,
retryRate: data.retries / data.total,
avgDuration: data.durations.reduce((a, b) => a + b, 0) / data.durations.length,
successRate: data.success / data.total
});
}
}
// Detect trends (improving/degrading)
const trends = {};
for (const [cron, days] of Object.entries(dailyStats)) {
if (days.length < 2) continue;
// Sort by date
days.sort((a, b) => a.date.localeCompare(b.date));
// Simple linear trend (first half vs second half)
const mid = Math.floor(days.length / 2);
const firstHalf = days.slice(0, mid);
const secondHalf = days.slice(mid);
const avgFirst = {
retryRate: firstHalf.reduce((a, b) => a + b.retryRate, 0) / firstHalf.length,
avgDuration: firstHalf.reduce((a, b) => a + b.avgDuration, 0) / firstHalf.length
};
const avgSecond = {
retryRate: secondHalf.reduce((a, b) => a + b.retryRate, 0) / secondHalf.length,
avgDuration: secondHalf.reduce((a, b) => a + b.avgDuration, 0) / secondHalf.length
};
trends[cron] = {
retryRate: {
trend: avgSecond.retryRate > avgFirst.retryRate ? 'increasing' : 'decreasing',
change: ((avgSecond.retryRate - avgFirst.retryRate) / (avgFirst.retryRate || 1)) * 100,
firstHalf: avgFirst.retryRate,
secondHalf: avgSecond.retryRate
},
avgDuration: {
trend: avgSecond.avgDuration > avgFirst.avgDuration ? 'increasing' : 'decreasing',
change: ((avgSecond.avgDuration - avgFirst.avgDuration) / avgFirst.avgDuration) * 100,
firstHalf: avgFirst.avgDuration,
secondHalf: avgSecond.avgDuration
}
};
}
return trends;
}
/**
* Generate human-readable summary
* @param {Object} stats - Statistics object
* @returns {Object} Summary
*/
generateSummary(stats) {
const summary = {
overall: {
totalExecutions: stats.overall.total,
successRate: `${((stats.overall.success / stats.overall.total) * 100).toFixed(1)}%`,
retryRate: `${(stats.overall.retryRate * 100).toFixed(1)}%`,
failureRate: `${(stats.overall.failureRate * 100).toFixed(1)}%`,
avgAttempts: stats.overall.avgAttempts.toFixed(2),
avgDuration: `${Math.round(stats.overall.avgDuration)}ms`
},
crons: {},
topErrors: []
};
// Summarize each cron
for (const [cron, data] of Object.entries(stats.byCron)) {
summary.crons[cron] = {
executions: data.total,
successRate: `${((data.success / data.total) * 100).toFixed(1)}%`,
retryRate: `${(data.retryRate * 100).toFixed(1)}%`,
avgDuration: `${Math.round(data.avgDuration)}ms`
};
}
// Top errors
const errorList = Object.entries(stats.byError)
.map(([category, data]) => ({
category,
count: data.count,
topType: Object.entries(data.types).sort((a, b) => b[1] - a[1])[0]
}))
.sort((a, b) => b.count - a.count)
.slice(0, 5);
summary.topErrors = errorList.map(e => ({
category: e.category,
count: e.count,
topType: e.topType ? e.topType[0] : 'unknown'
}));
return summary;
}
/**
* Print analysis results to console
* @param {Object} analysis - Analysis results
*/
printResults(analysis) {
console.log('\n' + '='.repeat(60));
console.log('📊 Level 2: Auto-Retry Log Analysis');
console.log('='.repeat(60) + '\n');
// Summary
console.log('📈 Overall Summary:');
console.log(` Total Executions: ${analysis.summary.overall.totalExecutions}`);
console.log(` Success Rate: ${analysis.summary.overall.successRate}`);
console.log(` Retry Rate: ${analysis.summary.overall.retryRate}`);
console.log(` Failure Rate: ${analysis.summary.overall.failureRate}`);
console.log(` Avg Attempts: ${analysis.summary.overall.avgAttempts}`);
console.log(` Avg Duration: ${analysis.summary.overall.avgDuration}`);
// Performance metrics
if (analysis.stats.performance) {
const perf = analysis.stats.performance;
console.log('\n⚡ Performance Metrics:');
console.log(` P50 (median): ${Math.round(perf.p50)}ms`);
console.log(` P95: ${Math.round(perf.p95)}ms`);
console.log(` P99: ${Math.round(perf.p99)}ms`);
console.log(` Min: ${Math.round(perf.min)}ms`);
console.log(` Max: ${Math.round(perf.max)}ms`);
}
// By Cron
console.log('\n📋 By Cron:');
for (const [cron, data] of Object.entries(analysis.summary.crons)) {
console.log(`\n ${cron}:`);
console.log(` Executions: ${data.executions}`);
console.log(` Success: ${data.successRate}`);
console.log(` Retry: ${data.retryRate}`);
console.log(` Avg Duration: ${data.avgDuration}`);
}
// Patterns
if (analysis.patterns.length > 0) {
console.log('\n⚠ Detected Patterns:');
for (const pattern of analysis.patterns) {
const icon = pattern.severity === 'high' ? '🔴' : pattern.severity === 'medium' ? '🟡' : '🟢';
console.log(`\n ${icon} ${pattern.type} (${pattern.severity})`);
console.log(` ${pattern.description}`);
console.log(` 💡 Suggestion: ${pattern.suggestion}`);
}
} else {
console.log('\n✅ No patterns detected - all metrics within normal range');
}
// Trends
if (Object.keys(analysis.trends).length > 0) {
console.log('\n📈 Trends:');
for (const [cron, trend] of Object.entries(analysis.trends)) {
console.log(`\n ${cron}:`);
const retryIcon = trend.retryRate.trend === 'increasing' ? '📈' : '📉';
const retryColor = trend.retryRate.trend === 'increasing' ? '⚠️' : '✅';
console.log(` ${retryIcon} Retry Rate: ${retryColor} ${trend.retryRate.trend} (${trend.retryRate.change > 0 ? '+' : ''}${trend.retryRate.change.toFixed(1)}%)`);
const durationIcon = trend.avgDuration.trend === 'increasing' ? '📈' : '📉';
const durationColor = trend.avgDuration.trend === 'increasing' ? '⚠️' : '✅';
console.log(` ${durationIcon} Avg Duration: ${durationColor} ${trend.avgDuration.trend} (${trend.avgDuration.change > 0 ? '+' : ''}${trend.avgDuration.change.toFixed(1)}%)`);
}
}
// Top errors
if (analysis.summary.topErrors.length > 0) {
console.log('\n🚨 Top Errors:');
for (const error of analysis.summary.topErrors) {
console.log(` - ${error.category} (${error.count}x) - ${error.topType}`);
}
}
// Metadata
console.log('\n' + '='.repeat(60));
console.log(`Analyzed: ${analysis.metadata.analyzedEntries} entries (${analysis.metadata.totalEntries} total)`);
console.log(`Time Window: ${Math.round(analysis.metadata.timeWindow / (24 * 3600 * 1000))} days`);
console.log(`Analyzed At: ${new Date(analysis.metadata.analyzedAt).toLocaleString()}`);
console.log('='.repeat(60) + '\n');
}
}
// ============================================================================
// CLI Interface
// ============================================================================
async function main() {
const logPath = process.argv[2] || path.join(process.env.HOME, 'openclaw/logs/auto-retry.jsonl');
const timeWindow = parseInt(process.argv[3]) || 7; // days
console.log(`Analyzing: ${logPath}`);
console.log(`Time Window: ${timeWindow} days\n`);
const analyzer = new LogAnalyzer({
timeWindow: timeWindow * 24 * 3600 * 1000,
minSampleSize: 5
});
try {
const analysis = await analyzer.analyze(logPath);
analyzer.printResults(analysis);
// Save results to file
const outputPath = path.join(path.dirname(logPath), 'log-analysis.json');
fs.writeFileSync(outputPath, JSON.stringify(analysis, null, 2));
console.log(`📁 Full analysis saved to: ${outputPath}\n`);
} catch (error) {
console.error('Error analyzing logs:', error.message);
process.exit(1);
}
}
// ============================================================================
// Export
// ============================================================================
if (require.main === module) {
main();
}
module.exports = { LogAnalyzer };