AI Newsletter Digest improvements: fixed QP soft line break decoding, URL extraction, and content cleaning

2026-03-04 13:29:22 +00:00
parent 29a98137a7
commit 57dd294675
13706 changed files with 2114953 additions and 237629 deletions
--- a/skills/openclaw-self-healing/scripts/validate-self-review.js
+++ b/skills/openclaw-self-healing/scripts/validate-self-review.js
@@ -0,0 +1,525 @@
+#!/usr/bin/env node
+/**
+ * Self-Review Validation Script (V3 - Stage 2)
+ * 
+ * Purpose: External validation of self-evaluation results
+ * Runs: 1 minute after each self-evaluating cron
+ * 
+ * Validation Layers:
+ * 1. Metric Verification (tool errors, completion time, token usage)
+ * 2. Format Verification (emoji count, separator count, forbidden phrases)
+ * 3. Consistency Verification (compare with recent evaluations)
+ * 
+ * Output: validation-YYYY-MM-DD.jsonl
+ */
+
+const fs = require('fs');
+const os = require('os');
+const path = require('path');
+
+// ============================================================================
+// Configuration
+// ============================================================================
+
+// Home directory used as the root for all validator state. process.env.HOME
+// may be unset in some environments, and path.join(undefined, ...) throws a
+// TypeError at module load, so fall back to os.homedir().
+const HOME_DIR = process.env.HOME || os.homedir();
+
+// Memory, validation output and the baseline file all share this directory.
+const MEMORY_ROOT = path.join(HOME_DIR, 'openclaw', 'memory');
+
+const CONFIG = {
+  // Forbidden phrases (from Response Guard)
+  FORBIDDEN_PHRASES: [
+    '알겠습니다',   // "Understood"
+    '완료!',        // "Done!"
+    '완료했습니다', // "I have completed it"
+    '처리했습니다', // "I have handled it"
+    '설정했습니다', // "I have set it up"
+    '확인했습니다', // "I have checked it"
+    '기록했습니다'  // "I have recorded it"
+  ],
+
+  // Format limits
+  MAX_EMOJIS: 3,
+  MAX_SEPARATORS: 2,
+
+  // Metric thresholds
+  MAX_TOOL_ERRORS: 2,
+  COMPLETION_TIME_MULTIPLIER: 1.5, // 150% of baseline
+  TOKEN_USAGE_MULTIPLIER: 1.3, // 130% of baseline
+
+  // Paths
+  MEMORY_DIR: MEMORY_ROOT,
+  VALIDATION_DIR: MEMORY_ROOT,
+  BASELINE_FILE: path.join(MEMORY_ROOT, 'cron-baselines.json')
+};
+
+// ============================================================================
+// Utility Functions
+// ============================================================================
+
+/**
+ * Count the code points in `text` that fall inside the emoji ranges below.
+ *
+ * @param {string} text - Text to scan.
+ * @returns {number} Number of matching code points (0 when none match).
+ */
+function countEmojis(text) {
+  // Ranges: U+1F300-U+1F9FF, U+2600-U+26FF and U+2700-U+27BF. The `u` flag
+  // makes the character class operate on whole code points.
+  const hits = text.match(/[\u{1F300}-\u{1F9FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}]/gu) || [];
+  return hits.length;
+}
+
+/**
+ * Count markdown separator lines: lines that, once trimmed, consist solely
+ * of three or more hyphens.
+ *
+ * @param {string} text - Text to scan, split on '\n'.
+ * @returns {number} Number of separator lines.
+ */
+function countSeparators(text) {
+  return text
+    .split('\n')
+    .map((row) => row.trim())
+    .filter((row) => /^-{3,}$/.test(row))
+    .length;
+}
+
+/**
+ * List the configured forbidden phrases that occur in `text`.
+ *
+ * @param {string} text - Text to scan.
+ * @returns {string[]} Matching phrases, in CONFIG.FORBIDDEN_PHRASES order.
+ */
+function detectForbiddenPhrases(text) {
+  return CONFIG.FORBIDDEN_PHRASES.filter((phrase) => text.includes(phrase));
+}
+
+/**
+ * Extract the self-evaluation section ("## 자기평가") from cron output and
+ * parse its individual fields.
+ *
+ * The section runs from its heading up to the next line starting with "##"
+ * or the end of the text. Fields that cannot be found are left as null.
+ *
+ * @param {string} output - Full cron output text.
+ * @returns {Object|null} Parsed fields plus the raw section text, or null
+ *   when the output has no self-evaluation section.
+ */
+function parseSelfEvaluation(output) {
+  const section = output.match(/##\s*자기평가[\s\S]*?(?=\n##|$)/i);
+  if (section === null) {
+    return null;
+  }
+
+  const [text] = section;
+
+  // Completeness, e.g. "완성도: 3/3"
+  const completenessHit = text.match(/완성도[:：]\s*(\d+)\/(\d+)/i);
+  // Accuracy, e.g. "정확성: OK" / "WARNING" / "FAIL"
+  const accuracyHit = text.match(/정확성[:：]\s*(OK|WARNING|FAIL)/i);
+  // Tone, e.g. "톤: Jarvis" or "ChatGPT-like"
+  const toneHit = text.match(/톤[:：]\s*(Jarvis|ChatGPT[-\s]?like?)/i);
+  // Conciseness, e.g. "간결성: 2 emojis"
+  const concisenessHit = text.match(/간결성[:：].*?(\d+)\s*emojis?/i);
+  // Improvement suggestion: the text following "개선:"
+  const improvementHit = text.match(/개선[:：]\s*(.+)/i);
+
+  let tone = null;
+  if (toneHit) {
+    // Normalise whatever spelling matched to one of two canonical labels.
+    tone = toneHit[1].toLowerCase().includes('jarvis') ? 'Jarvis' : 'ChatGPT-like';
+  }
+
+  return {
+    completeness: completenessHit
+      ? { score: parseInt(completenessHit[1], 10), total: parseInt(completenessHit[2], 10) }
+      : null,
+    accuracy: accuracyHit ? accuracyHit[1].toUpperCase() : null,
+    tone,
+    conciseness: concisenessHit ? { emojis: parseInt(concisenessHit[1], 10) } : null,
+    improvement: improvementHit ? improvementHit[1].trim() : null,
+    raw: text
+  };
+}
+
+/**
+ * Load validation records written during the last `days` days, for the
+ * consistency check.
+ *
+ * Reads `validation-YYYY-MM-DD.jsonl` files from CONFIG.VALIDATION_DIR,
+ * starting with today's file and walking backwards. Missing files are
+ * skipped, as are lines that are not valid JSON or do not parse to an object.
+ *
+ * @param {number} [days=7] - Number of days to look back, including today.
+ * @returns {Array<Object>} Parsed records, newest day first.
+ */
+function loadRecentEvaluations(days = 7) {
+  const evaluations = [];
+  const now = new Date();
+
+  for (let i = 0; i < days; i++) {
+    const date = new Date(now);
+    // File names are derived from toISOString(), i.e. the UTC date, so the
+    // day arithmetic must be done in UTC as well. Stepping in local time can
+    // repeat or skip a UTC date around a DST change.
+    date.setUTCDate(date.getUTCDate() - i);
+    const dateStr = date.toISOString().split('T')[0];
+    const filePath = path.join(CONFIG.VALIDATION_DIR, `validation-${dateStr}.jsonl`);
+
+    if (!fs.existsSync(filePath)) {
+      continue;
+    }
+
+    const lines = fs.readFileSync(filePath, 'utf8').split('\n').filter(l => l.trim());
+    for (const line of lines) {
+      let record;
+      try {
+        record = JSON.parse(line);
+      } catch (e) {
+        // Best effort: a malformed line must not block validation.
+        continue;
+      }
+      // Valid JSON that is not an object (e.g. `null`, a number) is not a
+      // validation record and would break later property access.
+      if (record !== null && typeof record === 'object') {
+        evaluations.push(record);
+      }
+    }
+  }
+
+  return evaluations;
+}
+
+/**
+ * Load the stored baseline metrics for a cron job.
+ *
+ * @param {string} cronId - Cron job ID used as the key in the baseline file.
+ * @returns {Object|null} The stored baseline entry, or null when the file
+ *   does not exist or has no entry for `cronId`.
+ * @throws {SyntaxError} If the baseline file is not valid JSON.
+ */
+function loadBaseline(cronId) {
+  if (!fs.existsSync(CONFIG.BASELINE_FILE)) {
+    return null;
+  }
+
+  const baselines = JSON.parse(fs.readFileSync(CONFIG.BASELINE_FILE, 'utf8'));
+  if (baselines === null || typeof baselines !== 'object') {
+    return null;
+  }
+
+  // Only accept own properties: an ID such as "constructor" or "toString"
+  // would otherwise resolve to an inherited Object.prototype member.
+  if (!Object.prototype.hasOwnProperty.call(baselines, cronId)) {
+    return null;
+  }
+
+  return baselines[cronId] || null;
+}
+
+/**
+ * Record a new metrics sample for a cron job and refresh its averages.
+ *
+ * Keeps the 30 most recent samples per cron and rewrites
+ * CONFIG.BASELINE_FILE with the updated data.
+ *
+ * @param {string} cronId - Cron job ID used as the key in the baseline file.
+ * @param {Object} metrics - Metrics of the run being recorded.
+ * @param {number} metrics.completionTime - Completion time in ms.
+ * @param {number} metrics.tokenUsage - Token usage.
+ * @param {number} metrics.toolErrors - Number of tool errors.
+ * @throws {SyntaxError} If the existing baseline file is not valid JSON.
+ */
+function updateBaseline(cronId, metrics) {
+  const MAX_SAMPLES = 30;
+
+  let baselines = {};
+  if (fs.existsSync(CONFIG.BASELINE_FILE)) {
+    baselines = JSON.parse(fs.readFileSync(CONFIG.BASELINE_FILE, 'utf8'));
+  }
+
+  // Start a fresh entry unless a well-formed own entry already exists;
+  // inherited members such as "constructor" are not baseline entries.
+  const existing = Object.prototype.hasOwnProperty.call(baselines, cronId)
+    ? baselines[cronId]
+    : null;
+  if (!existing || !Array.isArray(existing.samples)) {
+    baselines[cronId] = {
+      samples: [],
+      avg: {}
+    };
+  }
+
+  const entry = baselines[cronId];
+
+  // Add new sample
+  entry.samples.push({
+    timestamp: Date.now(),
+    completionTime: metrics.completionTime,
+    tokenUsage: metrics.tokenUsage,
+    toolErrors: metrics.toolErrors
+  });
+
+  // Keep only the most recent samples
+  if (entry.samples.length > MAX_SAMPLES) {
+    entry.samples = entry.samples.slice(-MAX_SAMPLES);
+  }
+
+  // Average over finite values only. A NaN or missing metric is stored as
+  // null in JSON; summing it would count it as 0 while still dividing by the
+  // full sample count, dragging the average down. With no usable values the
+  // average is null, which is also what a NaN average serializes to.
+  const average = (key) => {
+    const values = entry.samples
+      .map((sample) => sample[key])
+      .filter((value) => Number.isFinite(value));
+    if (values.length === 0) {
+      return null;
+    }
+    return values.reduce((sum, value) => sum + value, 0) / values.length;
+  };
+
+  entry.avg = {
+    completionTime: average('completionTime'),
+    tokenUsage: average('tokenUsage'),
+    toolErrors: average('toolErrors')
+  };
+
+  fs.writeFileSync(CONFIG.BASELINE_FILE, JSON.stringify(baselines, null, 2));
+}
+
+// ============================================================================
+// Validation Logic
+// ============================================================================
+
+/**
+ * Check a run's metrics against the fixed tool-error limit and, when a
+ * baseline is available, against the baseline averages.
+ *
+ * @param {Object} metrics - Metrics of the current run (toolErrors,
+ *   completionTime, tokenUsage and optionally toolErrorDetails).
+ * @param {Object|null} baseline - Stored baseline whose `avg` holds the
+ *   average completionTime / tokenUsage, or a falsy value when none exists.
+ * @returns {Array<Object>} Flags raised by the metric checks (possibly empty).
+ */
+function validateMetrics(metrics, baseline) {
+  const raised = [];
+
+  // Absolute limit on tool errors.
+  const errorCount = metrics.toolErrors;
+  if (errorCount > CONFIG.MAX_TOOL_ERRORS) {
+    raised.push({
+      type: 'HIGH_ERROR_RATE',
+      severity: 'HIGH',
+      detail: `Tool errors: ${errorCount} (threshold: ${CONFIG.MAX_TOOL_ERRORS})`,
+      evidence: metrics.toolErrorDetails || []
+    });
+  }
+
+  // Relative limit on completion time; skipped without a truthy average.
+  const avgTime = baseline && baseline.avg.completionTime;
+  if (avgTime) {
+    const timeLimit = avgTime * CONFIG.COMPLETION_TIME_MULTIPLIER;
+    if (metrics.completionTime > timeLimit) {
+      raised.push({
+        type: 'PERFORMANCE_DEGRADATION',
+        severity: 'MEDIUM',
+        detail: `Completion time: ${metrics.completionTime}ms (baseline avg: ${avgTime}ms, threshold: ${timeLimit}ms)`
+      });
+    }
+  }
+
+  // Relative limit on token usage; skipped without a truthy average.
+  const avgTokens = baseline && baseline.avg.tokenUsage;
+  if (avgTokens) {
+    const tokenLimit = avgTokens * CONFIG.TOKEN_USAGE_MULTIPLIER;
+    if (metrics.tokenUsage > tokenLimit) {
+      raised.push({
+        type: 'TOKEN_USAGE_HIGH',
+        severity: 'LOW',
+        detail: `Token usage: ${metrics.tokenUsage} (baseline avg: ${avgTokens}, threshold: ${tokenLimit})`
+      });
+    }
+  }
+
+  return raised;
+}
+
+/**
+ * Check the output's formatting against the configured limits and compare
+ * the self-reported emoji count with the measured one.
+ *
+ * @param {string} output - Cron output text.
+ * @param {Object|null} selfEval - Parsed self-evaluation; its
+ *   `conciseness.emojis` value, when present, is compared to the real count.
+ * @returns {Array<Object>} Flags raised by the format checks (possibly empty).
+ */
+function validateFormat(output, selfEval) {
+  // Measure everything first, then turn the measurements into flags.
+  const emojiCount = countEmojis(output);
+  const separatorCount = countSeparators(output);
+  const bannedPhrases = detectForbiddenPhrases(output);
+
+  const raised = [];
+
+  if (emojiCount > CONFIG.MAX_EMOJIS) {
+    raised.push({
+      type: 'EMOJI_OVERFLOW',
+      severity: 'LOW',
+      detail: `Actual emojis: ${emojiCount} (limit: ${CONFIG.MAX_EMOJIS})`
+    });
+  }
+
+  if (separatorCount > CONFIG.MAX_SEPARATORS) {
+    raised.push({
+      type: 'SEPARATOR_OVERFLOW',
+      severity: 'LOW',
+      detail: `Actual separators: ${separatorCount} (limit: ${CONFIG.MAX_SEPARATORS})`
+    });
+  }
+
+  if (bannedPhrases.length > 0) {
+    raised.push({
+      type: 'FORBIDDEN_PHRASE',
+      severity: 'MEDIUM',
+      detail: `Forbidden phrases detected: ${bannedPhrases.join(', ')}`
+    });
+  }
+
+  // The self-reported emoji count must match the measured count exactly.
+  const reported = selfEval && selfEval.conciseness;
+  if (reported && reported.emojis !== emojiCount) {
+    raised.push({
+      type: 'INACCURATE_SELF_EVALUATION',
+      severity: 'MEDIUM',
+      detail: `Self-reported ${reported.emojis} emojis, actual: ${emojiCount}`,
+      evidence: {
+        selfReported: reported.emojis,
+        actual: emojiCount
+      }
+    });
+  }
+
+  return raised;
+}
+
+/**
+ * Check the self-evaluation for internal consistency and against recent
+ * validation history.
+ *
+ * @param {Object|null} selfEval - Parsed self-evaluation (uses `tone` and
+ *   `accuracy`); when falsy no checks run.
+ * @param {Array<Object>} recentEvals - Earlier validation records. Entries
+ *   that are not objects or lack a `validationFlags` array are ignored.
+ * @param {string[]} forbiddenFound - Forbidden phrases found in the output.
+ * @returns {Array<Object>} Flags raised by the consistency checks.
+ */
+function validateConsistency(selfEval, recentEvals, forbiddenFound) {
+  const flags = [];
+
+  if (!selfEval) return flags;
+
+  const phrases = Array.isArray(forbiddenFound) ? forbiddenFound : [];
+  const history = Array.isArray(recentEvals) ? recentEvals : [];
+
+  // A "Jarvis" tone claim is contradicted by any forbidden phrase.
+  if (selfEval.tone === 'Jarvis' && phrases.length > 0) {
+    flags.push({
+      type: 'TONE_MISMATCH',
+      severity: 'MEDIUM',
+      detail: `Self-reported 'Jarvis' but forbidden phrases detected: ${phrases.join(', ')}`
+    });
+  }
+
+  // Past runs that claimed accuracy 'OK' while being flagged for a high
+  // error rate. History is read from disk, so tolerate records that are
+  // null or lack a validationFlags array instead of throwing.
+  const recentAccuracyIssues = history.filter(e =>
+    e !== null && typeof e === 'object' &&
+    e.selfEvaluation && e.selfEvaluation.accuracy === 'OK' &&
+    Array.isArray(e.validationFlags) &&
+    e.validationFlags.some(f => f && f.type === 'HIGH_ERROR_RATE')
+  );
+
+  if (recentAccuracyIssues.length >= 3 && selfEval.accuracy === 'OK') {
+    flags.push({
+      type: 'ACCURACY_OPTIMISM_BIAS',
+      severity: 'LOW',
+      detail: `Self-reported 'OK' but recent history shows ${recentAccuracyIssues.length} false OKs in past 7 days`
+    });
+  }
+
+  return flags;
+}
+
+// ============================================================================
+// Main Validation Function
+// ============================================================================
+
+/**
+ * Map a list of validation flags to an overall verdict.
+ *
+ * @param {Array<Object>} flags - Flags, each with a `severity` field.
+ * @returns {string} 'PASS' with no flags, 'FAIL' if any flag is HIGH,
+ *   'WARN' if any is MEDIUM, otherwise 'INFO'.
+ */
+function determineVerdict(flags) {
+  if (flags.length === 0) return 'PASS';
+  if (flags.some(f => f.severity === 'HIGH')) return 'FAIL';
+  if (flags.some(f => f.severity === 'MEDIUM')) return 'WARN';
+  return 'INFO';
+}
+
+/**
+ * Validate a cron execution
+ *
+ * Runs the metric, format and consistency checks, records the run's metrics
+ * in the baseline file, appends the result to today's
+ * `validation-YYYY-MM-DD.jsonl` file (UTC date) and returns it.
+ *
+ * @param {Object} input
+ * @param {string} input.cronId - Cron job ID
+ * @param {string} input.cronName - Cron job name
+ * @param {string} input.output - Cron output text
+ * @param {Object} input.metrics - Execution metrics
+ * @param {number} input.metrics.completionTime - Completion time in ms
+ * @param {number} input.metrics.tokenUsage - Token usage
+ * @param {number} input.metrics.toolErrors - Number of tool errors
+ * @param {Array} input.metrics.toolErrorDetails - Details of tool errors
+ * @returns {Object} The validation record that was written.
+ */
+function validate(input) {
+  const { cronId, cronName, output, metrics } = input;
+  const timestamp = Date.now();
+
+  // Parse self-evaluation
+  const selfEval = parseSelfEvaluation(output);
+
+  // Load the baseline before this run is added to it, so the run is
+  // compared against earlier samples only.
+  const baseline = loadBaseline(cronId);
+
+  // Recent history, restricted to this cron
+  const recentSameCron = loadRecentEvaluations(7).filter(e => e.cronId === cronId);
+
+  // Detect forbidden phrases
+  const forbiddenFound = detectForbiddenPhrases(output);
+
+  // Validate
+  const allFlags = [
+    ...validateMetrics(metrics, baseline),
+    ...validateFormat(output, selfEval),
+    ...validateConsistency(selfEval, recentSameCron, forbiddenFound)
+  ];
+
+  const verdict = determineVerdict(allFlags);
+
+  // Make sure the target directories exist before anything is written, so
+  // the first run on a fresh machine does not fail with ENOENT.
+  fs.mkdirSync(path.dirname(CONFIG.BASELINE_FILE), { recursive: true });
+  fs.mkdirSync(CONFIG.VALIDATION_DIR, { recursive: true });
+
+  // Update baseline
+  updateBaseline(cronId, metrics);
+
+  // Prepare result
+  const result = {
+    cronId,
+    cronName,
+    timestamp,
+    selfEvaluation: selfEval,
+    validationFlags: allFlags,
+    verdict,
+    metrics: {
+      actual: metrics,
+      baseline: baseline ? baseline.avg : null
+    },
+    formatChecks: {
+      emojis: {
+        actual: countEmojis(output),
+        selfReported: selfEval && selfEval.conciseness ? selfEval.conciseness.emojis : null,
+        limit: CONFIG.MAX_EMOJIS
+      },
+      separators: {
+        actual: countSeparators(output),
+        limit: CONFIG.MAX_SEPARATORS
+      },
+      forbiddenPhrases: forbiddenFound
+    }
+  };
+
+  // Write to JSONL. Derive the file date from the record's own timestamp so
+  // the two cannot disagree when a run straddles UTC midnight.
+  const dateStr = new Date(timestamp).toISOString().split('T')[0];
+  const outputPath = path.join(CONFIG.VALIDATION_DIR, `validation-${dateStr}.jsonl`);
+  fs.appendFileSync(outputPath, JSON.stringify(result) + '\n');
+
+  return result;
+}
+
+// ============================================================================
+// CLI Interface
+// ============================================================================
+
+// Command-line entry point; runs only when this file is executed directly.
+//
+// Two invocation modes:
+//   file mode  (6+ args): <cronId> <cronName> <outputFile> <completionTime> <tokenUsage> <toolErrors>
+//   stdin mode (5 args):  <cronId> <cronName> <completionTime> <tokenUsage> <toolErrors>
+//
+// Exits with status 1 on invalid arguments or when the verdict is FAIL.
+if (require.main === module) {
+  const args = process.argv.slice(2);
+
+  // Fewer than five arguments cannot describe a run in either mode; without
+  // this check the metrics would be parsed from missing arguments as NaN.
+  if (args.length < 5) {
+    console.error('Usage: validate-self-review.js <cronId> <cronName> <outputFile> <completionTime> <tokenUsage> <toolErrors>');
+    console.error('   or: cat output.txt | validate-self-review.js <cronId> <cronName> <completionTime> <tokenUsage> <toolErrors>');
+    process.exit(1);
+  }
+
+  // Parse a base-10 integer metric and abort on anything non-numeric. NaN
+  // compares false against every threshold, so it would pass validation
+  // silently.
+  const parseMetric = (label, raw) => {
+    const value = Number.parseInt(raw, 10);
+    if (Number.isNaN(value)) {
+      console.error(`Invalid ${label}: expected an integer, got "${raw}"`);
+      process.exit(1);
+    }
+    return value;
+  };
+
+  const fromFile = args.length >= 6;
+  const [cronId, cronName] = args;
+  const metricArgs = fromFile ? args.slice(3, 6) : args.slice(2, 5);
+
+  const completionTime = parseMetric('completionTime', metricArgs[0]);
+  const tokenUsage = parseMetric('tokenUsage', metricArgs[1]);
+  const toolErrors = parseMetric('toolErrors', metricArgs[2]);
+
+  // In stdin mode read from file descriptor 0.
+  const output = fs.readFileSync(fromFile ? args[2] : 0, 'utf8');
+
+  const result = validate({
+    cronId,
+    cronName,
+    output,
+    metrics: {
+      completionTime,
+      tokenUsage,
+      toolErrors,
+      toolErrorDetails: []
+    }
+  });
+
+  // Print result
+  console.log(JSON.stringify(result, null, 2));
+
+  // Exit with error code if FAIL
+  if (result.verdict === 'FAIL') {
+    process.exit(1);
+  }
+}
+
+// ============================================================================
+// Exports
+// ============================================================================
+
+// Public API: `validate` is the main entry point; the parsing, counting and
+// baseline helpers are exported alongside it.
+module.exports = {
+  validate,
+  countEmojis,
+  countSeparators,
+  detectForbiddenPhrases,
+  parseSelfEvaluation,
+  loadRecentEvaluations,
+  loadBaseline,
+  updateBaseline
+};