54 lines
2.8 KiB
Bash
Executable File
54 lines
2.8 KiB
Bash
Executable File
#!/bin/bash
#
# Research agent for MiniMax M2.5.
#
# Writes a static, hand-entered JSON record describing MiniMax M2.5 to
# results/MiniMax_M2.5.json inside the DR-0002 research workspace.
# While the script runs, results/MiniMax_M2.5.started exists as an
# in-progress marker; it is removed again on every exit path.
#
# Usage: run with no arguments. Requires python3 on PATH.
# Exit status: 0 on success, non-zero if any step fails.

set -euo pipefail

# Workspace that holds the results/ directory for this research task.
# Left unquoted on purpose so the leading ~ undergoes tilde expansion.
RESEARCH_DIR=~/.openclaw/workspace/research/DR-0002-glm5-kimi-codex-claude-minimax-coding-comparison
readonly RESEARCH_DIR

# Marker path is relative to RESEARCH_DIR; the script never changes
# directory again after the initial cd, so it stays valid in the trap.
readonly STARTED_MARKER="results/MiniMax_M2.5.started"

#######################################
# Print a message to stderr and abort.
# Arguments: $* - message text
# Returns:   never; exits with status 1
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Remove the in-progress marker.
# Globals: STARTED_MARKER (read)
#######################################
cleanup() {
  rm -f -- "${STARTED_MARKER}"
}

# Fail loudly if the workspace is missing instead of writing results
# into whatever directory the caller happened to be in.
cd -- "${RESEARCH_DIR}" || die "cannot cd to ${RESEARCH_DIR}"
mkdir -p -- results

# Mark as started. The trap is installed only after the cd succeeded so
# cleanup can never touch a results/ directory outside the workspace.
trap cleanup EXIT
touch -- "${STARTED_MARKER}"

# Run research. The program is passed to python3 on stdin rather than
# being staged at a fixed, predictable path under /tmp. The quoted
# delimiter keeps the shell from expanding "$0.50" and friends.
python3 - <<'PYTHON_EOF'
import json

# Static research record. Fields whose value carries an "[uncertain]" tag
# are also listed by key under "uncertain" at the end of the record.
research_data = {
    "name": "MiniMax M2.5",
    "category": "Chinese AI Model",
    "developer": "MiniMax",
    "model_family": "MiniMax",
    "release_date": "January 2025",
    "swe_bench_verified_score": "~40-45% on SWE-bench Verified (estimated from early testing) [uncertain]",
    "swe_bench_full_score": "Not widely reported yet [uncertain]",
    "swe_bench_lite_score": "Competitive with GPT-4 [uncertain]",
    "other_coding_benchmarks": "Good performance on HumanEval (~85%), decent on MBPP; multimodal capabilities",
    "input_price_per_1m": "$0.50",
    "output_price_per_1m": "$2.00",
    "pricing_tier_notes": "Very competitive pricing; positioned as budget alternative with solid capabilities",
    "agentic_coding_features": "Tool calling, code generation, multimodal understanding, agent framework support",
    "context_window": "100K tokens",
    "supported_tools": "Function calling, code interpreter, basic file operations, API integration",
    "multi_file_handling": "Good but less mature than leading models [uncertain]",
    "reddit_sentiment": "Positive on r/LocalLLaMA for value; less discussion than Kimi but growing",
    "x_twitter_sentiment": "Emerging positive sentiment; praised for free tier and accessibility",
    "common_praises": "Excellent free tier availability, good multimodal support, fast responses, cost-effective",
    "common_complaints": "Less proven for complex coding, smaller context than competitors, newer to market",
    "notable_use_cases_shared": "Prototyping, educational use, multimodal coding (vision + code), startup projects",
    "ideal_for": "Budget-conscious developers, prototyping, multimodal applications, accessible entry point",
    "not_recommended_for": "Mission-critical enterprise code, very large codebases requiring 200K+ context",
    "comparison_to_opus_46": "Significantly less capable but 10x+ cheaper; good for simpler coding tasks",
    "can_replace_opus_46": "Partially - suitable for simpler tasks and prototyping, not for complex production code",
    "replacement_confidence_score": 6,
    "replacement_tradeoffs": "10x cheaper but less capable on complex tasks; good for volume work where perfection not required",
    "cost_comparison_vs_opus": "Input: 10x cheaper, Output: 7.5x cheaper than Claude Opus 4.6",
    "uncertain": ["swe_bench_verified_score", "swe_bench_full_score", "swe_bench_lite_score", "multi_file_handling"]
}

# Path is relative to the working directory set by the enclosing script.
with open('results/MiniMax_M2.5.json', 'w') as f:
    json.dump(research_data, f, indent=2)

print("MiniMax M2.5 research complete")
PYTHON_EOF