35 lines
2.6 KiB
JSON
35 lines
2.6 KiB
JSON
{
  "name": "Kimi K2.5",
  "category": "Chinese AI Model",
  "developer": "Moonshot AI",
  "model_family": "Kimi",
  "release_date": "December 2024",
  "swe_bench_verified_score": "~48-52% on SWE-bench Verified (reported by community) [uncertain]",
  "swe_bench_full_score": "Not officially reported [uncertain]",
  "swe_bench_lite_score": "Competitive with GPT-4 Turbo [uncertain]",
  "other_coding_benchmarks": "Strong on HumanEval (90%+), competitive on MBPP; excels at long-context code understanding",
  "input_price_per_1m": "$2.00 (standard), $1.00 (batch)",
  "output_price_per_1m": "$8.00 (standard), $4.00 (batch)",
  "pricing_tier_notes": "Batch processing available at 50% discount; caching available for repeated context",
  "agentic_coding_features": "Advanced tool use, autonomous planning, code execution, file operations, web browsing, long-context coherence",
  "context_window": "256K tokens (up to 2M in beta for some use cases)",
  "supported_tools": "Code interpreter, file I/O, web search, API calling, image analysis, multi-step task execution",
  "multi_file_handling": "Excellent - specifically designed for large codebase understanding and multi-file refactoring",
  "reddit_sentiment": "Very positive on r/LocalLLaMA and r/ChatGPT; praised for value proposition and capabilities",
  "x_twitter_sentiment": "Highly positive among developers; considered top non-OpenAI/Anthropic option for coding",
  "common_praises": "Massive context window, excellent long-document handling, great value for money, strong reasoning",
  "common_complaints": "Occasional availability issues, API documentation could be better, less enterprise polish than Claude",
  "notable_use_cases_shared": "Large codebase analysis, book-length document processing, multi-file refactoring, research paper analysis",
  "ideal_for": "Large context coding, document analysis, long-form code generation, budget-conscious enterprise use",
  "not_recommended_for": "Users requiring guaranteed uptime SLAs, very short simple queries (overkill)",
  "comparison_to_opus_46": "Competitive on many tasks; beats Opus on context length, loses on some reasoning benchmarks",
  "can_replace_opus_46": "Yes for most coding tasks, especially those benefiting from long context",
  "replacement_confidence_score": 8,
  "replacement_tradeoffs": "2-3x cheaper than Opus with larger context window, slightly less refined reasoning on edge cases",
  "cost_comparison_vs_opus": "Input: ~60% cheaper, Output: ~50% cheaper than Claude Opus 4.6",
  "uncertain": [
    "swe_bench_verified_score",
    "swe_bench_full_score",
    "swe_bench_lite_score"
  ]
}