[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-XiaomiMiMo--MiMo-V2-Flash":3,"tool-XiaomiMiMo--MiMo-V2-Flash":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 
代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",151314,2,"2026-04-11T23:32:58",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",108322,"2026-04-10T11:39:34",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 
协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[52,13,15,14],"插件",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":10,"last_commit_at":59,"category_tags":60,"status":17},4487,"LLMs-from-scratch","rasbt\u002FLLMs-from-scratch","LLMs-from-scratch 是一个基于 PyTorch 的开源教育项目，旨在引导用户从零开始一步步构建一个类似 ChatGPT 的大型语言模型（LLM）。它不仅是同名技术著作的官方代码库，更提供了一套完整的实践方案，涵盖模型开发、预训练及微调的全过程。\n\n该项目主要解决了大模型领域“黑盒化”的学习痛点。许多开发者虽能调用现成模型，却难以深入理解其内部架构与训练机制。通过亲手编写每一行核心代码，用户能够透彻掌握 Transformer 架构、注意力机制等关键原理，从而真正理解大模型是如何“思考”的。此外，项目还包含了加载大型预训练权重进行微调的代码，帮助用户将理论知识延伸至实际应用。\n\nLLMs-from-scratch 特别适合希望深入底层原理的 AI 开发者、研究人员以及计算机专业的学生。对于不满足于仅使用 API，而是渴望探究模型构建细节的技术人员而言，这是极佳的学习资源。其独特的技术亮点在于“循序渐进”的教学设计：将复杂的系统工程拆解为清晰的步骤，配合详细的图表与示例，让构建一个虽小但功能完备的大模型变得触手可及。无论你是想夯实理论基础，还是为未来研发更大规模的模型做准备",90106,"2026-04-06T11:19:32",[35,15,13,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":66,"readme_en":67,"readme_zh":68,"quickstart_zh":69,"use_case_zh":70,"hero_image_url":71,"owner_login":72,"owner_name":73,"owner_avatar_url":74,"owner_bio":75,"owner_company":76,"owner_location":76,"owner_email":77,"owner_twitter":72,"owner_website":76,"owner_url":78,"languages":76,"stars":79,"forks":80,"last_commit_at":81,"license":82,"difficulty_score":83,"env_os":84,"env_gpu":85,"env_ram":86,"env_deps":87,"category_tags":90,"github_topics":76,"view_count":32,"oss_zip_url":76,"oss_zip_packed_at":76,"status":17,"created_at":91,"updated_at":92,"faqs":93,"releases":104},6801,"XiaomiMiMo\u002FMiMo-V2-Flash","MiMo-V2-Flash","MiMo-V2-Flash: Efficient Reasoning, Coding, and Agentic Foundation Model","MiMo-V2-Flash 是小米开源的一款高效混合专家（MoE）大语言模型，专为高速推理、代码生成及智能体任务打造。它拥有 3090 亿总参数，但每次推理仅激活 150 亿参数，在保持顶尖性能的同时大幅降低了计算成本。\n\n针对传统大模型在处理长上下文时显存占用高、生成速度慢的痛点，MiMo-V2-Flash 引入了创新的混合注意力架构，通过滑动窗口与全局注意力的巧妙结合，将键值缓存存储需求降低近 6 倍，并支持长达 256k 的上下文窗口。此外，其独有的多令牌预测（MTP）技术能让输出速度提升三倍，显著加速推理过程及强化学习训练。在智能体能力方面，经过大规模代理强化学习优化，它在 SWE-Bench 等复杂编程与推理基准测试中表现卓越。\n\n这款模型非常适合需要部署高性能 AI 
应用的开发者、追求极致效率的研究人员，以及希望构建自主智能体系统的工程师。无论是处理超长文档分析、复杂代码编写，还是开发自动化工作流，MiMo-V2-Flash 都能提供强劲且经济的技术支持。","\u003Cbr\u002F>\u003Cbr\u002F>\n\n\u003Cdiv align=\"center\">\n  \u003Cpicture>\n    \u003Csource srcset=\"https:\u002F\u002Fgithub.com\u002FXiaomiMiMo\u002FMiMo-V2-Flash\u002Fraw\u002Fmain\u002Ffigures\u002FXiaomi_MiMo_darkmode.png?raw=true\" media=\"(prefers-color-scheme: dark)\">\n    \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FXiaomiMiMo_MiMo-V2-Flash_readme_55036878a571.png\" width=\"60%\" alt=\"Xiaomi-MiMo\" \u002F>\n  \u003C\u002Fpicture>\n\u003C\u002Fdiv>\n\n\u003Cbr\u002F>\n\n\u003Cdiv align=\"center\" style=\"line-height: 1;\">\n  |\n  \u003Ca href=\"https:\u002F\u002Fhuggingface.co\u002FXiaomiMiMo\u002FMiMo-V2-Flash\" target=\"_blank\">🤗 HuggingFace\u003C\u002Fa>\n  &nbsp;|\n  \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FXiaomiMiMo\u002FMiMo-V2-Flash\u002Fblob\u002Fmain\u002Fpaper.pdf\" target=\"_blank\">📔 Technical Report \u003C\u002Fa>\n  &nbsp;|\n  \u003Ca href=\"https:\u002F\u002Fmimo.xiaomi.com\u002Fblog\u002Fmimo-v2-flash\" target=\"_blank\">📰 Blog \u003C\u002Fa>\n  &nbsp;|\n  \u003Cbr\u002F>\u003Cbr\u002F>\n  \u003Cstrong>Play around!\u003C\u002Fstrong> &nbsp;\n  \u003Ca href=\"https:\u002F\u002Faistudio.xiaomimimo.com\" target=\"_blank\">🗨️ Xiaomi MiMo Studio \u003C\u002Fa>\n  &nbsp;\n  \u003Ca href=\"https:\u002F\u002Fplatform.xiaomimimo.com\u002F\" target=\"_blank\">🎨 Xiaomi MiMo API Platform \u003C\u002Fa>\n\u003C\u002Fdiv>\n\u003Cbr\u002F>\n\n# MiMo-V2-Flash\n\n**MiMo-V2-Flash** is a Mixture-of-Experts (MoE) language model with **309B total parameters** and **15B active parameters**. 
Designed for high-speed reasoning and agentic workflows, it utilizes a novel hybrid attention architecture and Multi-Token Prediction (MTP) to achieve state-of-the-art performance while significantly reducing inference costs.\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"80%\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FXiaomiMiMo_MiMo-V2-Flash_readme_27bb8220ca56.jpg\">\n\u003C\u002Fp>\n\n-----\n\n## 1. Introduction\n\nMiMo-V2-Flash creates a new balance between long-context modeling capability and inference efficiency. Key features include:\n\n  * **Hybrid Attention Architecture**: Interleaves Sliding Window Attention (SWA) and Global Attention (GA) with a 5:1 ratio and an aggressive 128-token window. This reduces KV-cache storage by nearly 6x while maintaining long-context performance via learnable **attention sink bias**.\n  * **Multi-Token Prediction (MTP)**: Equipped with a lightweight MTP module (0.33B params\u002Fblock) using dense FFNs. This triples output speed during inference and also accelerates rollout in RL training.\n  * **Efficient Pre-Training**: Trained on 27T tokens using FP8 mixed precision and native 32k seq length. The context window supports up to 256k length.\n  * **Agentic Capabilities**: Post-training utilizes Multi-Teacher On-Policy Distillation (MOPD) and large-scale agentic RL, achieving superior performance on **SWE-Bench** and complex reasoning tasks.\n\n-----\n\n## 2. 
Model Downloads\n\n| Model                  | Total Params | Active Params | Context Length |                               Download                                |\n| :--------------------- | :----------: | :-----------: | :------------: | :-------------------------------------------------------------------: |\n| **MiMo-V2-Flash-Base** |     309B     |      15B      |      256k      | [🤗 HuggingFace](https:\u002F\u002Fhuggingface.co\u002FXiaomiMiMo\u002FMiMo-V2-Flash-Base) |\n| **MiMo-V2-Flash**      |     309B     |      15B      |      256k      |   [🤗 HuggingFace](https:\u002F\u002Fhuggingface.co\u002FXiaomiMiMo\u002FMiMo-V2-Flash)    |\n\n> [!IMPORTANT]\n> We also open-source the 3-layer MTP weights to foster community research.\n\n-----\n\n## 3. Evaluation Results\n\n### Base Model Evaluation\n\nMiMo-V2-Flash-Base demonstrates strong performance across standard benchmarks, surpassing models with significantly larger parameter counts.\n\n| Category         | Benchmark               | Setting\u002FLength | MiMo-V2-Flash Base |  Kimi-K2 Base   | DeepSeek-V3.1 Base | DeepSeek-V3.2 Exp Base |\n| :--------------- | :---------------------- | :------------- | :----------------: | :-------------: | :----------------: | :--------------------: |\n| **Params**       | **#Activated \u002F #Total** | -              |   **15B \u002F 309B**   | **32B \u002F 1043B** |   **37B \u002F 671B**   |     **37B \u002F 671B**     |\n| **General**      | BBH                     | 3-shot         |        88.5        |      88.7       |        88.2        |          88.7          |\n|                  | MMLU                    | 5-shot         |        86.7        |      87.8       |        87.4        |          87.8          |\n|                  | MMLU-Redux              | 5-shot         |        90.6        |      90.2       |        90.0        |          90.4          |\n|                  | MMLU-Pro                | 5-shot         |        73.2        |      69.2       |        
58.8        |          62.1          |\n|                  | DROP                    | 3-shot         |        84.7        |      83.6       |        86.3        |          86.6          |\n|                  | ARC-Challenge           | 25-shot        |        95.9        |      96.2       |        95.6        |          95.5          |\n|                  | HellaSwag               | 10-shot        |        88.5        |      94.6       |        89.2        |          89.4          |\n|                  | WinoGrande              | 5-shot         |        83.8        |      85.3       |        85.9        |          85.6          |\n|                  | TriviaQA                | 5-shot         |        80.3        |      85.1       |        83.5        |          83.9          |\n|                  | GPQA-Diamond            | 5-shot         |        55.1        |      48.1       |        51.0        |          52.0          |\n|                  | SuperGPQA               | 5-shot         |        41.1        |      44.7       |        42.3        |          43.6          |\n|                  | SimpleQA                | 5-shot         |        20.6        |      35.3       |        26.3        |          27.0          |\n| **Math**         | GSM8K                   | 8-shot         |        92.3        |      92.1       |        91.4        |          91.1          |\n|                  | MATH                    | 4-shot         |        71.0        |      70.2       |        62.6        |          62.5          |\n|                  | AIME 24&25              | 2-shot         |        35.3        |      31.6       |        21.6        |          24.8          |\n| **Code**         | HumanEval+              | 1-shot         |        70.7        |      84.8       |        64.6        |          67.7          |\n|                  | MBPP+                   | 3-shot         |        71.4        |      73.8       |        72.2        |          69.8          |\n|         
         | CRUXEval-I              | 1-shot         |        67.5        |      74.0       |        62.1        |          63.9          |\n|                  | CRUXEval-O              | 1-shot         |        79.1        |      83.5       |        76.4        |          74.9          |\n|                  | MultiPL-E HumanEval     | 0-shot         |        59.5        |      60.5       |        45.9        |          45.7          |\n|                  | MultiPL-E MBPP          | 0-shot         |        56.7        |      58.8       |        52.5        |          50.6          |\n|                  | BigCodeBench            | 0-shot         |        70.1        |      61.7       |        63.0        |          62.9          |\n|                  | LiveCodeBench v6        | 1-shot         |        30.8        |      26.3       |        24.8        |          24.9          |\n|                  | SWE-Bench (AgentLess)   | 3-shot         |        30.8        |      28.2       |        24.8        |          9.4*          |\n| **Chinese**      | C-Eval                  | 5-shot         |        87.9        |      92.5       |        90.0        |          91.0          |\n|                  | CMMLU                   | 5-shot         |        87.4        |      90.9       |        88.8        |          88.9          |\n|                  | C-SimpleQA              | 5-shot         |        61.5        |      77.6       |        70.9        |          68.0          |\n| **Multilingual** | GlobalMMLU              | 5-shot         |        76.6        |      80.7       |        81.9        |          82.0          |\n|                  | INCLUDE                 | 5-shot         |        71.4        |      75.3       |        77.2        |          77.2          |\n| **Long Context** | NIAH-Multi              | 32K            |        99.3        |      99.8       |        99.7        |         85.6*          |\n|                  |                         | 64K          
  |        99.9        |      100.0      |        98.6        |         85.9*          |\n|                  |                         | 128K           |        98.6        |      99.5       |        97.2        |         94.3*          |\n|                  |                         | 256K           |        96.7        |        -        |         -          |           -            |\n|                  | GSM-Infinite Hard       | 16K            |        37.7        |      34.6       |        41.5        |          50.4          |\n|                  |                         | 32K            |        33.7        |      26.1       |        38.8        |          45.2          |\n|                  |                         | 64K            |        31.5        |      16.0       |        34.7        |          32.6          |\n|                  |                         | 128K           |        29.0        |       8.8       |        28.7        |          25.7          |\n\n> \\* indicates the model may fail to follow the prompt or format.\n\n### Post-training Model Evaluation\n\nFollowing our Post-Training Paradigm with MOPD and Agentic RL, the model achieves SOTA reasoning and agentic performance.\n\n\n\n| Benchmark                      | MiMo-V2 Flash | Kimi-K2 Thinking | DeepSeek-V3.2 Thinking | Gemini-3.0 Pro | Claude Sonnet 4.5 | GPT-5 High |\n| :----------------------------- | :-----------: | :--------------: | :--------------------: | :------------: | :---------------: | :--------: |\n| **Reasoning**                  |               |                  |                        |                |                   |            |\n| MMLU-Pro                       |     84.9      |       84.6       |          85.0          |      90.1      |       88.2        |    87.5    |\n| GPQA-Diamond                   |     83.7      |       84.5       |          82.4          |      91.9      |       83.4        |    85.7    |\n| HLE (no tools)                 |     
22.1      |       23.9       |          25.1          |      37.5      |       13.7        |    26.3    |\n| AIME 2025                      |     94.1      |       94.5       |          93.1          |      95.0      |       87.0        |    94.6    |\n| HMMT Feb. 2025                 |     84.4      |       89.4       |          92.5          |      97.5      |       79.2        |    88.3    |\n| LiveCodeBench-v6               |     80.6      |       83.1       |          83.3          |      90.7      |       64.0        |    84.5    |\n| **General Writing**            |               |                  |                        |                |                   |            |\n| Arena-Hard (Hard Prompt)       |     54.1      |       71.9       |          53.4          |      72.6      |       63.3        |    71.9    |\n| Arena-Hard (Creative Writing)  |     86.2      |       80.1       |          88.8          |      93.6      |       76.7        |    92.2    |\n| **Long Context**               |               |                  |                        |                |                   |            |\n| LongBench V2                   |     60.6      |       45.1       |          58.4          |      65.6      |       61.8        |     -      |\n| MRCR                           |     45.7      |       44.2       |          55.5          |      89.7      |       55.4        |     -      |\n| **Code Agent**                 |               |                  |                        |                |                   |            |\n| SWE-Bench Verified             |     73.4      |       71.3       |          73.1          |      76.2      |       77.2        |    74.9    |\n| SWE-Bench Multilingual         |     71.7      |       61.1       |          70.2          |       -        |       68.0        |    55.3    |\n| Terminal-Bench Hard            |     30.5      |       30.6       |          35.4          |      39.0      |       33.3        |    30.5  
  |\n| Terminal-Bench 2.0             |     38.5      |       35.7       |          46.4          |      54.2      |       42.8        |    35.2    |\n| **General Agent**              |               |                  |                        |                |                   |            |\n| BrowseComp                     |     45.4      |        -         |          51.4          |       -        |       24.1        |    54.9    |\n| BrowseComp (w\u002F Context Manage) |     58.3      |       60.2       |          67.6          |      59.2      |         -         |     -      |\n| $\\tau^2$-Bench                 |     80.3      |       74.3       |          80.3          |      85.4      |       84.7        |    80.2    |\n\n-----\n\n## 4. Model Architecture\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"80%\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FXiaomiMiMo_MiMo-V2-Flash_readme_b5f026610d49.png\">\n\u003C\u002Fp>\n\n### Hybrid Sliding Window Attention\n\nMiMo-V2-Flash addresses the quadratic complexity of long contexts by interleaving Local Sliding Window Attention (SWA) and Global Attention (GA).\n\n  * **Configuration**: Stacks of $M=8$ hybrid blocks. Each block contains $N=5$ SWA layers followed by 1 GA layer.\n  * **Efficiency**: SWA layers use a window size of 128 tokens, reducing KV cache significantly.\n  * **Sink Bias**: Learnable attention sink bias is applied to maintain performance despite the aggressive window size.\n\n### Lightweight Multi-Token Prediction (MTP)\n\nUnlike traditional speculative decoding, our MTP module is natively integrated for training and inference.\n\n  * **Structure**: Uses a dense FFN (instead of MoE) and SWA (instead of GA) to keep the parameter count low (0.33B per block).\n  * **Performance**: Facilitates self-speculative decoding, tripling generation speed and mitigating GPU idleness during small-batch RL training.\n\n-----\n\n## 5. 
Post-Training Technical Highlights\n\nMiMo-V2-Flash leverages a post-training pipeline designed to maximize reasoning and agentic capabilities through innovative distillation and reinforcement learning strategies.\n\n### 5.1 Multi-Teacher On-Policy Distillation (MOPD)\n\nWe introduce **Multi-Teacher On-Policy Distillation (MOPD)**, a new paradigm that formulates knowledge distillation as a reinforcement learning process.\n* **Dense Token-Level Guidance**: Unlike methods relying on sparse sequence-level feedback, MOPD utilizes domain-specific expert models (teachers) to provide supervision at every token position.\n* **On-Policy Optimization**: The student model learns from its own generated responses rather than a fixed dataset. This eliminates exposure bias and ensures smaller, more stable gradient updates.\n* **Inherent Reward Robustness**: Rewards are derived from the distribution divergence between student and teacher, making the process naturally resistant to reward hacking.\n\n### 5.2 Scaling Agentic RL\n\nWe significantly scale up the agentic training environments to improve intelligence and generalization.\n* **Massive Code Agent Environments**: We utilize real-world GitHub issues to create over 100,000 verifiable tasks. Our automated pipeline maintains a Kubernetes cluster capable of running over 10,000 concurrent pods with a 70% environment setup success rate.\n* **Multimodal Verifier for WebDev**: For web development tasks, we employ a vision-based verifier that evaluates code execution via recorded videos rather than static screenshots. 
This reduces visual hallucination and ensures functional correctness.\n* **Cross-Domain Generalization**: Our experiments show that large-scale RL training on code agents effectively generalizes to other domains, boosting performance in Math and General Agent tasks.\n\n### 5.3 Advanced RL Infrastructure\n\nTo support high-throughput RL training for large-scale MoE models, we implemented several infrastructure optimizations on top of SGLang and Megatron-LM.\n* **Rollout Routing Replay (R3)**: Addresses numerical precision inconsistencies in MoE routing between inference and training. R3 reuses the exact routed experts from rollout during the training pass, ensuring consistency with negligible overhead.\n* **Request-Level Prefix Cache**: In multi-turn agent training, this cache stores KV states and routed experts from prior turns. It avoids re-computation and ensures sampling consistency across turns.\n* **Fine-Grained Data Scheduler**: We extend the rollout engine to schedule fine-grained sequences instead of micro-batches. Combined with partial rollout, this significantly reduces GPU idleness caused by long-tail stragglers.\n* **Toolbox & Tool Manager**: A two-layer design using Ray actor pools to handle resource contention. It eliminates cold-start delays for tool execution and isolates task logic from system policies.\n\n-----\n\n## 6. Inference & Deployment\n\nMiMo-V2-Flash supports FP8 mixed precision inference. 
We recommend using **SGLang** for optimal performance.\n\n### Quick Start with SGLang\n\nFollowing https:\u002F\u002Flmsys.org\u002Fblog\u002F2025-12-16-mimo-v2-flash\u002F, please use the compatible SGLang version as follows.\n\n```bash\npip install sglang==0.5.6.post2.dev8005+pr.15207.g39d5bd57a \\\n  --index-url https:\u002F\u002Fsgl-project.github.io\u002Fwhl\u002Fpr\u002F \\\n  --extra-index-url https:\u002F\u002Fpypi.org\u002Fsimple\n\n#Launch the server\nSGLANG_ENABLE_SPEC_V2=1 python3 -m sglang.launch_server \\\n        --model-path XiaomiMiMo\u002FMiMo-V2-Flash \\\n        --served-model-name mimo-v2-flash \\\n        --pp-size 1 \\\n        --dp-size 2 \\\n        --enable-dp-attention \\\n        --tp-size 8 \\\n        --moe-a2a-backend deepep \\\n        --page-size 1 \\\n        --host 0.0.0.0 \\\n        --port 9001 \\\n        --trust-remote-code \\\n        --mem-fraction-static 0.75 \\\n        --max-running-requests 128 \\\n        --chunked-prefill-size 16384 \\\n        --reasoning-parser qwen3 \\\n        --tool-call-parser mimo \\\n        --context-length 262144 \\\n        --attention-backend fa3 \\\n        --speculative-algorithm EAGLE \\\n        --speculative-num-steps 3 \\\n        --speculative-eagle-topk 1 \\\n        --speculative-num-draft-tokens 4 \\\n        --enable-mtp\n\n# Send request\ncurl -i http:\u002F\u002Flocalhost:9001\u002Fv1\u002Fchat\u002Fcompletions \\\n    -H 'Content-Type:application\u002Fjson' \\\n    -d  '{\n            \"messages\" : [{\n                \"role\": \"user\",\n                \"content\": \"Nice to meet you MiMo\"\n            }],\n            \"model\": \"mimo-v2-flash\",\n            \"max_tokens\": 4096,\n            \"temperature\": 0.8,\n            \"top_p\": 0.95,\n            \"stream\": true,\n            \"chat_template_kwargs\": {\n                \"enable_thinking\": true\n            }\n        }'\n```\n\n### Notifications\n\n#### 1. 
System prompt\n\n> [!IMPORTANT]\n> The following system prompts are **HIGHLY** recommended; please choose either the English or the Chinese version.\n\nEnglish\n\n```plaintext\nYou are MiMo, an AI assistant developed by Xiaomi.\n\nToday's date: {date} {week}. Your knowledge cutoff date is December 2024.\n```\n\nChinese\n\n```plaintext\n你是MiMo（中文名称也是MiMo），是小米公司研发的AI智能助手。\n\n今天的日期：{date} {week}，你的知识截止日期是2024年12月。\n```\n\n#### 2. Sampling parameters\n\n> [!IMPORTANT]\n> Recommended sampling parameters:\n>\n> `top_p=0.95`\n> \n> `temperature=0.8` for math, writing, web-dev\n> \n> `temperature=0.3` for agentic tasks (e.g., vibe-coding, tool-use)\n\n#### 3. Tool-use practice\n\n> [!IMPORTANT]\n> In the thinking mode with multi-turn tool calls, the model returns a `reasoning_content` field alongside `tool_calls`. To continue the conversation, the user must persist all history `reasoning_content` in the `messages` array of each subsequent request.\n\n-----\n\n## 7. Citation\n\nIf you find our work helpful, please cite our technical report:\n\n```bibtex\n@misc{xiao2026mimov2flashtechnicalreport,\n      title={MiMo-V2-Flash Technical Report}, \n      author={LLM-Core Xiaomi},\n      year={2026},\n      eprint={2601.02780},\n      archivePrefix={arXiv},\n      primaryClass={cs.CL},\n      url={https:\u002F\u002Farxiv.org\u002Fabs\u002F2601.02780}, \n}\n```\n\n## 8. 
Contact\n\nPlease contact us at [mimo@xiaomi.com](mailto:mimo@xiaomi.com), join our WeChat group below or open an issue if you have any questions.\n\n\u003Cp align=\"center\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FXiaomiMiMo_MiMo-V2-Flash_readme_2951e13ddf1b.jpg\" width=\"20%\" \u002F>\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FXiaomiMiMo_MiMo-V2-Flash_readme_4ba994e7fdb5.jpg\" width=\"20%\" \u002F>\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FXiaomiMiMo_MiMo-V2-Flash_readme_ed0eaac9ae4a.jpg\" width=\"20%\" \u002F>\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FXiaomiMiMo_MiMo-V2-Flash_readme_4824cddb8d35.jpg\" width=\"20%\" \u002F>\n\u003C\u002Fp>\n","\u003Cbr\u002F>\u003Cbr\u002F>\n\n\u003Cdiv align=\"center\">\n  \u003Cpicture>\n    \u003Csource srcset=\"https:\u002F\u002Fgithub.com\u002FXiaomiMiMo\u002FMiMo-V2-Flash\u002Fraw\u002Fmain\u002Ffigures\u002FXiaomi_MiMo_darkmode.png?raw=true\" media=\"(prefers-color-scheme: dark)\">\n    \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FXiaomiMiMo_MiMo-V2-Flash_readme_55036878a571.png\" width=\"60%\" alt=\"Xiaomi-MiMo\" \u002F>\n  \u003C\u002Fpicture>\n\u003C\u002Fdiv>\n\n\u003Cbr\u002F>\n\n\u003Cdiv align=\"center\" style=\"line-height: 1;\">\n  |\n  \u003Ca href=\"https:\u002F\u002Fhuggingface.co\u002FXiaomiMiMo\u002FMiMo-V2-Flash\" target=\"_blank\">🤗 HuggingFace\u003C\u002Fa>\n  &nbsp;|\n  \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FXiaomiMiMo\u002FMiMo-V2-Flash\u002Fblob\u002Fmain\u002Fpaper.pdf\" target=\"_blank\">📔 技术报告 \u003C\u002Fa>\n  &nbsp;|\n  \u003Ca href=\"https:\u002F\u002Fmimo.xiaomi.com\u002Fblog\u002Fmimo-v2-flash\" target=\"_blank\">📰 博客 \u003C\u002Fa>\n  &nbsp;|\n  \u003Cbr\u002F>\u003Cbr\u002F>\n  \u003Cstrong>快来体验吧！\u003C\u002Fstrong> &nbsp;\n  \u003Ca href=\"https:\u002F\u002Faistudio.xiaomimimo.com\" target=\"_blank\">🗨️ 小米MiMo Studio \u003C\u002Fa>\n  
&nbsp;\n  \u003Ca href=\"https:\u002F\u002Fplatform.xiaomimimo.com\u002F\" target=\"_blank\">🎨 小米MiMo API平台 \u003C\u002Fa>\n\u003C\u002Fdiv>\n\u003Cbr\u002F>\n\n# MiMo-V2-Flash\n\n**MiMo-V2-Flash** 是一种专家混合模型（MoE），总参数量达 **3090亿**，活跃参数量为 **150亿**。该模型专为高速推理和智能体工作流而设计，采用新颖的混合注意力架构和多标记预测（MTP）技术，在显著降低推理成本的同时，实现了业界领先的性能。\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"80%\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FXiaomiMiMo_MiMo-V2-Flash_readme_27bb8220ca56.jpg\">\n\u003C\u002Fp>\n\n-----\n\n## 1. 简介\n\nMiMo-V2-Flash 在长上下文建模能力和推理效率之间取得了全新的平衡。其主要特性包括：\n\n  * **混合注意力架构**：以 5:1 的比例交替使用滑动窗口注意力（SWA）和全局注意力（GA），并采用激进的 128 个标记窗口。这一设计将 KV 缓存存储需求降低了近 6 倍，同时通过可学习的 **注意力汇点偏置** 维持了长上下文性能。\n  * **多标记预测（MTP）**：配备轻量级 MTP 模块（每个模块 0.33B 参数），采用密集前馈网络实现。这使得推理时的输出速度提升至三倍，并有望加速强化学习训练中的 rollout（采样）过程。\n  * **高效的预训练**：使用 FP8 混合精度和原生 32k 序列长度，在 27T 标记数据上进行训练。上下文窗口最长可达 256k 标记。\n  * **智能体能力**：在后训练阶段采用多教师在线策略蒸馏（MOPD）和大规模智能体强化学习，从而在 **SWE-Bench** 和复杂推理任务中表现出色。\n\n-----\n\n## 2. 模型下载\n\n| 模型                  | 总参数 | 活跃参数 | 上下文长度 |                               下载                                |\n| :--------------------- | :----------: | :-----------: | :------------: | :-------------------------------------------------------------------: |\n| **MiMo-V2-Flash-Base** |     309B     |      15B      |      256k      | [🤗 HuggingFace](https:\u002F\u002Fhuggingface.co\u002FXiaomiMiMo\u002FMiMo-V2-Flash-Base) |\n| **MiMo-V2-Flash**      |     309B     |      15B      |      256k      |   [🤗 HuggingFace](https:\u002F\u002Fhuggingface.co\u002FXiaomiMiMo\u002FMiMo-V2-Flash)    |\n\n> [!IMPORTANT]\n> 我们还开源了 3 层 MTP 权重，以促进社区研究。\n\n-----\n\n## 3. 
评估结果\n\n### 基础模型评估\n\nMiMo-V2-Flash-Base 在标准基准测试中表现出色，超越了参数量远超其自身的模型。\n\n| 类别         | 基准测试               | 设置\u002F长度 | MiMo-V2-Flash Base |  Kimi-K2 Base   | DeepSeek-V3.1 Base | DeepSeek-V3.2 Exp Base |\n| :--------------- | :---------------------- | :------------- | :----------------: | :-------------: | :----------------: | :--------------------: |\n| **参数量**       | **激活参数 \u002F 总参数** | -              |   **150亿 \u002F 3090亿**   | **320亿 \u002F 10430亿** |   **370亿 \u002F 6710亿**   |     **370亿 \u002F 6710亿**     |\n| **通用任务**      | BBH                     | 3-shot         |        88.5        |      88.7       |        88.2        |          88.7          |\n|                  | MMLU                    | 5-shot         |        86.7        |      87.8       |        87.4        |          87.8          |\n|                  | MMLU-Redux              | 5-shot         |        90.6        |      90.2       |        90.0        |          90.4          |\n|                  | MMLU-Pro                | 5-shot         |        73.2        |      69.2       |        58.8        |          62.1          |\n|                  | DROP                    | 3-shot         |        84.7        |      83.6       |        86.3        |          86.6          |\n|                  | ARC-Challenge           | 25-shot        |        95.9        |      96.2       |        95.6        |          95.5          |\n|                  | HellaSwag               | 10-shot        |        88.5        |      94.6       |        89.2        |          89.4          |\n|                  | WinoGrande              | 5-shot         |        83.8        |      85.3       |        85.9        |          85.6          |\n|                  | TriviaQA                | 5-shot         |        80.3        |      85.1       |        83.5        |          83.9          |\n|                  | GPQA-Diamond            | 5-shot         |        55.1        |      48.1       |        51.0        | 
         52.0          |\n|                  | SuperGPQA               | 5-shot         |        41.1        |      44.7       |        42.3        |          43.6          |\n|                  | SimpleQA                | 5-shot         |        20.6        |      35.3       |        26.3        |          27.0          |\n| **数学**         | GSM8K                   | 8-shot         |        92.3        |      92.1       |        91.4        |          91.1          |\n|                  | MATH                    | 4-shot         |        71.0        |      70.2       |        62.6        |          62.5          |\n|                  | AIME 24&25              | 2-shot         |        35.3        |      31.6       |        21.6        |          24.8          |\n| **代码**         | HumanEval+              | 1-shot         |        70.7        |      84.8       |        64.6        |          67.7          |\n|                  | MBPP+                   | 3-shot         |        71.4        |      73.8       |        72.2        |          69.8          |\n|                  | CRUXEval-I              | 1-shot         |        67.5        |      74.0       |        62.1        |          63.9          |\n|                  | CRUXEval-O              | 1-shot         |        79.1        |      83.5       |        76.4        |          74.9          |\n|                  | MultiPL-E HumanEval     | 0-shot         |        59.5        |      60.5       |        45.9        |          45.7          |\n|                  | MultiPL-E MBPP          | 0-shot         |        56.7        |      58.8       |        52.5        |          50.6          |\n|                  | BigCodeBench            | 0-shot         |        70.1        |      61.7       |        63.0        |          62.9          |\n|                  | LiveCodeBench v6        | 1-shot         |        30.8        |      26.3       |        24.8        |          24.9          |\n|                  | 
SWE-Bench (AgentLess)   | 3-shot         |        30.8        |      28.2       |        24.8        |          9.4*          |\n| **中文**      | C-Eval                  | 5-shot         |        87.9        |      92.5       |        90.0        |          91.0          |\n|                  | CMMLU                   | 5-shot         |        87.4        |      90.9       |        88.8        |          88.9          |\n|                  | C-SimpleQA              | 5-shot         |        61.5        |      77.6       |        70.9        |          68.0          |\n| **多语言** | GlobalMMLU              | 5-shot         |        76.6        |      80.7       |        81.9        |          82.0          |\n|                  | INCLUDE                 | 5-shot         |        71.4        |      75.3       |        77.2        |          77.2          |\n| **长上下文** | NIAH-Multi              | 32K            |        99.3        |      99.8       |        99.7        |         85.6*          |\n|                  |                         | 64K            |        99.9        |      100.0      |        98.6        |         85.9*          |\n|                  |                         | 128K           |        98.6        |      99.5       |        97.2        |         94.3*          |\n|                  |                         | 256K           |        96.7        |        -        |         -          |           -            |\n|                  | GSM-Infinite Hard       | 16K            |        37.7        |      34.6       |        41.5        |          50.4          |\n|                  |                         | 32K            |        33.7        |      26.1       |        38.8        |          45.2          |\n|                  |                         | 64K            |        31.5        |      16.0       |        34.7        |          32.6          |\n|                  |                         | 128K           |        29.0        |       
8.8       |        28.7        |          25.7          |\n\n> \\* 表示该模型可能未能遵循提示或格式要求。\n\n### 训练后模型评估\n\n遵循我们的基于MOPD和智能体强化学习的训练后范式，该模型在推理能力和智能体性能方面均达到了当前最优水平。\n\n\n\n| 基准测试                      | MiMo-V2 Flash | Kimi-K2 Thinking | DeepSeek-V3.2 Thinking | Gemini-3.0 Pro | Claude Sonnet 4.5 | GPT-5 High |\n| :----------------------------- | :-----------: | :--------------: | :--------------------: | :------------: | :---------------: | :--------: |\n| **推理**                  |               |                  |                        |                |                   |            |\n| MMLU-Pro                       |     84.9      |       84.6       |          85.0          |      90.1      |       88.2        |    87.5    |\n| GPQA-Diamond                   |     83.7      |       84.5       |          82.4          |      91.9      |       83.4        |    85.7    |\n| HLE (无工具)                 |     22.1      |       23.9       |          25.1          |      37.5      |       13.7        |    26.3    |\n| AIME 2025                      |     94.1      |       94.5       |          93.1          |      95.0      |       87.0        |    94.6    |\n| HMMT Feb. 
2025                 |     84.4      |       89.4       |          92.5          |      97.5      |       79.2        |    88.3    |\n| LiveCodeBench-v6               |     80.6      |       83.1       |          83.3          |      90.7      |       64.0        |    84.5    |\n| **通用写作**            |               |                  |                        |                |                   |            |\n| Arena-Hard (困难提示)       |     54.1      |       71.9       |          53.4          |      72.6      |       63.3        |    71.9    |\n| Arena-Hard (创意写作)  |     86.2      |       80.1       |          88.8          |      93.6      |       76.7        |    92.2    |\n| **长上下文**               |               |                  |                        |                |                   |            |\n| LongBench V2                   |     60.6      |       45.1       |          58.4          |      65.6      |       61.8        |     -      |\n| MRCR                           |     45.7      |       44.2       |          55.5          |      89.7      |       55.4        |     -      |\n| **代码智能体**                 |               |                  |                        |                |                   |            |\n| SWE-Bench 验证版             |     73.4      |       71.3       |          73.1          |      76.2      |       77.2        |    74.9    |\n| SWE-Bench 多语言版         |     71.7      |       61.1       |          70.2          |       -        |       68.0        |    55.3    |\n| Terminal-Bench 困难版            |     30.5      |       30.6       |          35.4          |      39.0      |       33.3        |    30.5    |\n| Terminal-Bench 2.0             |     38.5      |       35.7       |          46.4          |      54.2      |       42.8        |    35.2    |\n| **通用智能体**              |               |                  |                        |                |                   |            |\n| BrowseComp                     
|     45.4      |        -         |          51.4          |       -        |       24.1        |    54.9    |\n| BrowseComp (带上下文管理) |     58.3      |       60.2       |          67.6          |      59.2      |         -         |     -      |\n| $\\tau^2$-Bench                 |     80.3      |       74.3       |          80.3          |      85.4      |       84.7        |    80.2    |\n\n-----\n\n## 4. 模型架构\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"80%\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FXiaomiMiMo_MiMo-V2-Flash_readme_b5f026610d49.png\">\n\u003C\u002Fp>\n\n### 混合滑动窗口注意力\n\nMiMo-V2-Flash通过交错使用局部滑动窗口注意力（SWA）和全局注意力（GA），解决了长上下文带来的二次复杂度问题。\n\n  * **配置**: 由$M=8$个混合块堆叠而成。每个块包含$N=5$层SWA，随后是一层GA。\n  * **效率**: SWA层采用128个标记的窗口大小，显著减少了KV缓存。\n  * **Sink偏置**: 应用了可学习的注意力sink偏置，以在激进的窗口大小下仍保持性能。\n\n### 轻量级多标记预测（MTP）\n\n与传统的推测解码不同，我们的MTP模块在训练和推理中都是原生集成的。\n\n  * **结构**: 使用密集FFN（而非MoE）和SWA（而非GA），以保持较低的参数量（每个块0.33B）。\n  * **性能**: 促进了自推测解码，使生成速度提高三倍，并缓解了小批量强化学习训练中的GPU空闲问题。\n\n-----\n\n## 5. 
训练后技术亮点\n\nMiMo-V2-Flash利用一套训练后流水线，通过创新的知识蒸馏和强化学习策略，最大限度地提升推理和智能体能力。\n\n### 5.1 多教师在线策略知识蒸馏（MOPD）\n\n我们引入了**多教师在线策略知识蒸馏（MOPD）**，这是一种将知识蒸馏表述为强化学习过程的新范式。\n* **密集的标记级指导**: 与依赖稀疏序列级反馈的方法不同，MOPD利用领域专家模型（教师）在每个标记位置提供监督。\n* **在线策略优化**: 学生模型从自身生成的响应中学习，而不是从固定的数据集中学习。这消除了暴露偏差，并确保更小、更稳定的梯度更新。\n* **固有的奖励鲁棒性**: 奖励来源于学生和教师之间的分布差异，使整个过程自然抵抗奖励欺骗。\n\n### 5.2 扩展智能体强化学习\n\n我们大幅扩展了智能体训练环境，以提升智能和泛化能力。\n* **大规模代码智能体环境**: 我们利用真实的GitHub问题创建了超过10万个可验证的任务。我们的自动化流水线维护着一个Kubernetes集群，能够运行超过1万个并发Pod，环境搭建成功率达到70%。\n* **面向Web开发的多模态验证器**: 对于Web开发任务，我们采用基于视觉的验证器，通过录制视频而非静态截图来评估代码执行情况。这减少了视觉幻觉，确保功能正确性。\n* **跨领域泛化**: 我们的实验表明，大规模的代码智能体强化学习训练能够有效泛化到其他领域，从而提升数学和通用智能体任务的表现。\n\n### 5.3 高级强化学习基础设施\n\n为了支持大规模MoE模型的高吞吐量强化学习训练，我们在SGLang和Megatron-LM的基础上实现了多项基础设施优化。\n* **Rollout路由重放（R3）**：解决推理与训练过程中MoE路由在数值精度上的不一致问题。R3在训练阶段复用推理阶段精确路由选择出的专家，从而确保一致性，且开销极低。\n* **请求级前缀缓存**：在多轮对话式智能体训练中，该缓存会存储前几轮的KV状态及路由选择的专家。它避免了重复计算，并保证各轮之间的采样一致性。\n* **细粒度数据调度器**：我们将推理引擎扩展为按细粒度序列而非微批次进行调度。结合部分推理机制，这显著减少了因长尾延迟任务导致的GPU空闲时间。\n* **工具箱与工具管理器**：采用两层设计，利用Ray Actor池来处理资源竞争问题。它消除了工具执行的冷启动延迟，并将任务逻辑与系统策略隔离开来。\n\n-----\n\n## 6. 
推理与部署\n\nMiMo-V2-Flash支持FP8混合精度推理。我们推荐使用**SGLang**以获得最佳性能。\n\n### 使用SGLang快速入门\n\n请参照https:\u002F\u002Flmsys.org\u002Fblog\u002F2025-12-16-mimo-v2-flash\u002F，按照以下方式安装兼容版本的SGLang。\n\n```bash\npip install sglang==0.5.6.post2.dev8005+pr.15207.g39d5bd57a \\\n  --index-url https:\u002F\u002Fsgl-project.github.io\u002Fwhl\u002Fpr\u002F \\\n  --extra-index-url https:\u002F\u002Fpypi.org\u002Fsimple\n\n#启动服务端\nSGLANG_ENABLE_SPEC_V2=1 python3 -m sglang.launch_server \\\n        --model-path XiaomiMiMo\u002FMiMo-V2-Flash \\\n        --served-model-name mimo-v2-flash \\\n        --pp-size 1 \\\n        --dp-size 2 \\\n        --enable-dp-attention \\\n        --tp-size 8 \\\n        --moe-a2a-backend deepep \\\n        --page-size 1 \\\n        --host 0.0.0.0 \\\n        --port 9001 \\\n        --trust-remote-code \\\n        --mem-fraction-static 0.75 \\\n        --max-running-requests 128 \\\n        --chunked-prefill-size 16384 \\\n        --reasoning-parser qwen3 \\\n        --tool-call-parser mimo \\\n        --context-length 262144 \\\n        --attention-backend fa3 \\\n        --speculative-algorithm EAGLE \\\n        --speculative-num-steps 3 \\\n        --speculative-eagle-topk 1 \\\n        --speculative-num-draft-tokens 4 \\\n        --enable-mtp\n\n#发送请求\ncurl -i http:\u002F\u002Flocalhost:9001\u002Fv1\u002Fchat\u002Fcompletions \\\n    -H 'Content-Type:application\u002Fjson' \\\n    -d  '{\n            \"messages\" : [{\n                \"role\": \"user\",\n                \"content\": \"Nice to meet you MiMo\"\n            }],\n            \"model\": \"mimo-v2-flash\",\n            \"max_tokens\": 4096,\n            \"temperature\": 0.8,\n            \"top_p\": 0.95,\n            \"stream\": true,\n            \"chat_template_kwargs\": {\n                \"enable_thinking\": true\n            }\n        }'\n```\n\n### 通知事项\n\n#### 1. 
系统提示词\n\n> [!IMPORTANT]\n> 强烈建议使用以下系统提示词，请从英文版或中文版中选择。\n\n英文：\n\n```plaintext\nYou are MiMo, an AI assistant developed by Xiaomi.\n\nToday's date: {date} {week}. Your knowledge cutoff date is December 2024.\n```\n\n中文：\n\n```plaintext\n你是MiMo（中文名称也是MiMo），是小米公司研发的AI智能助手。\n\n今天的日期：{date} {week}，你的知识截止日期是2024年12月。\n```\n\n#### 2. 采样参数\n\n> [!IMPORTANT]\n> 推荐的采样参数如下：\n>\n> `top_p=0.95`\n> \n> 数学、写作、Web开发等任务可设置为`temperature=0.8`；\n> \n> 对于智能体类任务（如氛围编程（vibe coding）、工具调用等），建议设置为`temperature=0.3`。\n\n#### 3. 工具使用注意事项\n\n> [!IMPORTANT]\n> 在多轮工具调用的思考模式下，模型会在返回`tool_calls`的同时附带`reasoning_content`字段。若要继续对话，用户必须将所有历史`reasoning_content`保留在后续每次请求的`messages`数组中。\n\n-----\n\n## 7. 引用\n\n如果您觉得我们的工作有所帮助，请引用我们的技术报告：\n\n```bibtex\n@misc{xiao2026mimov2flashtechnicalreport,\n      title={MiMo-V2-Flash Technical Report}, \n      author={LLM-Core Xiaomi},\n      year={2026},\n      eprint={2601.02780},\n      archivePrefix={arXiv},\n      primaryClass={cs.CL},\n      url={https:\u002F\u002Farxiv.org\u002Fabs\u002F2601.02780}, \n}\n```\n\n## 8. 联系方式\n\n如有任何问题，请通过[mimo@xiaomi.com](mailto:mimo@xiaomi.com)联系我们，加入下方的微信群组，或直接提交问题。\n\n\u003Cp align=\"center\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FXiaomiMiMo_MiMo-V2-Flash_readme_2951e13ddf1b.jpg\" width=\"20%\" \u002F>\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FXiaomiMiMo_MiMo-V2-Flash_readme_4ba994e7fdb5.jpg\" width=\"20%\" \u002F>\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FXiaomiMiMo_MiMo-V2-Flash_readme_ed0eaac9ae4a.jpg\" width=\"20%\" \u002F>\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FXiaomiMiMo_MiMo-V2-Flash_readme_4824cddb8d35.jpg\" width=\"20%\" \u002F>\n\u003C\u002Fp>","# MiMo-V2-Flash 快速上手指南\n\nMiMo-V2-Flash 是小米开源的混合专家（MoE）语言模型，拥有 **3090 亿总参数**（激活参数仅 **150 亿**）。该模型专为高速推理和智能体工作流设计，采用混合注意力架构和多令牌预测（MTP）技术，在保持长上下文能力的同时显著降低了推理成本。\n\n## 1. 
环境准备\n\n在开始之前，请确保您的开发环境满足以下要求：\n\n*   **操作系统**: Linux (推荐 Ubuntu 20.04+)\n*   **Python 版本**: 3.10 或更高\n*   **GPU 要求**: \n    *   推荐使用 NVIDIA A100\u002FH100\u002FH800 等高性能显卡。\n    *   由于激活参数仅为 15B，单卡或多卡推理对显存需求相对友好，但加载全量权重仍需充足显存或使用量化\u002F卸载技术。\n*   **前置依赖**:\n    *   `torch` (建议 2.1+)\n    *   `transformers` (建议最新版以支持 MoE 架构)\n    *   `accelerate`\n    *   `huggingface_hub`\n\n> **国内加速建议**：\n> 推荐使用国内镜像源安装依赖，并配置 Hugging Face 镜像以加速模型下载。\n> ```bash\n> export HF_ENDPOINT=https:\u002F\u002Fhf-mirror.com\n> pip install torch transformers accelerate huggingface_hub -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n> ```\n\n## 2. 安装步骤\n\n### 步骤一：克隆仓库（可选）\n如果您需要参考官方示例代码或查看技术报告：\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002FXiaomiMiMo\u002FMiMo-V2-Flash.git\ncd MiMo-V2-Flash\n```\n\n### 步骤二：安装核心依赖\n创建虚拟环境并安装必要的 Python 包：\n```bash\npython -m venv mimo-env\nsource mimo-env\u002Fbin\u002Factivate\n\npip install torch torchvision torchaudio --index-url https:\u002F\u002Fdownload.pytorch.org\u002Fwhl\u002Fcu121\npip install \"transformers>=4.46.0\" accelerate sentencepiece protobuf\n```\n\n### 步骤三：验证环境\n确保能够导入相关库且无报错：\n```bash\npython -c \"import torch; import transformers; print(f'PyTorch: {torch.__version__}')\"\n```\n\n## 3. 
基本使用\n\n以下是最简单的使用示例，展示如何加载模型并进行文本生成。\n\n### 方法 A：使用 Transformers 原生加载\n\n```python\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\nimport torch\n\n# 模型名称 (可根据需求选择 Base 或指令微调版)\nmodel_name = \"XiaomiMiMo\u002FMiMo-V2-Flash\"\n\n# 加载分词器\ntokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n\n# 加载模型\n# 注意：建议使用 float16 或 bfloat16 以节省显存\n# 如果显存不足，可添加 device_map=\"auto\" 启用自动多卡\u002F卸载\nmodel = AutoModelForCausalLM.from_pretrained(\n    model_name,\n    torch_dtype=torch.bfloat16,\n    device_map=\"auto\",\n    trust_remote_code=True\n)\n\n# 准备输入\nprompt = \"请解释一下量子纠缠的基本概念。\"\ninputs = tokenizer(prompt, return_tensors=\"pt\").to(model.device)\n\n# 生成回复\noutputs = model.generate(\n    **inputs,\n    max_new_tokens=512,\n    do_sample=True,\n    temperature=0.7,\n    top_p=0.9\n)\n\n# 解码输出\nresponse = tokenizer.decode(outputs[0], skip_special_tokens=True)\nprint(response)\n```\n\n### 方法 B：利用 MTP 加速推理（进阶）\nMiMo-V2-Flash 内置了多令牌预测（MTP）模块，可显著提升推理速度。若使用官方提供的推理脚本或集成库，通常会自动启用此功能。确保您下载的模型包含 MTP 权重（官方已开源 3 层 MTP 权重）。\n\n### 在线体验\n如果您暂时无法本地部署，可以通过以下官方平台直接体验：\n*   **Xiaomi MiMo Studio**: [https:\u002F\u002Faistudio.xiaomimimo.com](https:\u002F\u002Faistudio.xiaomimimo.com)\n*   **API 平台**: [https:\u002F\u002Fplatform.xiaomimimo.com](https:\u002F\u002Fplatform.xiaomimimo.com)\n\n---\n**提示**：模型支持高达 **256k** 的上下文窗口。在处理长文档时，请确保您的显存足以支撑 KV Cache，或利用其混合注意力架构（滑动窗口 + 全局注意力）带来的显存优化优势。","某大型电商平台的后端团队正面临“双 11”大促前的紧急任务，需要在 48 小时内重构并修复遗留系统中数百个复杂的订单处理微服务代码。\n\n### 没有 MiMo-V2-Flash 时\n- **推理成本高昂**：面对数百万行代码库的长上下文分析需求，传统大模型显存占用极大，导致团队不得不缩减并发实例数量，严重拖慢整体进度。\n- **响应速度迟缓**：在生成复杂逻辑代码或进行多步推理时，模型逐字输出的延迟过高，开发人员大量时间浪费在等待补全上，打断心流。\n- **长文档理解割裂**：由于上下文窗口限制或注意力机制效率低，模型难以同时兼顾全局架构文档与局部代码细节，常出现“顾头不顾尾”的逻辑错误。\n- **智能体执行乏力**：在自动修复 Bug（SWE-Bench 类任务）时，旧模型缺乏足够的代理规划能力，往往需要人工反复介入修正中间步骤。\n\n### 使用 MiMo-V2-Flash 后\n- **极致降本增效**：凭借 309B 总参数仅激活 15B 的 MoE 架构及混合注意力机制，KV 缓存减少近 6 倍，团队得以低成本部署高并发实例，全量扫描代码库。\n- **三倍输出加速**：利用多令牌预测（MTP）技术，代码生成速度提升三倍，开发者几乎能实时获得完整的函数实现，大幅缩短编码循环。\n- 
**超长上下文无损**：支持最长 256k 上下文窗口，MiMo-V2-Flash 能一次性读完整个微服务模块的设计文档与所有依赖文件，确保重构逻辑的全局一致性。\n- **自主闭环修复**：得益于强大的代理能力与强化学习训练，它能独立规划并执行复杂的代码修复任务，显著减少人工干预，按时交付高质量代码。\n\nMiMo-V2-Flash 通过平衡超长上下文理解与极致推理效率，将原本需要数周的高强度代码重构工作压缩至几天内高质量完成。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FXiaomiMiMo_MiMo-V2-Flash_27bb8220.jpg","XiaomiMiMo","Xiaomi MiMo","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002FXiaomiMiMo_72ce0558.jpg","Ignite every curiosity with creative spark.  Ask Mi Anything!",null,"mimo@xiaomi.com","https:\u002F\u002Fgithub.com\u002FXiaomiMiMo",1264,55,"2026-04-10T22:43:15","Apache-2.0",5,"","未说明（模型总参数量 309B，激活参数 15B，采用 FP8 混合精度训练，推测推理需要高性能多卡集群或专用推理框架支持）","未说明",{"notes":88,"python":86,"dependencies":89},"README 主要介绍了模型架构（混合注意力、多令牌预测 MTP）、参数量（309B 总参数\u002F15B 激活参数）、上下文长度（原生 32k，支持至 256k）及性能基准。文中未提供具体的本地部署环境需求（如操作系统、Python 版本、具体依赖库列表）。鉴于模型规模巨大，普通单机显存无法直接加载完整权重，通常需依赖特定的推理引擎或分布式环境。",[],[35,13],"2026-03-27T02:49:30.150509","2026-04-12T14:00:08.331339",[94,99],{"id":95,"question_zh":96,"answer_zh":97,"source_url":98},30670,"该模型的正确许可证是什么？博客提到是 MIT，但仓库显示是 Apache 2.0。","Hugging Face 上的 MiMo-V2-Flash 模型采用 MIT 许可证。该 GitHub 仓库主要包含 README 和技术报告，因此使用了 Apache 2.0 许可证文件。","https:\u002F\u002Fgithub.com\u002FXiaomiMiMo\u002FMiMo-V2-Flash\u002Fissues\u002F2",{"id":100,"question_zh":101,"answer_zh":102,"source_url":103},30671,"README 中的论文链接地址不正确，正确的地址是什么？","正确的论文链接地址应为：https:\u002F\u002Fgithub.com\u002FXiaomiMiMo\u002FMiMo-V2-Flash\u002Fblob\u002Fmain\u002Fpaper.pdf","https:\u002F\u002Fgithub.com\u002FXiaomiMiMo\u002FMiMo-V2-Flash\u002Fissues\u002F6",[]]