[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-mistralai--mistral-finetune":3,"tool-mistralai--mistral-finetune":61},[4,18,28,36,45,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":24,"last_commit_at":25,"category_tags":26,"status":17},9989,"n8n","n8n-io\u002Fn8n","n8n 是一款面向技术团队的公平代码（fair-code）工作流自动化平台，旨在让用户在享受低代码快速构建便利的同时，保留编写自定义代码的灵活性。它主要解决了传统自动化工具要么过于封闭难以扩展、要么完全依赖手写代码效率低下的痛点，帮助用户轻松连接 400 多种应用与服务，实现复杂业务流程的自动化。\n\nn8n 特别适合开发者、工程师以及具备一定技术背景的业务人员使用。其核心亮点在于“按需编码”：既可以通过直观的可视化界面拖拽节点搭建流程，也能随时插入 JavaScript 或 Python 代码、调用 npm 包来处理复杂逻辑。此外，n8n 原生集成了基于 LangChain 的 AI 能力，支持用户利用自有数据和模型构建智能体工作流。在部署方面，n8n 提供极高的自由度，支持完全自托管以保障数据隐私和控制权，也提供云端服务选项。凭借活跃的社区生态和数百个现成模板，n8n 让构建强大且可控的自动化系统变得简单高效。",184740,2,"2026-04-19T23:22:26",[16,14,13,15,27],"插件",{"id":29,"name":30,"github_repo":31,"description_zh":32,"stars":33,"difficulty_score":10,"last_commit_at":34,"category_tags":35,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":24,"last_commit_at":42,"category_tags":43,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",161147,"2026-04-19T23:31:47",[14,13,44],"语言模型",{"id":46,"name":47,"github_repo":48,"description_zh":49,"stars":50,"difficulty_score":24,"last_commit_at":51,"category_tags":52,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 
等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",109154,"2026-04-18T11:18:24",[14,15,13],{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":24,"last_commit_at":59,"category_tags":60,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[27,13,15,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":67,"readme_en":68,"readme_zh":69,"quickstart_zh":70,"use_case_zh":71,"hero_image_url":72,"owner_login":73,"owner_name":74,"owner_avatar_url":75,"owner_bio":74,"owner_company":65,"owner_location":65,"owner_email":76,"owner_twitter":65,"owner_website":77,"owner_url":78,"languages":79,"stars":88,"forks":89,"last_commit_at":90,"license":91,"difficulty_score":10,"env_os":92,"env_gpu":93,"env_ram":92,"env_deps":94,"category_tags":100,"github_topics":65,"view_count":24,"oss_zip_url":65,"oss_zip_packed_at":65,"status":17,"created_at":101,"updated_at":102,"faqs":103,"releases":133},9891,"mistralai\u002Fmistral-finetune","mistral-finetune",null,"mistral-finetune 是 Mistral AI 官方推出的轻量级代码库，专为高效微调其系列大模型（如 Mistral 7B、Mixtral 8x7B\u002F8x22B、Mistral Nemo 及 Mistral Large v2）而设计。它主要解决了在有限显存资源下难以对大规模模型进行定制化训练的痛点，让开发者无需昂贵硬件即可轻松上手。\n\n该工具基于 LoRA（低秩适应）技术，其核心亮点在于冻结模型绝大部分参数，仅训练 1%-2% 的额外低秩矩阵权重。这种策略不仅大幅降低了内存占用，还保持了出色的模型性能。代码库针对单节点多卡环境进行了深度优化，同时对于 7B 等较小规模的模型，单张 GPU 也能胜任。此外，它还紧跟模型迭代，已兼容最新的 Mistral Large v2 和 Nemo 模型，并提供了针对性的显存与超参数建议。\n\nmistral-finetune 特别适合希望快速验证想法的 AI 开发者、研究人员以及需要构建垂直领域应用的技术团队。作为一个“意见鲜明”的入门级工具，它在数据格式等方面提供了标准化指引，旨在为用户提供一条简单、清晰的微调路径，帮助用户专注于业务逻辑","mistral-finetune 是 Mistral AI 官方推出的轻量级代码库，专为高效微调其系列大模型（如 Mistral 7B、Mixtral 8x7B\u002F8x22B、Mistral Nemo 及 Mistral Large v2）而设计。它主要解决了在有限显存资源下难以对大规模模型进行定制化训练的痛点，让开发者无需昂贵硬件即可轻松上手。\n\n该工具基于 LoRA（低秩适应）技术，其核心亮点在于冻结模型绝大部分参数，仅训练 1%-2% 的额外低秩矩阵权重。这种策略不仅大幅降低了内存占用，还保持了出色的模型性能。代码库针对单节点多卡环境进行了深度优化，同时对于 7B 等较小规模的模型，单张 GPU 也能胜任。此外，它还紧跟模型迭代，已兼容最新的 Mistral Large v2 和 Nemo 模型，并提供了针对性的显存与超参数建议。\n\nmistral-finetune 特别适合希望快速验证想法的 AI 开发者、研究人员以及需要构建垂直领域应用的技术团队。作为一个“意见鲜明”的入门级工具，它在数据格式等方面提供了标准化指引，旨在为用户提供一条简单、清晰的微调路径，帮助用户专注于业务逻辑而非底层架构适配。","# Mistral-finetune\n\n\u003Ca target=\"_blank\" href=\"https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002Fmistralai\u002Fmistral-finetune\u002Fblob\u002Fmain\u002Ftutorials\u002Fmistral_finetune_7b.ipynb\">\n  \u003Cimg src=\"https:\u002F\u002Fcolab.research.google.com\u002Fassets\u002Fcolab-badge.svg\" alt=\"Open In Colab\"\u002F>\n\u003C\u002Fa>\n\n\n`mistral-finetune` is a light-weight codebase that enables memory-efficient and performant finetuning of Mistral's models.\nIt is based on [LoRA](https:\u002F\u002Farxiv.org\u002Fabs\u002F2106.09685), a training paradigm where most weights are frozen and only 1-2% of additional weights in the form of low-rank matrix perturbations are trained. \n\nFor maximum efficiency it is recommended to use an A100 or H100 GPU. 
The codebase is optimized \nfor multi-GPU-single-node training setups, but for smaller models, such as the 7B, a single GPU suffices.\n\n> **Note**\n> \n> - The goal of this repository is to provide a simple, guided entrypoint to finetune Mistral models.\n> As such, it is fairly opinionated (especially around data formatting) and does not aim at being exhaustive\n> across multiple model architectures or hardware types.\n> For more generic approaches, you can check out some other great projects like \n> [torchtune](https:\u002F\u002Fpytorch.org\u002Ftorchtune\u002Fstable\u002Foverview.html).\n\n\n## News\n\n- **13.08.2024**: [Mistral Large v2](https:\u002F\u002Fmistral.ai\u002Fnews\u002Fmistral-large-2407\u002F) is now compatible with `mistral-finetune`!\n  - 1. Download the 123B Instruct [here](#model-download) and set `model_id_or_path` to the downloaded checkpoint dir.\n  - 2. Fine-tuning Mistral-Large v2 requires significantly more memory due to a larger model size. For now, set `seq_len` to \u003C= 8192.\n  - 3. It is recommended to use a lower learning rate as compared to other models, *e.g.* lr=1e-6 should work well for most cases.\n\n- **19.07.2024**: [Mistral Nemo](https:\u002F\u002Fmistral.ai\u002Fnews\u002Fmistral-nemo\u002F) is now compatible with `mistral-finetune`! \n  - 1. Download the 12B Base or Instruct [here](#model-download) and set `model_id_or_path` to the downloaded checkpoint dir.\n  - 2. Run `pip install --upgrade mistral-common` to have a version that supports the Tekkenizer (`>=1.3.1`).\n  - 3. Fine-tuning Mistral-Nemo currently requires much more memory due to a larger vocabulary size, which spikes the peak memory requirement of the CE loss (we'll soon add an improved CE loss here). For now, set `seq_len` to \u003C= 16384.\n  - 4. It is recommended to use the same hyperparameters as for the 7B v3.\n\n## Installation\n\nTo get started with Mistral LoRA fine-tuning, follow these steps:\n\n1. Clone this repository:\n```\ncd $HOME && git clone https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune.git\n```\n\n2. 
Install all required dependencies:\n```\ncd mistral-finetune\npip install -r requirements.txt\n```\n\n## Model download\n\nWe recommend fine-tuning one of the official Mistral models which you can download here:\n\n| Model          | Link                                                                                                    | Checksum                          |\n|----------------|---------------------------------------------------------------------------------------------------------|-----------------------------------|\n| 7B Base V3       | [7B Base](https:\u002F\u002Fmodels.mistralcdn.com\u002Fmistral-7b-v0-3\u002Fmistral-7B-v0.3.tar)                            | `0663b293810d7571dad25dae2f2a5806`|\n| 7B Instruct v3 | [7B Instruct v3](https:\u002F\u002Fmodels.mistralcdn.com\u002Fmistral-7b-v0-3\u002Fmistral-7B-Instruct-v0.3.tar)             | `80b71fcb6416085bcb4efad86dfb4d52`|\n| 8x7B Base V1   | [8x7B Base](https:\u002F\u002Fhuggingface.co\u002Fmistralai\u002FMixtral-8x7B-v0.1)                                                                        | (HF link)                                |\n| 8x7B Instruct V1 | [8x7B Instruct](https:\u002F\u002Fmodels.mistralcdn.com\u002Fmixtral-8x7b-v0-1\u002FMixtral-8x7B-v0.1-Instruct.tar) | `8e2d3930145dc43d3084396f49d38a3f` |\n| 8x22B Instruct V3 | [8x22B Instruct](https:\u002F\u002Fmodels.mistralcdn.com\u002Fmixtral-8x22b-v0-3\u002Fmixtral-8x22B-Instruct-v0.3.tar)        | `471a02a6902706a2f1e44a693813855b`|\n| 8x22B Base V3  | [8x22B Base](https:\u002F\u002Fmodels.mistralcdn.com\u002Fmixtral-8x22b-v0-3\u002Fmixtral-8x22B-v0.3.tar)                        | `a2fa75117174f87d1197e3a4eb50371a`|\n| 12B Instruct | [12B Instruct (Mistral-Nemo)](https:\u002F\u002Fmodels.mistralcdn.com\u002Fmistral-nemo-2407\u002Fmistral-nemo-instruct-2407.tar) | `296fbdf911cb88e6f0be74cd04827fe7` |\n| 12B Base | [12B Base (Mistral-Nemo)](https:\u002F\u002Fmodels.mistralcdn.com\u002Fmistral-nemo-2407\u002Fmistral-nemo-base-2407.tar) | `c5d079ac4b55fc1ae35f51f0a3c0eb83` |\n| Mistral Large 2 | [123B Instruct (Large v2)](https:\u002F\u002Fmodels.mistralcdn.com\u002Fmistral-large-2407\u002Fmistral-large-instruct-2407.tar) | `fc602155f9e39151fba81fcaab2fa7c4` |\n\n**Important Notice**: For 8x7B Base V1 and 8x7B Instruct V1, it is necessary to use our v3 tokenizer and extend the vocabulary size to 32768 prior to fine-tuning. For detailed instructions on this process, please refer to the [\"Model extension\"](https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune?tab=readme-ov-file#model-extension) section.
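\n\nOnce you have downloaded an archive (see the example command just below), it is worth checking it against the `Checksum` column before extracting. The values appear to be MD5 hex digests (an assumption; the table does not name the algorithm), so a minimal verification sketch in Python could be:\n\n```py\nimport hashlib\n\ndef md5sum(path: str, chunk_size: int = 1024 * 1024) -> str:\n    # Stream the file in chunks so large .tar archives fit in memory.\n    h = hashlib.md5()\n    with open(path, \"rb\") as f:\n        for chunk in iter(lambda: f.read(chunk_size), b\"\"):\n            h.update(chunk)\n    return h.hexdigest()\n\n# Expected value taken from the 7B Base V3 row above.\nassert md5sum(\"mistral-7B-v0.3.tar\") == \"0663b293810d7571dad25dae2f2a5806\"\n```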
\n\nE.g., to download the 7B-base model you can run the following command:\n```sh\nmkdir -p ${HOME}\u002Fmistral_models\ncd ${HOME} && wget https:\u002F\u002Fmodels.mistralcdn.com\u002Fmistral-7b-v0-3\u002Fmistral-7B-v0.3.tar\ntar -xf mistral-7B-v0.3.tar -C mistral_models\n```\n\nMake sure to modify your training script and add the path to the downloaded \nfolder as `model_id_or_path`.\n\nE.g., modify [example\u002F7B.yaml](https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune\u002Fblob\u002Fmain\u002Fexample\u002F7B.yaml) to include the absolute path to `$HOME\u002Fmistral_models\u002F7B`:\n\n```\nmodel_id_or_path: \"\u002FUsers\u002Fjohndoe\u002Fmistral_models\u002F7B\"\n```\n\n## Prepare dataset \n\nTo ensure effective training, `mistral-finetune` has strict \nrequirements for how the training data has to be formatted.\n\nAll data files must be stored in jsonl format.\n\nYou can build two types of data files:\n\n### _Pretrain_:\n\nPretrain data corresponds to plain text data stored in the `\"text\"` key. E.g.:\n\n```jsonl\n{\"text\": \"Text contained in document n°1\"}\n{\"text\": \"Text contained in document n°2\"}\n```\n\n### _Instruct_:\n\nCurrently two different types of instruction following data are supported:\n\n- _Instruct_: conversational data stored in the `\"messages\"` key in the form of a list. Each list item is a dictionary containing the `\"content\"` and `\"role\"` keys. `\"role\"` is a string being one of \"user\", \"assistant\" or \"system\". The loss will only be computed if \"role\" == \"assistant\". E.g.:\n\n```jsonl\n{\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"User interaction n°1 contained in document n°1\"\n    },\n    {\n      \"role\": \"assistant\",\n      \"content\": \"Bot interaction n°1 contained in document n°1\"\n    },\n    {\n      \"role\": \"user\",\n      \"content\": \"User interaction n°2 contained in document n°1\"\n    },\n    {\n      \"role\": \"assistant\",\n      \"content\": \"Bot interaction n°2 contained in document n°1\"\n    }\n  ]\n}\n{\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"User interaction n°1 contained in document n°2\"\n    },\n    {\n      \"role\": \"assistant\",\n      \"content\": \"Bot interaction n°1 contained in document n°2\"\n    },\n    {\n      \"role\": \"user\",\n      \"content\": \"User interaction n°2 contained in document n°2\"\n    },\n    {\n      \"role\": \"assistant\",\n      \"content\": \"Bot interaction n°2 contained in document n°2\",\n      \"weight\": 0,  # don't train on n°2\n    },\n    {\n      \"role\": \"user\",\n      \"content\": \"User interaction n°3 contained in document n°2\"\n    },\n    {\n      \"role\": \"assistant\",\n      \"content\": \"Bot interaction n°3 contained in document n°2\"\n    }\n  ]\n}\n```\n\n- _Function calling_: conversational data stored in the `\"messages\"` key in the form of a list. Each list item is a dictionary containing the `\"role\"` and `\"content\"` or `\"tool_calls\"` keys. `\"role\"` is a string being one of \"user\", \"assistant\", \"system\", or \"tool\". The loss will only be computed if \"role\" == \"assistant\".\n\n**Note**: In function calling the `\"id\"` of `\"tool_calls\"` and the `\"tool_call_id\"` are randomly generated strings of exactly 9 chars. We recommend generating these automatically \nin a data preparation script, as is done [here](https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune\u002Fblob\u002F208b25c0f7299bb78d06cea25b82adee03834319\u002Futils\u002Freformat_data_glaive.py#L74).
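\n\nAs a rough illustration, such an id could be produced as follows (a sketch; the exact character set is an assumption, the linked script is the reference):\n\n```py\nimport random\nimport string\n\ndef gen_tool_call_id() -> str:\n    # 9-character alphanumeric string, used for both \"id\" and \"tool_call_id\".\n    return \"\".join(random.choices(string.ascii_letters + string.digits, k=9))\n```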
\n\nE.g.:\n\n```jsonl\n{\n  \"messages\": [\n    {\n      \"role\": \"system\",\n      \"content\": \"You are a helpful assistant who has access to the following functions to help the user, you can use the functions if needed\"\n    },\n    {\n      \"role\": \"user\",\n      \"content\": \"Can you help me generate an anagram of the word \\\"listen\\\"?\"\n    },\n    {\n      \"role\": \"assistant\",\n      \"tool_calls\": [\n        {\n          \"id\": \"TX92Jm8Zi\",\n          \"type\": \"function\",\n          \"function\": {\n            \"name\": \"generate_anagram\",\n            \"arguments\": \"{\\\"word\\\": \\\"listen\\\"}\"\n          }\n        }\n      ]\n    },\n    {\n      \"role\": \"tool\",\n      \"content\": \"{\\\"anagram\\\": \\\"silent\\\"}\",\n      \"tool_call_id\": \"TX92Jm8Zi\"\n    },\n    {\n      \"role\": \"assistant\",\n      \"content\": \"The anagram of the word \\\"listen\\\" is \\\"silent\\\".\"\n    },\n    {\n      \"role\": \"user\",\n      \"content\": \"That's amazing! Can you generate an anagram for the word \\\"race\\\"?\"\n    },\n    {\n      \"role\": \"assistant\",\n      \"tool_calls\": [\n        {\n          \"id\": \"3XhQnxLsT\",\n          \"type\": \"function\",\n          \"function\": {\n            \"name\": \"generate_anagram\",\n            \"arguments\": \"{\\\"word\\\": \\\"race\\\"}\"\n          }\n        }\n      ]\n    }\n  ],\n  \"tools\": [\n    {\n      \"type\": \"function\",\n      \"function\": {\n        \"name\": \"generate_anagram\",\n        \"description\": \"Generate an anagram of a given word\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"word\": {\n              \"type\": \"string\",\n              \"description\": \"The word to generate an anagram of\"\n            }\n          },\n          \"required\": [\n            \"word\"\n          ]\n        }\n      }\n    }\n  ]\n}\n```\n\n## Verify dataset\n\nBefore starting a training run you should verify that your dataset is correctly formatted and get an \nestimation of the training time. You can do so by using the [.\u002Futils\u002Fvalidate_data](https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune\u002Fblob\u002Fmain\u002Futils\u002Fvalidate_data.py) script.\n\nNote that this step is crucial to ensure that the data is correctly formatted.\n\n### Instruction following\n\nLet's go over a simple example to train a model on instruction following:\n\n- 1. **Load a chunk of [Ultrachat_200k](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FHuggingFaceH4\u002Fultrachat_200k)**\n\nCreate the data folder and navigate to the folder.\n```sh\ncd $HOME && mkdir -p data && cd $HOME\u002Fdata\n```\n\nLoad the data into a pandas DataFrame. \n\n**Note**: Make sure to have pandas and pyarrow installed (`pip install pandas pyarrow`).\n\n```py\nimport pandas as pd\n\ndf = pd.read_parquet('https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FHuggingFaceH4\u002Fultrachat_200k\u002Fresolve\u002Fmain\u002Fdata\u002Ftest_gen-00000-of-00001-3d4cd8309148a71f.parquet')\n```\n- 2. Split into train and eval\n\n```py\ndf_train=df.sample(frac=0.95,random_state=200)\ndf_eval=df.drop(df_train.index)\n```\n\n- 3. 
Save data to jsonl\n\n```py\ndf_train.to_json(\"ultrachat_chunk_train.jsonl\", orient=\"records\", lines=True)\ndf_eval.to_json(\"ultrachat_chunk_eval.jsonl\", orient=\"records\", lines=True)\n```\n\n- 4. Modify your training yaml to include the ultrachat dataset and verify the yaml\n\nModify [example\u002F7B.yaml](https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune\u002Fblob\u002Fmain\u002Fexample\u002F7B.yaml) to include the absolute path to `$HOME\u002Fdata\u002Fultrachat_chunk_train.jsonl` as well as a dataset mixing weight for training and `$HOME\u002Fdata\u002Fultrachat_chunk_eval.jsonl` for eval, *e.g.*\n\n```\ndata:\n  instruct_data: \"\u002FUsers\u002Fjohndoe\u002Fdata\u002Fultrachat_chunk_train.jsonl\"\n  eval_instruct_data: \"\u002FUsers\u002Fjohndoe\u002Fdata\u002Fultrachat_chunk_eval.jsonl\"\n```\n\nNow you can verify your training yaml to make sure the data is correctly formatted and to get an estimate of your training time.\n\n```\ncd $HOME\u002Fmistral-finetune\npython -m utils.validate_data --train_yaml example\u002F7B.yaml\n```\n\nUpon completion you should see an error report with many of the following errors:\n\n```\nThe data in line 1412 of dataset \u002FUsers\u002Fjohndoe\u002Fdata\u002Fultrachat_chunk_eval.jsonl is incorrectly formatted. Expected last role to be one of: [assistant] but got user\nThe data in line 1413 of dataset \u002FUsers\u002Fjohndoe\u002Fdata\u002Fultrachat_chunk_eval.jsonl is incorrectly formatted. Expected last role to be one of: [assistant] but got user\nThe data in line 1414 of dataset \u002FUsers\u002Fjohndoe\u002Fdata\u002Fultrachat_chunk_eval.jsonl is incorrectly formatted. Expected last role to be one of: [assistant] but got user\nThe data in line 1415 of dataset \u002FUsers\u002Fjohndoe\u002Fdata\u002Fultrachat_chunk_eval.jsonl is incorrectly formatted. Expected last role to be one of: [assistant] but got user\n```\n\nMany conversations end with the 'user' role, which is unnecessary: since the loss is only computed on 'assistant' messages, these trailing messages would be processed without contributing to training.\n\nYou can make use of [.\u002Futils\u002Freformat_data.py](https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune\u002Fblob\u002Fmain\u002Futils\u002Freformat_data.py) to correct the data:\n\n```\ncd $HOME\u002Fmistral-finetune\npython -m utils.reformat_data $HOME\u002Fdata\u002Fultrachat_chunk_train.jsonl\npython -m utils.reformat_data $HOME\u002Fdata\u002Fultrachat_chunk_eval.jsonl\n```\n\nYou should see that a couple of samples will be skipped.
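\n\nIf you want a quick look at what the reformatting will touch before running it, a rough pre-check (a sketch; not a replacement for `utils.validate_data`) is to count conversations that end on a non-assistant turn:\n\n```py\nimport json\n\n# Count conversations whose last message is not an assistant turn;\n# these are the trailing messages the reformatting step strips.\nwith open(\"ultrachat_chunk_eval.jsonl\") as f:\n    bad = sum(\n        json.loads(line)[\"messages\"][-1][\"role\"] != \"assistant\"\n        for line in f\n        if line.strip()\n    )\nprint(f\"{bad} conversations end on a non-assistant turn\")\n```\n\n- 5. 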
Potentially change number of training steps\n\nUpon correction of the dataset, run the script again:\n\n```\ncd $HOME\u002Fmistral-finetune\npython -m utils.validate_data --train_yaml example\u002F7B.yaml\n```\n\nYou should get a summary of the data input and training parameters:\n\n```\nTrain States\n --------------------\n{\n   \"expected\": {\n       \"eta\": \"00:52:44\",\n       \"data_tokens\": 25169147,\n       \"train_tokens\": 131072000,\n       \"epochs\": \"5.21\",\n       \"max_steps\": 500,\n       \"data_tokens_per_dataset\": {\n           \"\u002FUsers\u002Fjohndoe\u002Fdata\u002Fultrachat_chunk_train.jsonl\": \"25169147.0\"\n       },\n       \"train_tokens_per_dataset\": {\n           \"\u002FUsers\u002Fjohndoe\u002Fdata\u002Fultrachat_chunk_train.jsonl\": \"131072000.0\"\n       },\n       \"epochs_per_dataset\": {\n           \"\u002FUsers\u002Fjohndoe\u002Fdata\u002Fultrachat_chunk_train.jsonl\": \"5.2\"\n       }\n   },\n}\n```\n\nHaving `max_steps` set to 500 would lead to iterating through the dataset roughly 5 times, which is reasonable but might be a bit too much. A recommended setting is shown in the [training section below](#start-training), which would only take 30min on an 8xH100 cluster.\n\n### Function calling\n\nNext, let's go over a more advanced use case to fine-tune a model on function calling.\nFunction calling requires the data to be in the format as [explained above](#instruct). Let's go over an example.\n\n- 1. **Load a chat-formatted version of the [Glaive function calling dataset](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FLocutusque\u002Ffunction-calling-chatml)**\n\nCreate the data folder and navigate to the folder.\n```sh\ncd $HOME && mkdir -p data && cd $HOME\u002Fdata\n```\n\nLoad the data into a pandas DataFrame.\n\n**Note**: Make sure to have pandas and pyarrow installed (`pip install pandas pyarrow`).\n\n```py\nimport pandas as pd\n\ndf = pd.read_parquet('https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FLocutusque\u002Ffunction-calling-chatml\u002Fresolve\u002Fmain\u002Fdata\u002Ftrain-00000-of-00001-f0b56c6983b4a78f.parquet')\n```\n- 2. Split into train and eval\n\n```py\ndf_train=df.sample(frac=0.95,random_state=200)\ndf_eval=df.drop(df_train.index)\n```\n\n- 3. Save data to jsonl\n\n```py\ndf_train.to_json(\"glaive_train.jsonl\", orient=\"records\", lines=True)\ndf_eval.to_json(\"glaive_eval.jsonl\", orient=\"records\", lines=True)\n```\n\n- 4. Reformat dataset\n\nAs one can see, the dataset does not follow the required function calling format, so it will need to be reformatted. Among other things `\"from\"` should be renamed to `\"user\"` and superfluous `\"\\n\"` characters should be removed.\nFor this dataset you can make use of [`.\u002Futils\u002Freformat_data_glaive.py`](https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune\u002Fblob\u002Fmain\u002Futils\u002Freformat_data_glaive.py):\n\n```\ncd $HOME\u002Fmistral-finetune\npython -m utils.reformat_data_glaive $HOME\u002Fdata\u002Fglaive_train.jsonl\npython -m utils.reformat_data_glaive $HOME\u002Fdata\u002Fglaive_eval.jsonl\n```\n\nRunning this command will make sure that most samples are in the correct format.\n\n**Note**: It is impossible to write reformatting scripts that work for all kinds of datasets. \nIf you have datasets that don't yet follow the required format above, you will most probably have to \ncreate a reformatting script yourself (mistral-chat or chat-gpt is your best friend here!).
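\n\nSuch a script is usually short. A minimal sketch for a ShareGPT-style source (the `\"from\"`\u002F`\"value\"` key names and role labels are assumptions about the source data, not part of this repository):\n\n```py\nimport json\n\nROLE_MAP = {\"human\": \"user\", \"gpt\": \"assistant\"}  # assumed source labels\n\ndef reformat(sample: dict) -> dict:\n    # Rename source keys to the required \"role\"\u002F\"content\" schema and\n    # strip superfluous newline characters from the text.\n    return {\n        \"messages\": [\n            {\n                \"role\": ROLE_MAP.get(turn[\"from\"], turn[\"from\"]),\n                \"content\": turn[\"value\"].strip(),\n            }\n            for turn in sample[\"conversations\"]\n        ]\n    }\n\nwith open(\"my_data.jsonl\") as src, open(\"my_data.reformatted.jsonl\", \"w\") as dst:\n    for line in src:\n        dst.write(json.dumps(reformat(json.loads(line))) + \"\\n\")\n```\n\n- 5. 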
Validate dataset\n\nYou can now validate the dataset by setting `data.instruct_data` and `data.eval_instruct_data` to\n`$HOME\u002Fdata\u002Fglaive_train.jsonl` and `$HOME\u002Fdata\u002Fglaive_eval.jsonl` in `example\u002F7B.yaml` respectively.\n\nThe reformatted datasets still have some errors which can be removed with `--create_corrected`. For this, make sure to add\n`--create_corrected` as follows:\n\n```\ncd $HOME\u002Fmistral-finetune\npython -m utils.validate_data --train_yaml example\u002F7B.yaml --create_corrected\n```\n\nRunning this command will show a couple of errors and save two new datasets `$HOME\u002Fdata\u002Fglaive_train.jsonl.corrected` and `$HOME\u002Fdata\u002Fglaive_eval.jsonl.corrected`. Make sure to use these two datasets in `example\u002F7B.yaml` and run the command again. Now the dataset should be correctly formatted!\n\n\n## Start training\n\nHaving followed the [dataset verification section](#verify-dataset), we can now start training.\nFor faster training, we recommend setting `max_steps` to only 300. Make sure to define `run_dir` as your experiment folder and optionally set `wandb.project` to a Weights & Biases project for logging, *e.g.*:\n```\nmax_steps: 300\nrun_dir: \"\u002FUsers\u002Fjohndoe\u002Fultra_chat_test\"\nwandb.project: ultra_chat\n```\n\nSave the training configuration and start training! Make sure to set `--nproc-per-node` to the number of available GPUs.\n\n```\ncd $HOME\u002Fmistral-finetune\ntorchrun --nproc-per-node 8 --master_port $RANDOM -m train example\u002F7B.yaml\n```\n\nTraining on ultra-chat should take around 30min on an 8xH100 node and the resulting weights should give an MT Bench score around 6.3.\n\nTraining on glaive should take around 1h on an 8xH100 node and the resulting weights should work nicely for function calling.\n\n## Customizing training configuration\n\nThe example `example\u002F7B.yaml` defines reasonable parameters for learning rate, weight decay, etc., but you are advised to \ncustomize these settings for your use case.\n\nGenerally, a training configuration should fill the following parameters:\n\n- `model_id_or_path` defines the model to start training from. This can be a path to a pre-trained model or a local model directory.\n- `run_dir` defines the directory where training checkpoints and metrics are stored.\n- `seq_len` defines the sequence length for training. This is the maximum length of input sequences the model will process. Samples are packed to reach a length of `seq_len` for maximum training efficiency.\n- `batch_size` defines the number of training examples used per GPU. **Note**: The overall effective batch_size (in tokens) across all GPUs equals `num_gpus` x `batch_size` x `seq_len`.\n- `max_steps` defines the maximum number of training steps. This is the total number of iterations the training process will run. It can be adjusted based on the specific needs of your training scenario. Total number of tokens seen during training is `max_steps` x `num_gpus` x `batch_size` x `seq_len`.\n- `optim.lr` defines the learning rate. This is the initial learning rate for the optimizer.\n- `optim.weight_decay` defines weight decay. Weight decay is a regularization technique used to prevent overfitting by penalizing large weights. We recommend leaving it at 0.1.\n- `optim.pct_start` defines the percentage of the total training steps used for the learning rate warm-up phase before it starts to decrease. 
It corresponds to pct_start of PyTorch's OneCycleLR.\n- `lora.rank` defines the size of the LoRA (Low-Rank Adaptation) adapters. We recommend 64 or less, which adjusts the rank of the low-rank decomposition used in LoRA.\n- `seed` defines the random seed for initialization and data shuffling\u002Fsampling. Setting a seed ensures reproducibility of results.\n- `log_freq` defines the logging frequency. This specifies how often (in steps) to log training metrics.\n- `data.instruct_data` is the path to the instruction data used for training. This field has to be filled with one or multiple data sources in the format as explained above. Each data source should either be a path to a jsonl file or a path to a directory containing jsonl files followed by a weighting to define the importance of this dataset: `\u003Cpath\u002Fto\u002Fdata_source>:\u003Cweight>`. E.g.: `data.instruct_data: \"\u002Fpath\u002Fto\u002Fdata1.jsonl:5.,\u002Fpath\u002Fto\u002Fdata2.jsonl:1.,\u002Fpath\u002Fto\u002Fdir_of_jsonls:1.\"`\n- `data.data` is an optional path to additional pretraining data in the format as explained above. Note that this field can be left blank.\n- `data.eval_instruct_data` is an optional path to evaluation instruction data to run cross-validation at every `eval_freq` steps. Cross-validation metrics are displayed as `loss` and `perplexity`.\n- `eval_freq` defines how often (in steps) to evaluate the model. This specifies the interval at which the model is evaluated on the validation set.\n- `no_eval` is a flag to enable or disable intermediate evaluation. Setting it to False enables periodic evaluation during training.\n- `ckpt_freq` defines how often (in steps) to save checkpoints. This specifies the interval at which the model's state is saved.\n- `save_adapters` defines whether to only save the trained LoRA checkpoints or whether the trained LoRA should directly be merged into the base model and saved. **Note**: When setting `save_adapters=False` make sure that you have enough CPU and GPU memory to save the full model on a single process (this is usually only possible for the 7B model).\n- `wandb.key` is used to pass your Weights & Biases (wandb) API key for logging. This allows you to log training metrics to the wandb dashboard.\n- `wandb.project` defines the wandb project name. This is where the training run will be logged in the wandb interface.\n\n## Inference\n\nOnce your model is trained, you should try it out in inference. We recommend using [mistral-inference](https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-inference). \n\nMake sure to have `mistral_inference` correctly installed:\n```\npip install mistral_inference\n```\n\nAssuming your `lora.safetensors` is saved under `$HOME\u002Fultra_chat_test\u002Fcheckpoints\u002Fcheckpoint_000300\u002Fconsolidated\u002Flora.safetensors`, you can chat with the model using `mistral_inference`, *e.g.*:\n\n```sh\nmistral-chat \u002Fmnt\u002Fslow\u002Fruns\u002Fpatrick\u002Fmistral-finetune\u002F7B\u002F --max_tokens 256 --temperature 1.0 --instruct --lora_path $HOME\u002Fultra_chat_test\u002Fcheckpoints\u002Fcheckpoint_000300\u002Fconsolidated\u002Flora.safetensors\n```\n\n## Adding Weights and Biases (wandb) Support\n\nWe have added explicit support for [Weights and Biases](https:\u002F\u002Fwww.wandb.com\u002F) to help you monitor and visualize your training runs. 
This integration allows you to log various metrics and track experiments easily.\n\n### Setting Up Weights and Biases\n\nTo use Weights and Biases with `mistral-finetune`, follow these steps:\n\n1. **Install Weights and Biases:**\n\n   Make sure you have the `wandb` library installed. You can install it using pip:\n\n```sh\n   pip install wandb\n```\n### Viewing Your Logs\n\nOnce the training starts, you can monitor the progress in real-time by visiting your wandb project dashboard. All metrics, including training loss, evaluation loss, learning rate, etc., will be logged and visualized.\n\nFor more details on how to use wandb, visit the [Weights and Biases documentation](https:\u002F\u002Fdocs.wandb.ai\u002F).\n\n## Model extension\n\n**Important**: Note that one can only fine-tune Mistral models that are compatible with the v3 tokenizer, which entails that the models have a vocabulary size of 32768 - not 32000. One can, however, easily extend older models with a vocabulary size of 32000 to a vocabulary size of 32768 by using:\n```\npython -m utils.extend_model_vocab --original_model_ckpt \u002Ffolder\u002Fto\u002Fold\u002Fmodel --extended_model_ckpt \u002Ffolder\u002Fto\u002Fextended\u002Fmodel\n```\n\nOnce the extension has worked, one can fine-tune using the newly created model checkpoint in `\u002Ffolder\u002Fto\u002Fextended\u002Fmodel`.\n\n## FAQ:\n\n> - What's the best practice of fine-tuning MoEs?\n\nWe see a higher degree of performance variance when fine-tuning MoE models. It's not unusual to find that fine-tuning MoE models with different seeds can lead to a high variance in performance. We did not observe such a high variance with dense models. Therefore, we suggest running multiple instances of the same fine-tuning process on MoE models and selecting the one that performs best.\n\n> - How can I determine the number of tokens used during the model training process?\n  \nYou can use the following script to find out: https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune\u002Fblob\u002Fmain\u002Futils\u002Fvalidate_data.py. This script accepts a .yaml training file as input and returns the number of tokens the model is being trained on.\n\n> - What should I do if I encounter a CUDA out-of-memory error?\n  \nOne possible solution is to reduce the batch size per GPU. The batch size in tokens is equal to `seq_len` x `batch_size`. Try setting `batch_size` to 1 and reduce `seq_len`. You can define the `batch_size` and `seq_len` in the .yaml file.
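\n\nTo see how much each knob saves, the token arithmetic from the [training configuration section](#customizing-training-configuration) can be made concrete; a small sketch with illustrative values:\n\n```py\n# Effective batch size (in tokens) and total training tokens, using the\n# formulas from the configuration section. The values are illustrative.\nnum_gpus, batch_size, seq_len, max_steps = 8, 1, 32768, 500\n\ntokens_per_step = num_gpus * batch_size * seq_len\ntrain_tokens = max_steps * tokens_per_step\n\nprint(f\"tokens per step: {tokens_per_step:,}\")  # 262,144\nprint(f\"train tokens:    {train_tokens:,}\")     # 131,072,000\n```\n\nHalving `seq_len` or `batch_size` halves the tokens processed (and the activation memory) per step.\n\n## License\n\nThis library is licensed under the Apache 2.0 License. 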
See the [LICENCE](.\u002FLICENCE) file for more information.\n\n*You must not use this library or our models in a manner that infringes, misappropriates, or otherwise violates any third party’s rights, including intellectual property rights.*\n","# Mistral-finetune\n\n\u003Ca target=\"_blank\" href=\"https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002Fmistralai\u002Fmistral-finetune\u002Fblob\u002Fmain\u002Ftutorials\u002Fmistral_finetune_7b.ipynb\">\n  \u003Cimg src=\"https:\u002F\u002Fcolab.research.google.com\u002Fassets\u002Fcolab-badge.svg\" alt=\"Open In Colab\"\u002F>\n\u003C\u002Fa>\n\n\n`mistral-finetune` 是一个轻量级代码库，支持对 Mistral 模型进行高效且节省显存的微调。它基于 [LoRA](https:\u002F\u002Farxiv.org\u002Fabs\u002F2106.09685)，这是一种训练范式：大部分权重被冻结，仅对以低秩矩阵扰动形式存在的 1-2% 额外权重进行训练。\n\n为获得最佳效率，建议使用 A100 或 H100 GPU。该代码库针对多 GPU 单节点训练环境进行了优化，但对于较小的模型（如 7B），单个 GPU 也足够。\n\n> **注意**\n> \n> - 本仓库的目标是提供一个简单、有指导性的入口，用于微调 Mistral 模型。\n> 因此，它在某些方面（尤其是数据格式）较为固定，并不旨在覆盖多种模型架构或硬件类型。\n> 如果您需要更通用的方法，可以参考其他优秀的项目，例如 [torchtune](https:\u002F\u002Fpytorch.org\u002Ftorchtune\u002Fstable\u002Foverview.html)。\n\n\n## 最新消息\n\n- **2024年8月13日**：[Mistral Large v2](https:\u002F\u002Fmistral.ai\u002Fnews\u002Fmistral-large-2407\u002F) 现已兼容 `mistral-finetune`！\n  - 1. 请从 [这里](#model-download) 下载 123B 的 Instruct 版本，并将 `model_id_or_path` 设置为下载的检查点目录。\n  - 2. 微调 Mistral-Large v2 由于模型规模较大，需要显著更多的显存。目前请将 `seq_len` 设置为 ≤ 8192。\n  - 3. 建议使用比其他模型更低的学习率，例如 lr=1e-6 在大多数情况下效果良好。\n\n- **2024年7月19日**：[Mistral Nemo](https:\u002F\u002Fmistral.ai\u002Fnews\u002Fmistral-nemo\u002F) 现已兼容 `mistral-finetune`！\n  - 1. 请从 [这里](#model-download) 下载 12B 的 Base 或 Instruct 版本，并将 `model_id_or_path` 设置为下载的检查点目录。\n  - 2. 运行 `pip install --upgrade mistral-common` 以获取支持 Tekkenizer 的版本（≥1.3.1）。\n  - 3. 目前，微调 Mistral-Nemo 由于词汇表规模更大，导致交叉熵损失的峰值显存需求激增（我们很快会在此处添加改进的交叉熵损失）。因此，暂时请将 `seq_len` 设置为 ≤ 16384。\n  - 4. 建议使用与 7B v3 相同的超参数。\n\n## 安装\n\n要开始使用 Mistral LoRA 进行微调，请按照以下步骤操作：\n\n1. 克隆本仓库：\n```\ncd $HOME && git clone https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune.git\n```\n\n2. 
安装所有必需的依赖项：\n```\ncd mistral-finetune\npip install -r requirements.txt\n```\n\n## 模型下载\n\n我们推荐微调官方的 Mistral 模型之一，您可以在这里下载：\n\n| 模型          | 链接                                                                                                    | 校验和                          |\n|----------------|---------------------------------------------------------------------------------------------------------|-----------------------------------|\n| 7B Base V3       | [7B Base](https:\u002F\u002Fmodels.mistralcdn.com\u002Fmistral-7b-v0-3\u002Fmistral-7B-v0.3.tar)                            | `0663b293810d7571dad25dae2f2a5806`|\n| 7B Instruct v3 | [7B Instruct v3](https:\u002F\u002Fmodels.mistralcdn.com\u002Fmistral-7b-v0-3\u002Fmistral-7B-Instruct-v0.3.tar)             | `80b71fcb6416085bcb4efad86dfb4d52`|\n| 8x7B Base V1   | [8x7B Base](https:\u002F\u002Fhuggingface.co\u002Fmistralai\u002FMixtral-8x7B-v0.1)                                                                        | (HF 链接)                                |\n| 8x7B Instruct V1 | [8x7B Instruct](https:\u002F\u002Fmodels.mistralcdn.com\u002Fmixtral-8x7b-v0-1\u002FMixtral-8x7B-v0.1-Instruct.tar) | `8e2d3930145dc43d3084396f49d38a3f` |\n| 8x22B Instruct V3 | [8x22B Instruct](https:\u002F\u002Fmodels.mistralcdn.com\u002Fmixtral-8x22b-v0-3\u002Fmixtral-8x22B-Instruct-v0.3.tar)        | `471a02a6902706a2f1e44a693813855b`|\n| 8x22B Base V3  | [8x22B Base](https:\u002F\u002Fmodels.mistralcdn.com\u002Fmixtral-8x22b-v0-3\u002Fmixtral-8x22B-v0.3.tar)                        | `a2fa75117174f87d1197e3a4eb50371a`|\n| 12B Instruct | [12B Instruct (Mistral-Nemo)](https:\u002F\u002Fmodels.mistralcdn.com\u002Fmistral-nemo-2407\u002Fmistral-nemo-instruct-2407.tar) | `296fbdf911cb88e6f0be74cd04827fe7` |\n| 12B Base | [12B Base (Mistral-Nemo)](https:\u002F\u002Fmodels.mistralcdn.com\u002Fmistral-nemo-2407\u002Fmistral-nemo-base-2407.tar) | `c5d079ac4b55fc1ae35f51f0a3c0eb83` |\n| Mistral Large 2 | [123B Instruct (Large v2)](https:\u002F\u002Fmodels.mistralcdn.com\u002Fmistral-large-2407\u002Fmistral-large-instruct-2407.tar) | `fc602155f9e39151fba81fcaab2fa7c4` |\n\n**重要提示**：对于 8x7B Base V1 和 8x7B Instruct V1，必须使用我们的 v3 分词器，并在微调之前将词汇表大小扩展到 32768。有关此过程的详细说明，请参阅“模型扩展”部分：[https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune?tab=readme-ov-file#model-extension]。\n\n例如，要下载 7B-base 模型，可以运行以下命令：\n```sh\nmkdir -p ${HOME}\u002Fmistral_models\ncd ${HOME} && wget https:\u002F\u002Fmodels.mistralcdn.com\u002Fmistral-7b-v0-3\u002Fmistral-7B-v0.3.tar\ntar -xf mistral-7B-v0.3.tar -C mistral_models\n```\n\n请务必修改您的训练脚本，并将下载文件夹的路径作为 `model_id_or_path` 添加进去。\n\n例如，修改 [example\u002F7B.yaml](https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune\u002Fblob\u002Fmain\u002Fexample\u002F7B.yaml)，加入 `$HOME\u002Fmistral_models\u002F7B` 的绝对路径：\n\n```\nmodel_id_or_path: \"\u002FUsers\u002Fjohndoe\u002Fmistral_models\u002F7B\"\n```\n\n## 准备数据集 \n\n为确保训练效果，`mistral-finetune` 对训练数据的格式有严格要求。\n\n所有数据文件必须以 jsonl 格式存储。\n\n您可以构建两种类型的数据文件：\n\n### _预训练_：\n\n预训练数据对应于存储在 `\"text\"` 键中的纯文本数据。例如：\n\n```jsonl\n{\"text\": \"文档第1号中包含的文本\"}\n{\"text\": \"文档第2号中包含的文本\"}\n```\n\n### _指令_：\n\n目前支持两种不同类型的指令遵循数据：\n\n- _指令_：以列表形式存储在 `\"messages\"` 键中的对话数据。每个列表项是一个字典，包含 `\"content\"` 和 `\"role\"` 键。`\"role\"` 是一个字符串，取值为 `\"user\"`、`\"assistant\"` 或 `\"system\"`。只有当 `\"role\" == \"assistant\"` 时才会计算损失。例如：\n\n```jsonl\n{\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"文档1中的用户交互第1条\"\n    },\n    {\n      \"role\": \"assistant\",\n      \"content\": 
\"文档1中的机器人交互第1条\"\n    },\n    {\n      \"role\": \"user\",\n      \"content\": \"文档1中的用户交互第2条\"\n    },\n    {\n      \"role\": \"assistant\",\n      \"content\": \"文档1中的机器人交互第2条\"\n    }\n  ]\n}\n{\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"文档2中的用户交互第1条\"\n    },\n    {\n      \"role\": \"assistant\",\n      \"content\": \"文档2中的机器人交互第1条\"\n    },\n    {\n      \"role\": \"user\",\n      \"content\": \"文档2中的用户交互第2条\"\n    },\n    {\n      \"role\": \"assistant\",\n      \"content\": \"文档2中的机器人交互第2条\",\n      \"weight\": 0,  # 不对第2条进行训练\n    },\n    {\n      \"role\": \"user\",\n      \"content\": \"文档2中的用户交互第3条\"\n    },\n    {\n      \"role\": \"assistant\",\n      \"content\": \"文档2中的机器人交互第3条\"\n    }\n  ]\n}\n```\n\n- _函数调用_：以列表形式存储在 `\"messages\"` 键中的对话数据。每个列表项是一个字典，包含 `\"role\"` 和 `\"content\"` 或 `\"tool_calls\"` 键。`\"role\"` 是一个字符串，取值为 `\"user\"`、`\"assistant\"`、`\"system\"` 或 `\"tool\"`。只有当 `\"role\" == \"assistant\"` 时才会计算损失。\n\n**注意**：在函数调用中，`\"tool_calls\"` 的 `\"id\"` 和 `\"tool_call_id\"` 是随机生成的、长度恰好为9个字符的字符串。我们建议在数据准备脚本中自动生成这些ID，如[此处](https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune\u002Fblob\u002F208b25c0f7299bb78d06cea25b82adee03834319\u002Futils\u002Freformat_data_glaive.py#L74)所示。\n\n例如：\n\n```jsonl\n{\n  \"messages\": [\n    {\n      \"role\": \"system\",\n      \"content\": \"你是一位助手，可以访问以下函数来帮助用户，必要时可以调用这些函数\"\n    },\n    {\n      \"role\": \"user\",\n      \"content\": \"你能帮我生成单词‘listen’的字谜吗？\"\n    },\n    {\n      \"role\": \"assistant\",\n      \"tool_calls\": [\n        {\n          \"id\": \"TX92Jm8Zi\",\n          \"type\": \"function\",\n          \"function\": {\n            \"name\": \"generate_anagram\",\n            \"arguments\": \"{\\\"word\\\": \\\"listen\\\"}\"\n          }\n        }\n      ]\n    },\n    {\n      \"role\": \"tool\",\n      \"content\": \"{\\\"anagram\\\": \\\"silent\\\"}\",\n      \"tool_call_id\": \"TX92Jm8Zi\"\n    },\n    {\n      \"role\": \"assistant\",\n      \"content\": \"单词‘listen’的字谜是‘silent’。\"\n    },\n    {\n      \"role\": \"user\",\n      \"content\": \"太棒了！那你能再生成一个‘race’的字谜吗？\"\n    },\n    {\n      \"role\": \"assistant\",\n      \"tool_calls\": [\n        {\n          \"id\": \"3XhQnxLsT\",\n          \"type\": \"function\",\n          \"function\": {\n            \"name\": \"generate_anagram\",\n            \"arguments\": \"{\\\"word\\\": \\\"race\\\"}\"\n          }\n        }\n      ]\n    }\n  ],\n  \"tools\": [\n    {\n      \"type\": \"function\",\n      \"function\": {\n        \"name\": \"generate_anagram\",\n        \"description\": \"生成给定单词的字谜\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"word\": {\n              \"type\": \"string\",\n              \"description\": \"要生成字谜的单词\"\n            }\n          },\n          \"required\": [\n            \"word\"\n          ]\n        }\n      }\n    }\n  ]\n}\n```\n\n## 数据集验证\n\n在开始训练之前，您应该验证数据集的格式是否正确，并估算训练所需时间。您可以使用 [.\u002Futils\u002Fvalidate_data](https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune\u002Fblob\u002Fmain\u002Futils\u002Fvalidate_data.py) 脚本来完成此操作。\n\n请注意，这一步骤至关重要，可确保数据格式正确无误。\n\n### 指令遵循\n\n让我们通过一个简单的示例来训练一个指令遵循模型：\n\n- 1. 
**加载一段 [Ultachat_200k](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FHuggingFaceH4\u002Fultrachat_200k) 数据**\n\n创建数据文件夹并进入该文件夹。\n```sh\ncd $HOME && mkdir -p data && cd $HOME\u002Fdata\n```\n\n将数据加载到 Pandas DataFrame 中。\n\n**注意**：请确保已安装 pandas 和 pyarrow（`pip install pandas pyarrow`）。\n\n```py\nimport pandas as pd\n\ndf = pd.read_parquet('https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FHuggingFaceH4\u002Fultrachat_200k\u002Fresolve\u002Fmain\u002Fdata\u002Ftest_gen-00000-of-00001-3d4cd8309148a71f.parquet')\n```\n- 2. 划分训练集和评估集\n\n```py\ndf_train=df.sample(frac=0.95,random_state=200)\ndf_eval=df.drop(df_train.index)\n```\n\n- 3. 将数据保存为 jsonl 格式\n\n```py\ndf_train.to_json(\"ultrachat_chunk_train.jsonl\", orient=\"records\", lines=True)\ndf_eval.to_json(\"ultrachat_chunk_eval.jsonl\", orient=\"records\", lines=True)\n```\n\n- 4. 修改训练配置文件以包含 ultrachat 数据集，并验证配置文件\n\n修改 [example\u002F7B.yaml](https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune\u002Fblob\u002Fmain\u002Fexample\u002F7B.yaml)，加入 `$HOME\u002Fdata\u002Fultrachat_chunk_train.jsonl` 的绝对路径以及训练用的数据集混合权重，同时加入 `$HOME\u002Fdata\u002Fultrachat_chunk_eval.jsonl` 作为评估数据，例如：\n\n```\ndata:\n  instruct_data: \"\u002FUsers\u002Fjohndoe\u002Fdata\u002Fultrachat_chunk_train.jsonl\"\n  eval_instruct_data: \"\u002FUsers\u002Fjohndoe\u002Fdata\u002Fultrachat_chunk_eval.jsonl\"\n```\n\n现在可以验证你的训练配置文件，以确保数据格式正确，并估算训练所需时间。\n\n```\ncd $HOME\u002Fmistral-finetune\npython -m utils.validate_data --train_yaml example\u002F7B.yaml\n```\n\n运行完成后，你应该会看到类似以下的错误报告：\n\n```\n\u002FUsers\u002Fjohndoe\u002Fdata\u002Fultrachat_chunk_eval.jsonl 数据集中第1412行的数据格式不正确。期望最后一个角色是 [assistant]，但实际是 user。\n\u002FUsers\u002Fjohndoe\u002Fdata\u002Fultrachat_chunk_eval.jsonl 数据集中第1413行的数据格式不正确。期望最后一个角色是 [assistant]，但实际是 user。\n\u002FUsers\u002Fjohndoe\u002Fdata\u002Fultrachat_chunk_eval.jsonl 数据集中第1414行的数据格式不正确。期望最后一个角色是 [assistant]，但实际是 user。\n\u002FUsers\u002Fjohndoe\u002Fdata\u002Fultrachat_chunk_eval.jsonl 数据集中第1415行的数据格式不正确。期望最后一个角色是 [assistant]，但实际是 user。\n```\n\n许多对话似乎以 `user` 角色结束，而我们只训练 `assistant` 消息，因此这些不必要的 `user` 角色会导致数据被无谓地处理。\n\n你可以使用 [.\u002Futils\u002Freformat_data.py](https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune\u002Fblob\u002Fmain\u002Futils\u002Freformat_data.py) 来修正数据：\n\n```\ncd $HOME\u002Fmistral-finetune\npython -m utils.reformat_data $HOME\u002Fdata\u002Fultrachat_chunk_train.jsonl\npython -m utils.reformat_data $HOME\u002Fdata\u002Fultrachat_chunk_eval.jsonl\n```\n\n你可能会发现有少数样本被跳过。\n\n- 5. 可能需要调整训练步数\n\n在修正数据集后，再次运行脚本：\n\n```\ncd $HOME\u002Fmistral-finetune\npython -m utils.validate_data --train_yaml example\u002F7B.yaml\n```\n\n你应该会得到关于数据输入和训练参数的摘要：\n\n```\n训练状态\n --------------------\n{\n   \"expected\": {\n       \"eta\": \"00:52:44\",\n       \"data_tokens\": 25169147,\n       \"train_tokens\": 131072000,\n       \"epochs\": \"5.21\",\n       \"max_steps\": 500,\n       \"data_tokens_per_dataset\": {\n           \"\u002FUsers\u002Fjohndoe\u002Fdata\u002Fultrachat_chunk_train.jsonl\": \"25169147.0\"\n       },\n       \"train_tokens_per_dataset\": {\n           \"\u002FUsers\u002Fjohndoe\u002Fdata\u002Fultrachat_chunk_train.jsonl\": \"131072000.0\"\n       },\n       \"epochs_per_dataset\": {\n           \"\u002FUsers\u002Fjohndoe\u002Fdata\u002Fultrachat_chunk_train.jsonl\": \"5.2\"\n       }\n   },\n}\n```\n\n将 `max_steps` 设置为 500 步意味着大约会遍历数据集 5 次，这在合理范围内，但可能稍显过多。推荐的设置如下，这样在一个 8xH100 集群上只需约 30 分钟即可完成训练。\n\n### 函数调用\n\n接下来，我们来看一个更高级的用例，即微调一个支持函数调用的模型。函数调用要求数据必须按照 [上述说明](#instruct) 的格式进行组织。下面是一个示例。\n\n- 1. 
**加载 [Glaive 函数调用数据集](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FLocutusque\u002Ffunction-calling-chatml) 的聊天格式版本**\n\n创建数据文件夹并进入该文件夹。\n```sh\ncd $HOME && mkdir -p data && cd $HOME\u002Fdata\n```\n\n将数据加载到 Pandas DataFrame 中。\n\n**注意**：请确保已安装 pandas 和 pyarrow（`pip install pandas pyarrow`）。\n\n```py\nimport pandas as pd\n\ndf = pd.read_parquet('https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FLocutusque\u002Ffunction-calling-chatml\u002Fresolve\u002Fmain\u002Fdata\u002Ftrain-00000-of-00001-f0b56c6983b4a78f.parquet')\n```\n- 2. 划分训练集和评估集\n\n```py\ndf_train=df.sample(frac=0.95,random_state=200)\ndf_eval=df.drop(df_train.index)\n```\n\n- 3. 将数据保存为 jsonl 格式\n\n```py\ndf_train.to_json(\"glaive_train.jsonl\", orient=\"records\", lines=True)\ndf_eval.to_json(\"glaive_eval.jsonl\", orient=\"records\", lines=True)\n```\n\n- 4. 重新格式化数据集\n\n可以看出，该数据集并不符合所需的函数调用格式，因此需要进行重新格式化。例如，应将 `\"from\"` 改名为 `\"user\"`，并移除多余的 `\"\\n\"` 字符。对于这个数据集，你可以使用 [`.\u002Futils\u002Freformat_data_glaive.py`](https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune\u002Fblob\u002Fmain\u002Futils\u002Freformat_data_glaive.py)：\n\n```\ncd $HOME\u002Fmistral-finetune\npython -m utils.reformat_data_glaive $HOME\u002Fdata\u002Fglaive_train.jsonl\npython -m utils.reformat_data_glaive $HOME\u002Fdata\u002Fglaive_eval.jsonl\n```\n\n运行此命令后，大多数样本应该会符合正确的格式。\n\n**注意**：不可能编写适用于所有类型数据集的重新格式化脚本。如果你的数据尚未符合上述要求的格式，很可能需要自己编写重新格式化脚本（此时 mistral-chat 或 chat-gpt 就是你最好的帮手！）。\n\n- 5. 验证数据集\n\n现在可以在 `example\u002F7B.yaml` 中将 `data.instruct_data` 和 `data.eval_instruct_data` 分别设置为 `$HOME\u002Fdata\u002Fglaive_train.jsonl` 和 `$HOME\u002Fdata\u002Fglaive_eval.jsonl`，以验证数据集。\n\n经过重新格式化的数据集仍然存在一些错误，可以通过 `--create_corrected` 参数来修复。为此，请按如下方式添加 `--create_corrected`：\n\n```\ncd $HOME\u002Fmistral-finetune\npython -m utils.validate_data --train_yaml example\u002F7B.yaml --create_corrected\n```\n\n运行此命令后，系统会显示一些错误，并生成两个新的数据集 `$HOME\u002Fdata\u002Fglaive_train.jsonl.corrected` 和 `$HOME\u002Fdata\u002Fglaive_eval.jsonl.corrected`。请务必在 `example\u002F7B.yaml` 中使用这两个数据集，然后再次运行命令。此时，数据集应该已经正确格式化了！\n\n## 开始训练\n\n在完成了[数据集验证部分](#verify-dataset)之后，我们现在可以开始训练了。为了加快训练速度，我们建议将`max_steps`设置为仅300步。请确保将`run_dir`定义为你实验的文件夹，并可选地将`wandb.project`设置为一个用于日志记录的Weights & Biases项目，例如：\n```\nmax_steps: 300\nrun_dir: \"\u002FUsers\u002Fjohndoe\u002Fultra_chat_test\"\nwandb.project: ultra_chat\n```\n\n保存训练配置并开始训练！请务必把`--nproc-per-node`设置为可用的GPU数量。\n\n```\ncd $HOME\u002Fmistral-finetune\ntorchrun --nproc-per-node 8 --master_port $RANDOM -m train example\u002F7B.yaml\n```\n\n在ultra-chat数据集上进行训练，在一台配备8块H100显卡的节点上大约需要30分钟，最终得到的权重应该能在MT Bench上取得约6.3分的成绩。\n\n而在glaive数据集上进行训练，则大约需要1小时，在同样的硬件条件下，生成的权重将非常适合用于函数调用任务。\n\n## 自定义训练配置\n\n示例配置`example\u002F7B.yaml`已经为学习率、权重衰减等参数设定了合理的值，但建议你根据自己的使用场景对这些设置进行调整。\n\n一般来说，训练配置应包含以下参数：\n\n- `model_id_or_path`：指定开始训练的基础模型。这可以是预训练模型的路径，也可以是本地模型目录。\n- `run_dir`：指定存储训练检查点和指标的目录。\n- `seq_len`：定义训练时的序列长度。这是模型能够处理的最大输入序列长度。为了提高训练效率，样本会被打包到`seq_len`的长度。\n- `batch_size`：每张GPU使用的训练样本数。**注意**：所有GPU上的总有效批量大小（以token数计）等于`num_gpus` × `batch_size` × `seq_len`。\n- `max_steps`：最大训练步数。这是训练过程将运行的总迭代次数。可以根据具体的训练需求进行调整。整个训练过程中看到的总token数为`max_steps` × `num_gpus` × `batch_size` × `seq_len`。\n- `optim.lr`：学习率。这是优化器的初始学习率。\n- `optim.weight_decay`：权重衰减。权重衰减是一种正则化技术，通过惩罚过大的权重来防止过拟合。我们建议将其保持在0.1。\n- `optim.pct_start`：在学习率开始下降之前，用于学习率预热阶段的训练总步数百分比。它对应于PyTorch的OneCycleLR中的pct_start。\n- `lora.rank`：LoRA（低秩适应）适配器的规模。我们推荐设置为64或更小，这会调整LoRA中使用的低秩分解的秩。\n- `seed`：初始化以及数据打乱和采样的随机种子。设置种子可以确保结果的可重复性。\n- 
`log_freq`：日志记录频率。这指定了每隔多少步记录一次训练指标。\n- `data.instruct_data`：用于训练的指令数据路径。该字段必须填写一个或多个数据源，格式如上文所述。每个数据源可以是jsonl文件的路径，也可以是包含jsonl文件的目录路径，并在其后加上权重以定义该数据集的重要性：`\u003Cpath\u002Fto\u002Fdata_source>:\u003Cweight>`。例如：`data.instruct_data: \"\u002Fpath\u002Fto\u002Fdata1.jsonl:5.,\u002Fpath\u002Fto\u002Fdata2.jsonl:1.,\u002Fpath\u002Fto\u002Fdir_of_jsonls:1.\"`\n- `data.data`：可选的额外预训练数据路径，格式同上。请注意，此字段可以留空。\n- `data.eval_instruct_data`：可选的评估指令数据路径，用于每隔`eval_freq`步进行交叉验证。交叉验证指标将以`loss`和`perplexity`的形式显示。\n- `eval_freq`：模型评估的频率。这指定了模型在验证集上进行评估的间隔。\n- `no_eval`：中间评估的开关标志。将其设置为False即可在训练过程中定期进行评估。\n- `ckpt_freq`：检查点保存频率。这指定了模型状态被保存的间隔。\n- `save_adapters`：决定是仅保存训练好的LoRA检查点，还是将训练好的LoRA直接合并到基础模型中并保存。**注意**：当设置`save_adapters=False`时，请确保有足够的CPU和GPU内存来在一个进程中保存完整的模型（通常只有7B模型才可能做到这一点）。\n- `wandb.key`：用于传递你的Weights & Biases（wandb）API密钥以便进行日志记录。这样你可以将训练指标记录到wandb仪表板上。\n- `wandb.project`：指定wandb项目的名称。训练过程的所有信息都将被记录在这个项目中。\n\n## 推理\n\n一旦你的模型训练完成，你应该尝试对其进行推理测试。我们推荐使用[mistral-inference](https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-inference)。\n\n请确保正确安装了`mistral_inference`：\n```\npip install mistral_inference\n```\n\n假设你的`lora.safetensors`保存在`$HOME\u002Fultra_chat_test\u002Fcheckpoints\u002Fcheckpoint_000300\u002Fconsolidated\u002Flora.safetensors`，那么你可以使用`mistral_inference`与模型对话，例如：\n```sh\nmistral-chat \u002Fmnt\u002Fslow\u002Fruns\u002Fpatrick\u002Fmistral-finetune\u002F7B\u002F --max_tokens 256 --temperature 1.0 --instruct --lora_path $HOME\u002Fultra_chat_test\u002Fcheckpoints\u002Fcheckpoint_000300\u002Fconsolidated\u002Flora.safetensors\n```\n\n## 添加Weights and Biases（wandb）支持\n\n我们已明确添加了对[Weights and Biases](https:\u002F\u002Fwww.wandb.com\u002F)的支持，以帮助你监控和可视化训练过程。这一集成使你可以轻松记录各种指标并跟踪实验。\n\n### 设置Weights and Biases\n\n要将Weights and Biases与`mistral-finetune`结合使用，请按照以下步骤操作：\n\n1. 
**安装Weights and Biases：**\n\n   确保已安装`wandb`库。你可以通过pip进行安装：\n```sh\n   pip install wandb\n```\n### 查看你的日志\n\n训练开始后，你可以通过访问你的wandb项目仪表板实时监控训练进度。所有的指标，包括训练损失、评估损失、学习率等，都会被记录并可视化。\n\n有关如何使用wandb的更多详细信息，请参阅[Weights and Biases文档](https:\u002F\u002Fdocs.wandb.ai\u002F)。\n\n## 模型扩展\n\n**重要提示**：请注意，只能对兼容v3分词器的Mistral模型进行微调，这意味着这些模型的词汇表大小必须是32768，而不是32000。不过，你可以很容易地将旧版词汇表大小为32000的模型扩展到32768，方法如下：\n```\npython -m utils.extend_model_vocab --original_model_ckpt \u002Ffolder\u002Fto\u002Fold\u002Fmodel --extended_model_ckpt \u002Ffolder\u002Fto\u002Fextended\u002Fmodel\n```\n\n扩展完成后，你就可以使用新创建的模型检查点（位于`\u002Ffolder\u002Fto\u002Fextended\u002Fmodel`）来进行微调了。\n\n## 常见问题解答：\n\n> - 微调 MoE 模型的最佳实践是什么？\n\n我们在微调 MoE 模型时观察到性能差异较大。使用不同随机种子对 MoE 模型进行微调，往往会导致性能出现显著波动。而在密集模型中，我们并未观察到如此大的差异。因此，我们建议对 MoE 模型运行多次相同的微调过程，并选择表现最佳的实例。\n\n> - 如何确定模型训练过程中使用的令牌数量？\n\n您可以使用以下脚本进行查询：https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune\u002Fblob\u002Fmain\u002Futils\u002Fvalidate_data.py。该脚本接受一个 .yaml 格式的训练配置文件作为输入，并输出模型正在训练的令牌总数。\n\n> - 如果遇到 CUDA 内存不足错误，该怎么办？\n\n一种可能的解决方案是减少每个 GPU 的批次大小。以 token 计的批次大小等于 `seq_len` 乘以 `batch_size`。您可以尝试将 `batch_size` 设置为 1，并相应地减小 `seq_len`。您可以在 .yaml 配置文件中定义 `batch_size` 和 `seq_len`。\n\n## 许可证\n\n本库采用 Apache 2.0 许可证授权。更多信息请参阅 [LICENCE](.\u002FLICENCE) 文件。\n\n*您不得以侵犯、盗用或以其他方式违反任何第三方权利（包括知识产权）的方式使用本库或我们的模型。*","# Mistral-finetune 快速上手指南\n\n`mistral-finetune` 是 Mistral AI 官方提供的轻量级代码库，专为高效、低显存占用地微调 Mistral 系列模型（如 7B, Nemo, Large v2 等）而设计。它基于 LoRA 技术，仅训练少量参数，适合单卡或多卡单机环境。\n\n## 环境准备\n\n### 系统要求\n- **操作系统**: Linux (推荐) 或 macOS\n- **GPU**: \n  - 推荐：NVIDIA A100 或 H100 以获得最佳效率。\n  - 最小配置：对于 7B 模型，单张消费级 GPU（如 RTX 3090\u002F4090）即可运行。\n  - 注意：微调 Mistral-Nemo (12B) 或 Mistral-Large (123B) 需要更大的显存。\n- **Python**: 3.10 或更高版本\n\n### 前置依赖\n确保已安装以下基础工具：\n- `git`\n- `pip`\n- `cuda` (驱动版本需与 PyTorch 兼容)\n\n> **提示**：国内用户建议在安装 Python 依赖时使用清华或阿里镜像源加速：\n> `pip install -r requirements.txt -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple`\n\n## 安装步骤\n\n1. **克隆仓库**\n   ```bash\n   cd $HOME && git clone https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune.git\n   ```\n\n2. **进入目录并安装依赖**\n   ```bash\n   cd mistral-finetune\n   pip install -r requirements.txt\n   ```\n   *(若需微调 Mistral-Nemo 模型，请额外执行：`pip install --upgrade mistral-common`)*\n\n3. **下载模型权重**\n   从官方链接下载模型并解压。以 **Mistral 7B Base V3** 为例：\n   ```bash\n   mkdir -p ~\u002Fmistral_models\n   cd ~\n   wget https:\u002F\u002Fmodels.mistralcdn.com\u002Fmistral-7b-v0-3\u002Fmistral-7B-v0.3.tar\n   tar -xf mistral-7B-v0.3.tar -C mistral_models\n   ```\n   *其他模型（如 Instruct 版、Nemo、Large v2）下载地址请参考项目 README 中的表格。*\n\n## 基本使用\n\n### 1. 准备数据集\n`mistral-finetune` 对数据格式有严格要求，必须为 `jsonl` 格式。\n\n**指令微调 (Instruct) 数据示例** (`data.jsonl`)：\n```jsonl\n{\"messages\": [{\"role\": \"user\", \"content\": \"你好\"}, {\"role\": \"assistant\", \"content\": \"你好！有什么我可以帮你的吗？\"}]}\n{\"messages\": [{\"role\": \"user\", \"content\": \"写一首诗\"}, {\"role\": \"assistant\", \"content\": \"春眠不觉晓...\"}]}\n```\n*注意：只有 `role` 为 `assistant` 的内容会参与损失计算。*\n\n### 2. 配置训练参数\n复制示例配置文件并根据实际情况修改。主要需更改 `model_id_or_path` 和数据路径。\n\n```bash\ncp example\u002F7B.yaml my_config.yaml\n```\n\n编辑 `my_config.yaml`：\n```yaml\nmodel_id_or_path: \"\u002Froot\u002Fmistral_models\u002F7B\"  # 替换为你下载的模型绝对路径\ndata:\n  instruct_data: \"\u002Froot\u002Fdata\u002Ftrain.jsonl\"    # 替换为你的训练数据路径\n  eval_instruct_data: \"\u002Froot\u002Fdata\u002Feval.jsonl\" # 替换为你的评估数据路径（可选）\n# 其他超参数可根据需求调整，如 learning_rate, seq_len 等\n```
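\n\n如果想用脚本批量生成上述 jsonl 训练数据，可以参考下面的示意代码（仅为示例，字段格式以上文说明为准）：\n\n```py\nimport json\n\n# 每条样本是一段 messages 对话；只有 assistant 角色的内容参与损失计算。\nsamples = [\n    {\"messages\": [\n        {\"role\": \"user\", \"content\": \"你好\"},\n        {\"role\": \"assistant\", \"content\": \"你好！有什么我可以帮你的吗？\"},\n    ]},\n]\n\nwith open(\"train.jsonl\", \"w\", encoding=\"utf-8\") as f:\n    for sample in samples:\n        # ensure_ascii=False 保留中文字符；每行写入一个 JSON 对象（jsonl 格式）。\n        f.write(json.dumps(sample, ensure_ascii=False) + \"\\n\")\n```\n\n### 3. 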
验证数据格式（强烈推荐）\n在开始训练前，先运行验证脚本检查数据格式并预估训练时间：\n```bash\npython -m utils.validate_data --train_yaml my_config.yaml\n```\n如果输出无报错且显示预估时间，则数据格式正确。\n\n### 4. 启动微调\n使用以下命令开始训练（将 `--nproc-per-node` 设置为可用 GPU 数量，此处以单卡为例）：\n```bash\ntorchrun --nproc-per-node 1 --master_port $RANDOM -m train my_config.yaml\n```\n\n训练完成后，LoRA 适配器权重将保存在指定输出目录中，可配合原始模型进行推理。","某电商公司的算法团队需要将通用的 Mistral 7B 大模型快速改造为精通自家商品知识库和售后话术的专属客服助手。\n\n### 没有 mistral-finetune 时\n- **显存门槛极高**：传统全量微调需要加载全部参数梯度，单张消费级显卡无法运行，必须租用昂贵的多卡 A100\u002FH100 集群，成本高昂。\n- **配置复杂易错**：手动搭建 LoRA 训练环境需编写大量样板代码，且在数据格式对齐、分词器适配上容易出错，调试周期长达数天。\n- **资源利用率低**：缺乏针对 Mistral 架构的深度优化，训练过程中显存峰值波动大，经常因内存溢出（OOM）导致任务中断。\n- **迭代效率低下**：从数据准备到模型产出耗时过长，难以支持业务部门对促销策略变更所需的“小时级”模型更新需求。\n\n### 使用 mistral-finetune 后\n- **硬件成本骤降**：基于高效的 LoRA 范式，仅训练 1-2% 的参数，使得单张 GPU 即可流畅完成 7B 模型的微调，大幅降低算力预算。\n- **开箱即用体验**：提供标准化的数据格式指引和预设脚本，团队只需关注业务数据本身，半天内即可完成从环境部署到启动训练的全过程。\n- **训练稳定高效**：代码库针对 Mistral 模型结构进行了专项内存优化，显著降低峰值显存占用，确保长序列训练过程稳定不崩溃。\n- **敏捷响应业务**：训练速度显著提升，算法团队能在促销活动期间实时根据新话术调整模型，实现了“上午更新数据，下午上线模型”的敏捷闭环。\n\nmistral-finetune 通过极致的内存效率和简化的工作流，让中小企业也能以低成本实现大模型的垂直领域定制化落地。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmistralai_mistral-finetune_f40dbebd.png","mistralai","Mistral AI","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fmistralai_7094b1e0.png","contact@mistral.ai","mistral.ai","https:\u002F\u002Fgithub.com\u002Fmistralai",[80,84],{"name":81,"color":82,"percentage":83},"Python","#3572A5",52.9,{"name":85,"color":86,"percentage":87},"Jupyter Notebook","#DA5B0B",47.1,3088,312,"2026-04-17T14:17:41","Apache-2.0","未说明","必需。推荐使用 NVIDIA A100 或 H100 GPU。针对 7B 模型单卡即可，多卡需单机多卡设置。微调 Mistral-Large v2 (123B) 和 Mistral-Nemo (12B) 需要显著更多的显存。",{"notes":95,"python":92,"dependencies":96},"该工具基于 LoRA 进行内存高效的微调。对于 8x7B V1 模型，微调前必须使用 v3 tokenizer 并将词表扩展至 32768。微调 Mistral-Large v2 时建议序列长度设为\u003C=8192 并使用较低学习率 (如 1e-6)。微调 Mistral-Nemo 时建议序列长度设为\u003C=16384。训练数据必须严格格式化为 jsonl 文件，并在运行前使用 validate_data 脚本验证。",[97,98,99],"mistral-common>=1.3.1 (针对 Mistral Nemo)","pandas","pyarrow",[44,14],"2026-03-27T02:49:30.150509","2026-04-20T10:37:11.883285",[104,109,114,119,124,129],{"id":105,"question_zh":106,"answer_zh":107,"source_url":108},44436,"如何微调 Mistral 8x7B 和 8x22B 模型？需要多少显卡资源？","微调 8x7B 和 8x22B 的方法与微调 7B 模型完全相同。硬件需求方面：微调 8x7B 建议使用 2-4 张 80GB 显存的 GPU；微调 8x22B 则需要 8 张 80GB 显存的 GPU。Mistral 的 MoE（混合专家）路由等机制已内置支持，无需特殊实现。","https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune\u002Fissues\u002F11",{"id":110,"question_zh":111,"answer_zh":112,"source_url":113},44437,"在 V100 GPU 上运行报错或不支持怎么办？如何解决显存溢出（OOM）问题？","V100 不支持 bfloat16，需将数据类型改为 float16。解决方法有两种：\n1. 修改代码：在 train.py 中将 dtype 从 torch.bfloat16 改为 torch.float16。\n2. 
修改配置：在 YAML 配置文件（如 7B.yaml）中添加一行 compute_dtype: torch.float16。\n\n若仍遇到显存溢出（OOM），建议调整以下超参数：\n- 将序列长度（seq_len）从 64K 降低至 8192。\n- 将 LoRA Rank 从 64 降低至 16。\n这些调整可显著降低显存占用并启用多卡分片加载。","https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune\u002Fissues\u002F107",{"id":115,"question_zh":116,"answer_zh":117,"source_url":118},44438,"遇到 'ValueError: setting an array element with a sequence' 或数组形状不均匀的错误如何修复？","该错误通常由数据解析或数组处理逻辑引起，官方已通过提交修复了此问题。请更新代码库到包含以下修复提交的版本：\nhttps:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune\u002Fcommit\u002F656df1c94c80ca9703ebc471c9f106c9b7a0bfa7\n拉取最新代码后重新运行即可解决。","https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune\u002Fissues\u002F99",{"id":120,"question_zh":121,"answer_zh":122,"source_url":123},44439,"工具定义（Tools）无法被完全识别或 _parse_available_tools 方法返回不全怎么办？","这是一个已知的代码逻辑缺陷，导致部分定义的工具未被正确解析。维护者已合并了相关的修复代码。请确保您的代码库已更新到最新版本，以包含对该方法的修复，从而能正确返回所有定义的工具。","https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune\u002Fissues\u002F77",{"id":125,"question_zh":126,"answer_zh":127,"source_url":128},44440,"运行数据验证脚本时出现 'ValueError: could not convert string to float' 错误是什么原因？","该错误通常发生在解析数据源权重时，原因是配置文件或数据源字符串格式不正确，导致程序试图将非数字字符串（如文件路径的一部分）转换为浮点数。请检查您的 YAML 配置文件中数据源路径的格式，确保没有多余字符干扰权重解析，并确认数据文件路径书写正确且无截断。","https:\u002F\u002Fgithub.com\u002Fmistralai\u002Fmistral-finetune\u002Fissues\u002F66",{"id":130,"question_zh":131,"answer_zh":132,"source_url":113},44441,"如何在配置文件中启用混合精度训练（如 qLora）？","虽然 train.py 源码支持混合精度，但在标准 YAML 配置中需手动指定数据类型。对于不支持 bfloat16 的显卡（如 V100），请在配置文件中添加 compute_dtype: torch.float16。若要使用更高级的量化技术（如 qLora），可能需要进一步修改源码或等待官方配置模板更新，目前主要通过调整 LoRA 秩（rank）和序列长度来优化显存使用。",[]]