[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-NovaSearch-Team--RAG-Retrieval":3,"tool-NovaSearch-Team--RAG-Retrieval":62},[4,18,26,36,46,54],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",160015,2,"2026-04-18T11:30:52",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":42,"last_commit_at":43,"category_tags":44,"status":17},8272,"opencode","anomalyco\u002Fopencode","OpenCode 是一款开源的 AI 编程助手（Coding Agent），旨在像一位智能搭档一样融入您的开发流程。它不仅仅是一个代码补全插件，而是一个能够理解项目上下文、自主规划任务并执行复杂编码操作的智能体。无论是生成全新功能、重构现有代码，还是排查难以定位的 Bug，OpenCode 都能通过自然语言交互高效完成，显著减少开发者在重复性劳动和上下文切换上的时间消耗。\n\n这款工具专为软件开发者、工程师及技术研究人员设计，特别适合希望利用大模型能力来提升编码效率、加速原型开发或处理遗留代码维护的专业人群。其核心亮点在于完全开源的架构，这意味着用户可以审查代码逻辑、自定义行为策略，甚至私有化部署以保障数据安全，彻底打破了传统闭源 AI 助手的“黑盒”限制。\n\n在技术体验上，OpenCode 提供了灵活的终端界面（Terminal UI）和正在测试中的桌面应用程序，支持 macOS、Windows 及 Linux 全平台。它兼容多种包管理工具，安装便捷，并能无缝集成到现有的开发环境中。无论您是追求极致控制权的资深极客，还是渴望提升产出的独立开发者，OpenCode 都提供了一个透明、可信",144296,1,"2026-04-16T14:50:03",[13,45],"插件",{"id":47,"name":48,"github_repo":49,"description_zh":50,"stars":51,"difficulty_score":32,"last_commit_at":52,"category_tags":53,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",109154,"2026-04-18T11:18:24",[14,15,13],{"id":55,"name":56,"github_repo":57,"description_zh":58,"stars":59,"difficulty_score":32,"last_commit_at":60,"category_tags":61,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[45,13,15,14],{"id":63,"github_repo":64,"name":65,"description_en":66,"description_zh":67,"ai_summary_zh":67,"readme_en":68,"readme_zh":69,"quickstart_zh":70,"use_case_zh":71,"hero_image_url":72,"owner_login":73,"owner_name":74,"owner_avatar_url":75,"owner_bio":76,"owner_company":77,"owner_location":77,"owner_email":77,"owner_twitter":77,"owner_website":77,"owner_url":78,"languages":79,"stars":88,"forks":89,"last_commit_at":90,"license":91,"difficulty_score":10,"env_os":92,"env_gpu":93,"env_ram":92,"env_deps":94,"category_tags":100,"github_topics":102,"view_count":32,"oss_zip_url":77,"oss_zip_packed_at":77,"status":17,"created_at":108,"updated_at":109,"faqs":110,"releases":141},9266,"NovaSearch-Team\u002FRAG-Retrieval","RAG-Retrieval","Unify Efficient Fine-tuning of  RAG Retrieval, including Embedding, ColBERT, ReRanker.","RAG-Retrieval 是一个专为检索增强生成（RAG）系统打造的一站式开源工具，旨在统一并简化检索模型的高效微调、推理与知识蒸馏流程。它主要解决了开发者在构建 RAG 应用时，面对嵌入模型（Embedding）、晚期交互模型（如 ColBERT）及重排序模型（Reranker）等多种架构时，训练代码分散、推理接口不统一的痛点。\n\n该工具非常适合从事自然语言处理的研究人员、算法工程师以及希望优化搜索效果的 AI 开发者使用。其核心亮点在于提供了端到端的完整代码支持：在训练阶段，兼容各类开源模型（如 BGE、BCE、GTE 等），支持基于 BERT 或大语言模型（LLM）架构的微调；在推理阶段，通过轻量级 Python 库提供统一调用接口，极大降低了部署复杂度；此外，它还支持将大型模型的知识蒸馏至小型模型，以平衡性能与效率。近期更新更涵盖了针对位置偏差的实证研究及 MRL 损失函数实现，展现了其在前沿技术探索上的深度。RAG-Retrieval 以简洁优雅的代码结构著称，让用户能轻松上手并根据需求灵活定制。","\u003Ch1 align=\"center\">RAG-Retrieval\u003C\u002Fh1>\n\u003Cp align=\"center\">\n    \u003Ca href=\"https:\u002F\u002Fpypi.org\u002Fproject\u002Frag-retrieval\u002F#description\">\n            \u003Cimg alt=\"Build\" src=\"https:\u002F\u002Fimg.shields.io\u002Fpypi\u002Fv\u002Frag-retrieval?color=brightgreen\">\n    \u003C\u002Fa>\n\u003C!--     \u003Ca href=\"https:\u002F\u002Fwww.pepy.tech\u002Fprojects\u002Frag-retrieval\">\n            \u003Cimg alt=\"Build\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FNovaSearch-Team_RAG-Retrieval_readme_24c9ace793cf.png\">\n    \u003C\u002Fa> -->\n    \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FNLPJCL\u002FRAG-Retrieval\">\n            \u003Cimg alt=\"Build\" src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FContribution-Welcome-blue\">\n    \u003C\u002Fa>\n    \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FNLPJCL\u002FRAG-Retrieval\u002Fblob\u002Fmaster\u002FLICENSE\">\n        \u003Cimg alt=\"License\" src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FLICENSE-MIT-green\">\n    \u003C\u002Fa>\n\u003C\u002Fp>\n\n[English](.\u002FREADME.md) | [中文](.\u002FREADME_zh.md)\n\nThe RAG-Retrieval offers end-to-end code for training, inference, and distillation of the RAG retrieval model.\n- For training, **RAG-Retrieval supports fine-tuning of any open-source RAG retrieval models**, including embedding models (figure a,bert-based, llm-based), late interactive models (figure d,colbert), and reranker models (figure c,bert-based, llm-based).\n- For inference, RAG-Retrieval focuses reranker and has developed a lightweight Python library [rag-retrieval](https:\u002F\u002Fpypi.org\u002Fproject\u002Frag-retrieval\u002F), **which provides a unified way to call any different RAG ranking models.**\n- For distillation, **Distillation of support embedding models and reranker models**, support distill from a larger model to a smaller model (0.5b llm or bert-base).\n\n![ColBERT](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FNovaSearch-Team_RAG-Retrieval_readme_c836d2b01937.png)\n\n\n# Communication between communities\n\n[Join our WeChat group chat](https:\u002F\u002Fwww.notion.so\u002FRAG-Retrieval-Roadmap-c817257e3e8a484b8850cac40a3fcf88)\n\n# News\n\n- 🔥 **22\u002F05\u002F2025**: RAG-Retrieval released Myopic Trap, an empirical study of positional bias across the full IR pipeline. We systematically evaluate a range of SOTA retrieval models—including BM25, dense embeddings, ColBERT-style models, and rerankers—on two carefully designed position-aware benchmarks: SQuAD-PosQ and FineWeb-PosQ. [Learn more](.\u002Fexamples\u002FMyopicTrap\u002F)\n\n- **29\u002F12\u002F2024**: RAG-Retrieval released the core training code (stage3) of Stella and Jasper embedding model [Jasper and Stella: distillation of SOTA embedding models](https:\u002F\u002Farxiv.org\u002Fabs\u002F2412.19048).\n\n- **21\u002F10\u002F2024**: RAG-Retrieval released two different methods for Reranker tasks based on LLM, as well as a method for distilling them into BERT. [Best Practices for LLM in Reranker Tasks? A Simple Experiment Report (with code)](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F987727357)\n\n- **05\u002F06\u002F2024**: Implementation of MRL loss for the Embedding model in RAG-Retrieval. [RAG-Retrieval: Making MRL Loss a Standard for Training Vector (Embedding) Models](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F701884479)\n\n- **02\u002F06\u002F2024**: RAG-Retrieval implements LLM preference-based supervised fine-tuning of the RAG retriever. [RAG-Retrieval Implements LLM Preference-Based Supervised Fine-Tuning of the RAG Retriever](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F701215443)\n\n- **05\u002F05\u002F2024**: Released a lightweight Python library for RAG-Retrieval. [RAG-Retrieval: Your RAG Application Deserves a better infer framework](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F692404995)\n\n- **18\u002F03\u002F2024**: Released RAG-Retrieval [Introduction to RAG-Retrieval on Zhihu](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F683483778)\n\n\n\n# Features\n\n- **Simple yet Elegant**: Rejects complex, with a simple and understandable code structure for easy modifications.\n- **Supports end-to-end fine-tuning of RAG retrieval models**: Embedding (bert-based, llm-based), late interaction models (colbert), and reranker models (bert-based, llm-based).\n- **Supports fine-tuning of any open-source RAG retrieval models**: Compatible with most open-source embedding and reranker models, such as: bge (bge-embedding, bge-m3, bge-reranker), bce (bce-embedding, bce-reranker), gte (gte-embedding, gte-multilingual-reranker-base).\n- **Supports distillation of larger models into smaller models**: Enables the distillation of larger LLM-based reranker or embedding models into smaller ones (e.g., a 0.5B-parameter LLM or BERT-base).\n- **Advanced Algorithms**: For embedding models, supports the [MRL algorithm](https:\u002F\u002Farxiv.org\u002Fabs\u002F2205.13147) to reduce the dimensionality of output vectors and [Stella distillation method](https:\u002F\u002Farxiv.org\u002Fabs\u002F2412.19048).\n- **Multi-gpu training strategy**: Includes deepspeed, fsdp.\n\n\n# Quick Start\n\n## Installation\nFor training (all):\n```bash\nconda create -n rag-retrieval python=3.8 && conda activate rag-retrieval\n# To avoid incompatibility between the automatically installed torch and the local cuda, it is recommended to manually install the compatible version of torch before proceeding to the next step.\npip install -r requirements.txt \n```\nFor prediction (reranker):\n```bash\n# To avoid incompatibility between the automatically installed torch and the local cuda, it is recommended to manually install the compatible version of torch before proceeding to the next step.\npip install rag-retrieval\n```\n\n## Training\n\nFor different model types, please go into different subdirectories. For example:\nFor [embedding](https:\u002F\u002Fgithub.com\u002FNLPJCL\u002FRAG-Retrieval\u002Ftree\u002Fmaster\u002Frag_retrieval\u002Ftrain\u002Fembedding), and similarly for others. Detailed procedures can be found in the README file in each subdirectories.\n```bash\ncd .\u002Frag_retrieval\u002Ftrain\u002Fembedding\nbash train_embedding.sh\n```\n\n## inference\n\nRAG-Retrieval has developed a lightweight Python library, [rag-retrieval](https:\u002F\u002Fpypi.org\u002Fproject\u002Frag-retrieval\u002F), which provides a unified interface for calling various RAG reranker models with the following features:\n\n- Supports multiple ranking models: Compatible with common open-source ranking models (Cross Encoder Reranker, Decoder-Only LLM Reranker).\n\n- Long document friendly: Supports two different handling logics for long documents (maximum length truncation and splitting to take the maximum score).\n\n- Easy to Extend: If there is a new ranking model, users only need to inherit from BaseReranker and implement the rank and compute_score functions.\n\n**For detailed usage and considerations of the rag-retrieval package, please refer to the [Tutorial](https:\u002F\u002Fgithub.com\u002FNLPJCL\u002FRAG-Retrieval\u002Fblob\u002Fmaster\u002Fexamples\u002FReranker_Tutorial.md)**\n\n\n\n# Experimental Results\n\n\n## Results of the reranker model on the MTEB Reranking task\n\n\n|      **Model**       |  **Model Size(GB)**  |**T2Reranking** | **MMarcoReranking** | **CMedQAv1** | **CMedQAv2** | **Avg** |\n|:-----------:|:----------:|:----------:|:-------------:|:--------------:|:---------------:| :---------------:|\n|   bge-reranker-base   |  1.11 | 67.28    |      35.46     |      81.27      |       84.10      | 67.03\n| bce-reranker-base_v1 |   1.11 |70.25    |      34.13     |      79.64      |       81.31      | 66.33\n| rag-retrieval-reranker |  0.41 | 67.33    |      31.57     |      83.54     |       86.03     | 67.12\n\nAmong them, rag-retrieval-reranker is the result of training on the hfl\u002Fchinese-roberta-wwm-ext model using the RAG-Retrieval code, and the training data uses the training data of the bge-rerank model.\n\n## Results of the Colbert model in the MTEB Reranking task\n\n|      **Model**  | **Model Size(GB)**  | **Dim**  | **T2Reranking** | **MMarcoReranking** | **CMedQAv1** | **CMedQAv2** | **Avg** |\n|:-----------: |:----------:|:----------:|:----------:|:-------------:|:--------------:|:---------------:| :---------------:|\n|   bge-m3-colbert   | 2.24 | 1024 | 66.82 | 26.71    |      75.88     |      76.83      |      61.56      \n| rag-retrieval-colbert | 0.41 |  1024|  66.85    |      31.46     |      81.05     |       84.22     | 65.90\n\nAmong them, rag-retrieval-colbert is the result of training on the hfl\u002Fchinese-roberta-wwm-ext model using the RAG-Retrieval code, and the training data uses the training data of the bge-rerank model.\n\n## Fine-tune the open source BGE series models with domain data\n\n|      **Model**  | **T2ranking**  | |\n|:-----------: |:----------:|:----------:|\n|   bge-v1.5-embedding   | 66.49|  | \n|   bge-v1.5-embedding **finetune**    | 67.15 | **+0.66** | \n|   bge-m3-colbert   | 66.82|  | \n|   bge-m3-colbert **finetune**    | 67.22 | **+0.40** | \n|   bge-reranker-base   | 67.28|  | \n|   bge-reranker-base  **finetune**    | 67.57 | **+0.29** | \n\nThe number with finetune at the end means that we used RAG-Retrieval to fine-tune the corresponding open source model, and the training data used the training set of T2-Reranking.\n\nIt is worth noting that the training set of the three open source models of bge already includes T2-Reranking, and the data is relatively general, so the performance improvement of fine-tuning using this data is not significant. However, if the open source model is fine-tuned using a vertical field data set, the performance improvement will be greater.\n\n\n# Citation\nIf you find this repository helpful, please cite our work:\n```bib\n@misc{zhang2025jasperstelladistillationsota,\n      title={Jasper and Stella: distillation of SOTA embedding models}, \n      author={Dun Zhang and Jiacheng Li and Ziyang Zeng and Fulong Wang},\n      year={2025},\n      eprint={2412.19048},\n      archivePrefix={arXiv},\n      primaryClass={cs.IR},\n      url={https:\u002F\u002Farxiv.org\u002Fabs\u002F2412.19048}, \n}\n```\n\n# Acknowledge\n\nDuring the development process, we borrowed or based on the implementation of the following projects. We sincerely appreciate the efforts of these teams for their contributions to open-source research and development.\n\n- [FlagEmbedding](https:\u002F\u002Fgithub.com\u002FFlagOpen\u002FFlagEmbedding)\n- [uniem](https:\u002F\u002Fgithub.com\u002Fwangyuxinwhy\u002Funiem)\n- [sentence-transformers](https:\u002F\u002Fgithub.com\u002FUKPLab\u002Fsentence-transformers)\n- [rerankers](https:\u002F\u002Fgithub.com\u002FAnswerDotAI\u002Frerankers)\n\n\n# Star History\n\n[![Star History Chart](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FNovaSearch-Team_RAG-Retrieval_readme_ad137c6d910f.png)](https:\u002F\u002Fstar-history.com\u002F#NovaSearch-Team\u002FRAG-Retrieval&Date)\n\n# License\nRAG-Retrieval is licensed under the [MIT License](https:\u002F\u002Fgithub.com\u002FNLPJCL\u002FRAG-Retrieval\u002Fblob\u002Fmaster\u002FLICENSE). \n\n\n\n","\u003Ch1 align=\"center\">RAG-检索\u003C\u002Fh1>\n\u003Cp align=\"center\">\n    \u003Ca href=\"https:\u002F\u002Fpypi.org\u002Fproject\u002Frag-retrieval\u002F#description\">\n            \u003Cimg alt=\"构建\" src=\"https:\u002F\u002Fimg.shields.io\u002Fpypi\u002Fv\u002Frag-retrieval?color=brightgreen\">\n    \u003C\u002Fa>\n\u003C!--     \u003Ca href=\"https:\u002F\u002Fwww.pepy.tech\u002Fprojects\u002Frag-retrieval\">\n            \u003Cimg alt=\"构建\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FNovaSearch-Team_RAG-Retrieval_readme_24c9ace793cf.png\">\n    \u003C\u002Fa> -->\n    \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FNLPJCL\u002FRAG-Retrieval\">\n            \u003Cimg alt=\"构建\" src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F贡献-欢迎-blue\">\n    \u003C\u002Fa>\n    \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FNLPJCL\u002FRAG-Retrieval\u002Fblob\u002Fmaster\u002FLICENSE\">\n        \u003Cimg alt=\"许可证\" src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F许可证-MIT-green\">\n    \u003C\u002Fa>\n\u003C\u002Fp>\n\n[English](.\u002FREADME.md) | [中文](.\u002FREADME_zh.md)\n\nRAG-检索提供RAG检索模型的训练、推理和蒸馏的端到端代码。\n- 在训练方面，**RAG-检索支持任何开源RAG检索模型的微调**，包括嵌入模型（图a，基于BERT、基于LLM）、晚期交互模型（图d，ColBERT）以及重排序模型（图c，基于BERT、基于LLM）。\n- 在推理方面，RAG-检索专注于重排序，并开发了一个轻量级的Python库[rag-retrieval](https:\u002F\u002Fpypi.org\u002Fproject\u002Frag-retrieval\u002F)，**该库提供了一种统一的方式来调用任何不同的RAG重排序模型。**\n- 在蒸馏方面，**支持嵌入模型和重排序模型的蒸馏**，可以将较大的模型蒸馏为较小的模型（0.5B参数的LLM或BERT-base）。\n\n![ColBERT](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FNovaSearch-Team_RAG-Retrieval_readme_c836d2b01937.png)\n\n\n# 社区交流\n\n[加入我们的微信群聊](https:\u002F\u002Fwww.notion.so\u002FRAG-Retrieval-Roadmap-c817257e3e8a484b8850cac40a3fcf88)\n\n# 新闻\n\n- 🔥 **22\u002F05\u002F2025**: RAG-检索发布了《短视陷阱》，这是一项关于整个IR流程中位置偏差的实证研究。我们系统地评估了一系列SOTA检索模型——包括BM25、密集嵌入、ColBERT风格模型和重排序器——在两个精心设计的位置感知基准测试集上：SQuAD-PosQ和FineWeb-PosQ。[了解更多](.\u002Fexamples\u002FMyopicTrap\u002F)\n\n- **29\u002F12\u002F2024**: RAG-检索发布了Stella和Jasper嵌入模型的核心训练代码（stage3）[Jasper和Stella：SOTA嵌入模型的蒸馏](https:\u002F\u002Farxiv.org\u002Fabs\u002F2412.19048)。\n\n- **21\u002F10\u002F2024**: RAG-检索发布了两种基于LLM的重排序任务方法，以及一种将其蒸馏为BERT的方法。[LLM在重排序任务中的最佳实践？一份简单的实验报告（附代码）](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F987727357)\n\n- **05\u002F06\u002F2024**: RAG-检索实现了用于嵌入模型的MRL损失函数。[RAG-检索：让MRL损失成为向量（嵌入）模型训练的标准](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F701884479)\n\n- **02\u002F06\u002F2024**: RAG-检索实现了基于LLM偏好的监督微调的RAG检索器。[RAG-检索实现基于LLM偏好的监督微调的RAG检索器](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F701215443)\n\n- **05\u002F05\u002F2024**: RAG-检索发布了一个轻量级的Python库。[RAG-检索：你的RAG应用值得一个更好的推理框架](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F692404995)\n\n- **18\u002F03\u002F2024**: RAG-检索发布[知乎上的RAG-检索介绍](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F683483778)\n\n\n\n# 特性\n\n- **简单而优雅**：摒弃复杂性，采用简单易懂的代码结构，便于修改。\n- **支持RAG检索模型的端到端微调**：嵌入模型（基于BERT、基于LLM）、晚期交互模型（ColBERT）以及重排序模型（基于BERT、基于LLM）。\n- **支持任何开源RAG检索模型的微调**：兼容大多数开源嵌入和重排序模型，例如：bge（bge-embedding、bge-m3、bge-reranker）、bce（bce-embedding、bce-reranker）、gte（gte-embedding、gte-multilingual-reranker-base）。\n- **支持将大模型蒸馏为小模型**：能够将较大的基于LLM的重排序或嵌入模型蒸馏为较小的模型（如0.5B参数的LLM或BERT-base）。\n- **先进算法**：对于嵌入模型，支持使用[MRL算法](https:\u002F\u002Farxiv.org\u002Fabs\u002F2205.13147)来降低输出向量的维度，以及[Stella蒸馏方法](https:\u002F\u002Farxiv.org\u002Fabs\u002F2412.19048)。\n- **多GPU训练策略**：包含deepspeed、fsdp。\n\n\n# 快速入门\n\n## 安装\n用于训练（全部）：\n```bash\nconda create -n rag-retrieval python=3.8 && conda activate rag-retrieval\n# 为了避免自动安装的torch与本地cuda不兼容，建议手动安装兼容版本的torch后再进行下一步。\npip install -r requirements.txt \n```\n用于预测（重排序）：\n```bash\n# 为了避免自动安装的torch与本地cuda不兼容，建议手动安装兼容版本的torch后再进行下一步。\npip install rag-retrieval\n```\n\n## 训练\n\n针对不同类型的模型，请进入不同的子目录。例如：\n对于[嵌入](https:\u002F\u002Fgithub.com\u002FNLPJCL\u002FRAG-Retrieval\u002Ftree\u002Fmaster\u002Frag_retrieval\u002Ftrain\u002Fembedding)，其他类型也类似。详细步骤可在各子目录的README文件中找到。\n```bash\ncd .\u002Frag_retrieval\u002Ftrain\u002Fembedding\nbash train_embedding.sh\n```\n\n## 推理\n\nRAG-检索开发了一个轻量级的Python库[rag-retrieval](https:\u002F\u002Fpypi.org\u002Fproject\u002Frag-retrieval\u002F)，它提供了一个统一的接口来调用各种RAG重排序模型，具有以下特点：\n\n- 支持多种重排序模型：兼容常见的开源重排序模型（交叉编码器重排序器、仅解码器LLM重排序器）。\n\n- 对长文档友好：支持两种不同的处理逻辑来应对长文档（最大长度截断和拆分以取最高分）。\n\n- 易于扩展：如果有新的重排序模型，用户只需继承BaseReranker并实现rank和compute_score函数即可。\n\n**有关rag-retrieval包的详细使用方法和注意事项，请参阅[教程](https:\u002F\u002Fgithub.com\u002FNLPJCL\u002FRAG-Retrieval\u002Fblob\u002Fmaster\u002Fexamples\u002FReranker_Tutorial.md)**\n\n\n\n# 实验结果\n\n## 重排序模型在 MTEB 重排序任务上的结果\n\n\n|      **模型**       |  **模型大小(GB)**  |**T2重排序** | **MMarco重排序** | **CMedQA v1** | **CMedQA v2** | **平均** |\n|:-----------:|:----------:|:----------:|:-------------:|:--------------:|:---------------:| :---------------:|\n|   bge-reranker-base   |  1.11 | 67.28    |      35.46     |      81.27      |       84.10      | 67.03\n| bce-reranker-base_v1 |   1.11 |70.25    |      34.13     |      79.64      |       81.31      | 66.33\n| rag-retrieval-reranker |  0.41 | 67.33    |      31.57     |      83.54     |       86.03     | 67.12\n\n其中，rag-retrieval-reranker 是基于 hfl\u002Fchinese-roberta-wwm-ext 模型，使用 RAG-Retrieval 代码进行训练的结果，训练数据采用了 bge-rerank 模型的训练数据。\n\n## Colbert 模型在 MTEB 重排序任务中的结果\n\n|      **模型**  | **模型大小(GB)**  | **维度**  | **T2重排序** | **MMarco重排序** | **CMedQA v1** | **CMedQA v2** | **平均** |\n|:-----------: |:----------:|:----------:|:----------:|:-------------:|:--------------:|:---------------:| :---------------:|\n|   bge-m3-colbert   | 2.24 | 1024 | 66.82 | 26.71    |      75.88     |      76.83      |      61.56      \n| rag-retrieval-colbert | 0.41 |  1024|  66.85    |      31.46     |      81.05     |       84.22     | 65.90\n\n其中，rag-retrieval-colbert 是基于 hfl\u002Fchinese-roberta-wwm-ext 模型，使用 RAG-Retrieval 代码进行训练的结果，训练数据采用了 bge-rerank 模型的训练数据。\n\n## 使用领域数据微调开源 BGE 系列模型\n\n|      **模型**  | **T2重排序**  | |\n|:-----------: |:----------:|:----------:|\n|   bge-v1.5-embedding   | 66.49|  | \n|   bge-v1.5-embedding **微调**    | 67.15 | **+0.66** | \n|   bge-m3-colbert   | 66.82|  | \n|   bge-m3-colbert **微调**    | 67.22 | **+0.40** | \n|   bge-reranker-base   | 67.28|  | \n|   bge-reranker-base  **微调**    | 67.57 | **+0.29** | \n\n末尾标注“微调”的数字表示我们使用了 RAG-Retrieval 对相应开源模型进行了微调，训练数据为 T2-重排序的训练集。\n\n值得注意的是，bge 的三个开源模型的训练集中已经包含了 T2-重排序的数据，且数据较为通用，因此使用该数据进行微调后的性能提升并不显著。然而，如果使用垂直领域的数据集对开源模型进行微调，则性能提升会更加明显。\n\n# 引用\n如果您觉得本仓库有所帮助，请引用我们的工作：\n```bib\n@misc{zhang2025jasperstelladistillationsota,\n      title={Jasper and Stella: distillation of SOTA embedding models}, \n      author={Dun Zhang and Jiacheng Li and Ziyang Zeng and Fulong Wang},\n      year={2025},\n      eprint={2412.19048},\n      archivePrefix={arXiv},\n      primaryClass={cs.IR},\n      url={https:\u002F\u002Farxiv.org\u002Fabs\u002F2412.19048}, \n}\n```\n\n# 致谢\n在开发过程中，我们借鉴或基于以下项目的实现。我们衷心感谢这些团队为开源研究与开发所做出的贡献。\n\n- [FlagEmbedding](https:\u002F\u002Fgithub.com\u002FFlagOpen\u002FFlagEmbedding)\n- [uniem](https:\u002F\u002Fgithub.com\u002Fwangyuxinwhy\u002Funiem)\n- [sentence-transformers](https:\u002F\u002Fgithub.com\u002FUKPLab\u002Fsentence-transformers)\n- [rerankers](https:\u002F\u002Fgithub.com\u002FAnswerDotAI\u002Frerankers)\n\n\n# 星标历史\n\n[![星标历史图](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FNovaSearch-Team_RAG-Retrieval_readme_ad137c6d910f.png)](https:\u002F\u002Fstar-history.com\u002F#NovaSearch-Team\u002FRAG-Retrieval&Date)\n\n# 许可证\nRAG-Retrieval 采用 [MIT 许可证](https:\u002F\u002Fgithub.com\u002FNLPJCL\u002FRAG-Retrieval\u002Fblob\u002Fmaster\u002FLICENSE) 许可。","# RAG-Retrieval 快速上手指南\n\nRAG-Retrieval 是一个端到端的工具库，支持 RAG 检索模型（Embedding、ColBERT、Reranker）的训练、推理和蒸馏。它兼容主流开源模型（如 BGE、BCE、GTE 系列），并提供统一的推理接口。\n\n## 1. 环境准备\n\n*   **操作系统**: Linux \u002F macOS \u002F Windows\n*   **Python 版本**: 推荐 Python 3.8+\n*   **硬件要求**:\n    *   **训练**: 需要 NVIDIA GPU，建议安装与本地 CUDA 版本匹配的 PyTorch。\n    *   **推理**: CPU 或 GPU 均可运行。\n*   **前置依赖**: 建议先手动安装适配本地环境的 `torch`，以避免自动安装版本不兼容的问题。\n\n## 2. 安装步骤\n\n### 场景 A：模型训练 (Training)\n如果你需要微调 Embedding、ColBERT 或 Reranker 模型，请执行以下命令：\n\n```bash\n# 创建并激活虚拟环境\nconda create -n rag-retrieval python=3.8 && conda activate rag-retrieval\n\n# 【重要】建议先手动安装与你本地 CUDA 版本匹配的 torch，例如：\n# pip install torch torchvision torchaudio --index-url https:\u002F\u002Fdownload.pytorch.org\u002Fwhl\u002Fcu118\n\n# 安装项目依赖\npip install -r requirements.txt\n```\n\n> **国内加速提示**：如遇网络问题，可使用清华源或阿里源安装依赖：\n> `pip install -r requirements.txt -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple`\n\n### 场景 B：模型推理 (Inference)\n如果你仅需使用现成的 Reranker 模型进行排序推理，可直接安装轻量级 Python 库：\n\n```bash\n# 【重要】同样建议先手动安装适配的 torch\n\n# 安装推理库\npip install rag-retrieval\n```\n\n## 3. 基本使用\n\n### 3.1 训练模型 (Training)\n\nRAG-Retrieval 将不同模型的训练代码分置于子目录中。以下以 **Embedding 模型** 的微调为例：\n\n```bash\n# 进入 Embedding 训练目录\ncd .\u002Frag_retrieval\u002Ftrain\u002Fembedding\n\n# 执行训练脚本 (请根据实际需求修改 train_embedding.sh 中的参数)\nbash train_embedding.sh\n```\n\n*注：ColBERT 和 Reranker 的训练方式类似，只需进入对应的 `train\u002Fcolbert` 或 `train\u002Freranker` 目录即可。具体参数配置请参考各子目录下的 README。*\n\n### 3.2 模型推理 (Inference)\n\n安装 `rag-retrieval` 包后，你可以使用统一的接口调用各种开源 Reranker 模型（支持 Cross Encoder 和 LLM-based 模型）。\n\n以下是一个最简单的 Python 使用示例：\n\n```python\nfrom rag_retrieval import Reranker\n\n# 初始化 Reranker，自动加载默认模型或指定本地\u002FHF 模型路径\n# 支持长文档处理策略（截断或分段取最大分）\nreranker = Reranker('BAAI\u002Fbge-reranker-base', device='cuda') \n\n# 准备查询和候选文档列表\nquery = \"什么是人工智能？\"\ndocuments = [\n    \"人工智能是计算机科学的一个分支。\",\n    \"今天天气真不错。\",\n    \"深度学习是人工智能的重要子领域。\"\n]\n\n# 执行排序\nresults = reranker.rank(query=query, docs=documents)\n\n# 输出结果\nfor res in results:\n    print(f\"文档: {res['text']}, 得分: {res['score']}\")\n```\n\n**特性说明：**\n*   **多模型兼容**: 支持 `bge-reranker`, `bce-reranker`, `gte-reranker` 等主流模型。\n*   **长文档友好**: 内置处理超长文本的逻辑（最大长度截断 或 切片后取最高分）。\n*   **易于扩展**: 若有新模型，只需继承 `BaseReranker` 类并实现 `rank` 和 `compute_score` 方法即可。\n\n更多高级用法（如自定义打分策略、批量处理等）请参考官方 [Reranker 教程](https:\u002F\u002Fgithub.com\u002FNLPJCL\u002FRAG-Retrieval\u002Fblob\u002Fmaster\u002Fexamples\u002FReranker_Tutorial.md)。","某电商公司的算法团队正在构建新一代智能客服系统，需要让大模型基于海量商品文档精准回答用户咨询。\n\n### 没有 RAG-Retrieval 时\n- **模型适配繁琐**：团队尝试了 BGE、BCE 等多种开源嵌入和重排序模型，但每个模型的训练代码和推理接口各不相同，导致重复开发工作量巨大。\n- **检索精度瓶颈**：仅使用基础的向量检索（Embedding）难以处理复杂的长尾查询，缺乏高效的 ColBERT 或重排序（Reranker）模块来优化最终结果，用户常收到不相关的商品链接。\n- **资源与性能失衡**：想要提升精度只能部署超大模型，导致推理延迟高；想降低延迟又缺乏将大模型能力蒸馏到小模型（如 BERT-Base）的标准流程，陷入两难。\n- **实验迭代缓慢**：由于缺乏统一的微调框架，验证新的检索策略（如基于 LLM 偏好的监督微调）需要数周时间重构代码，严重拖慢产品上线节奏。\n\n### 使用 RAG-Retrieval 后\n- **统一训练与推理**：利用 RAG-Retrieval 端到端支持 Embedding、ColBERT 和 Reranker 的特性，团队用同一套代码库完成了多种模型的微调和蒸馏，并通过其轻量级 Python 库统一了所有模型的调用方式。\n- **检索效果显著跃升**：通过引入 RAG-Retrieval 优化的重排序模型和 ColBERT 架构，系统在复杂查询下的召回准确率提升了 30%，有效解决了“答非所问”的痛点。\n- **高效模型蒸馏**：借助内置的蒸馏功能，成功将大型 LLM 重排序模型的能力迁移至小型 BERT 模型，在保持高精度的同时将推理延迟降低了 60%。\n- **敏捷策略验证**：简单的代码结构让团队能在几天内完成从数据准备到基于 LLM 偏好微调的全流程，快速验证了多种检索增强策略并落地生产。\n\nRAG-Retrieval 通过统一高效的微调与推理框架，帮助团队以最低成本实现了检索精度的最大化与工程落地的最简化。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FNovaSearch-Team_RAG-Retrieval_04cb474d.png","NovaSearch-Team","NovaSearch","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002FNovaSearch-Team_17afe359.jpg","",null,"https:\u002F\u002Fgithub.com\u002FNovaSearch-Team",[80,84],{"name":81,"color":82,"percentage":83},"Python","#3572A5",97.6,{"name":85,"color":86,"percentage":87},"Shell","#89e051",2.4,1114,89,"2026-04-14T17:07:33","MIT","未说明","训练阶段必需（支持多 GPU，包含 DeepSpeed、FSDP 策略）；推理阶段视模型大小而定。具体显卡型号、显存大小及 CUDA 版本未在文中明确指定，但建议手动安装与本地 CUDA 兼容的 torch 版本。",{"notes":95,"python":96,"dependencies":97},"1. 为避免自动安装的 torch 与本地 CUDA 不兼容，强烈建议在运行下一步之前手动安装兼容版本的 torch。\n2. 支持多种模型架构的微调（Embedding、ColBERT、Reranker）及蒸馏（从大模型到 0.5B LLM 或 BERT-base）。\n3. 提供轻量级推理库 'rag-retrieval'，统一调用各类 RAG 排序模型。\n4. 训练时需进入对应的子目录（如 rag_retrieval\u002Ftrain\u002Fembedding）执行脚本。","3.8+",[98,99],"torch","requirements.txt 中定义的其他依赖",[101,15,14,35,13],"其他",[103,104,105,106,107],"ai","llm","nlp","rag","retrieval-augmented-generation","2026-03-27T02:49:30.150509","2026-04-19T03:05:03.156625",[111,116,121,126,131,136],{"id":112,"question_zh":113,"answer_zh":114,"source_url":115},41589,"如何构建自己的微调数据集？","主要有两种方式：\n1. 调用 OpenAI API 生成：需要 API Key（有免费额度），参考官方 Prompt 模板，或搜索其他人的实现方法。\n2. 本地开源模型生成：如果硬件允许，可以下载开源大模型（如 yi-6b-chat）自行生成数据。","https:\u002F\u002Fgithub.com\u002FNovaSearch-Team\u002FRAG-Retrieval\u002Fissues\u002F2",{"id":117,"question_zh":118,"answer_zh":119,"source_url":120},41590,"为什么 Reranker 微调时 Listwise Loss 会导致召回率骤降或不稳定？","这是因为 BGE 的 Loss 是多任务学习（同时学习 Listwise 和蒸馏 Loss），而 RAG-Retrieval 框架目前仅使用蒸馏 Loss。在纯蒸馏场景下，直接使用 Point-wise (Pairwise) 损失函数通常效果更好且更稳定。如果遇到此问题，建议尝试切换为 `pairwise_ranknet` 损失函数。","https:\u002F\u002Fgithub.com\u002FNovaSearch-Team\u002FRAG-Retrieval\u002Fissues\u002F105",{"id":122,"question_zh":123,"answer_zh":124,"source_url":125},41591,"训练 m3e-base 或其他模型时 Loss 出现 NaN 怎么办？","这通常与混合精度训练（mixed_precision）设置有关。解决方案是将配置中的 `mixed_precision` 从 `fp16` 改为 `no`（即不使用混合精度）。修改后重新训练，Loss 通常会恢复正常下降趋势。如果问题依旧，建议检查训练数据是否有异常。","https:\u002F\u002Fgithub.com\u002FNovaSearch-Team\u002FRAG-Retrieval\u002Fissues\u002F13",{"id":127,"question_zh":128,"answer_zh":129,"source_url":130},41592,"在小数据集上进行知识蒸馏（Distillation）效果不好或 Loss 为 NaN 是什么原因？","知识蒸馏通常需要大规模数据才能生效，一般建议至少需要数十万到数百万条数据点。如果在只有几千条数据的小数据集上进行蒸馏，很容易出现 Loss 为 NaN 或效果退化的情况。","https:\u002F\u002Fgithub.com\u002FNovaSearch-Team\u002FRAG-Retrieval\u002Fissues\u002F91",{"id":132,"question_zh":133,"answer_zh":134,"source_url":135},41593,"为什么该框架的训练速度比其他代码（如 FlagEmbedding）慢很多？","主要原因通常是负样本数量（neg_nums \u002F train_group_size）的设置不同。例如，FlagEmbedding 默认可能只使用少量硬负样本（如 group_size=2），而本框架如果设置了较大的负样本数（如 `--neg_nums 17`），计算量会显著增加，导致训练变慢。可以通过减少负样本数量来提升速度。","https:\u002F\u002Fgithub.com\u002FNovaSearch-Team\u002FRAG-Retrieval\u002Fissues\u002F57",{"id":137,"question_zh":138,"answer_zh":139,"source_url":140},41594,"微调 BGE 系列模型时报错或无法启动，如何解决配置文件问题？","对于 `BAAI\u002Fbge-base-zh-v1.5` 等 BGE 系列模型，需要确保使用正确的 FSDP 配置文件。请将启动命令中的 `--config_file` 参数从 `xlmroberta_default_config.yaml` 替换为 `default_fsdp.yaml`。\n\n错误示例：\n`--config_file ..\u002F..\u002F..\u002Fconfig\u002Fxlmroberta_default_config.yaml`\n\n正确示例：\n`--config_file ..\u002F..\u002F..\u002Fconfig\u002Fdefault_fsdp.yaml`","https:\u002F\u002Fgithub.com\u002FNovaSearch-Team\u002FRAG-Retrieval\u002Fissues\u002F46",[142],{"id":143,"version":144,"summary_zh":145,"released_at":146},333592,"rag_retrieval_only_train","统一高效的RAG检索微调，包括嵌入表示、ColBERT和交叉编码器。","2024-05-04T10:03:52"]