[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-EthicalML--awesome-production-machine-learning":3,"tool-EthicalML--awesome-production-machine-learning":64},[4,17,27,35,43,56],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":16},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,3,"2026-04-05T11:01:52",[13,14,15],"开发框架","图像","Agent","ready",{"id":18,"name":19,"github_repo":20,"description_zh":21,"stars":22,"difficulty_score":23,"last_commit_at":24,"category_tags":25,"status":16},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",138956,2,"2026-04-05T11:33:21",[13,15,26],"语言模型",{"id":28,"name":29,"github_repo":30,"description_zh":31,"stars":32,"difficulty_score":23,"last_commit_at":33,"category_tags":34,"status":16},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 
图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",107662,"2026-04-03T11:11:01",[13,14,15],{"id":36,"name":37,"github_repo":38,"description_zh":39,"stars":40,"difficulty_score":23,"last_commit_at":41,"category_tags":42,"status":16},3704,"NextChat","ChatGPTNextWeb\u002FNextChat","NextChat 是一款轻量且极速的 AI 助手，旨在为用户提供流畅、跨平台的大模型交互体验。它完美解决了用户在多设备间切换时难以保持对话连续性，以及面对众多 AI 模型不知如何统一管理的痛点。无论是日常办公、学习辅助还是创意激发，NextChat 都能让用户随时随地通过网页、iOS、Android、Windows、MacOS 或 Linux 端无缝接入智能服务。\n\n这款工具非常适合普通用户、学生、职场人士以及需要私有化部署的企业团队使用。对于开发者而言，它也提供了便捷的自托管方案，支持一键部署到 Vercel 或 Zeabur 等平台。\n\nNextChat 的核心亮点在于其广泛的模型兼容性，原生支持 Claude、DeepSeek、GPT-4 及 Gemini Pro 等主流大模型，让用户在一个界面即可自由切换不同 AI 能力。此外，它还率先支持 MCP（Model Context Protocol）协议，增强了上下文处理能力。针对企业用户，NextChat 提供专业版解决方案，具备品牌定制、细粒度权限控制、内部知识库整合及安全审计等功能，满足公司对数据隐私和个性化管理的高标准要求。",87618,"2026-04-05T07:20:52",[13,26],{"id":44,"name":45,"github_repo":46,"description_zh":47,"stars":48,"difficulty_score":23,"last_commit_at":49,"category_tags":50,"status":16},2268,"ML-For-Beginners","microsoft\u002FML-For-Beginners","ML-For-Beginners 是由微软推出的一套系统化机器学习入门课程，旨在帮助零基础用户轻松掌握经典机器学习知识。这套课程将学习路径规划为 12 周，包含 26 节精炼课程和 52 道配套测验，内容涵盖从基础概念到实际应用的完整流程，有效解决了初学者面对庞大知识体系时无从下手、缺乏结构化指导的痛点。\n\n无论是希望转型的开发者、需要补充算法背景的研究人员，还是对人工智能充满好奇的普通爱好者，都能从中受益。课程不仅提供了清晰的理论讲解，还强调动手实践，让用户在循序渐进中建立扎实的技能基础。其独特的亮点在于强大的多语言支持，通过自动化机制提供了包括简体中文在内的 50 多种语言版本，极大地降低了全球不同背景用户的学习门槛。此外，项目采用开源协作模式，社区活跃且内容持续更新，确保学习者能获取前沿且准确的技术资讯。如果你正寻找一条清晰、友好且专业的机器学习入门之路，ML-For-Beginners 
将是理想的起点。",84991,"2026-04-05T10:45:23",[14,51,52,53,15,54,26,13,55],"数据工具","视频","插件","其他","音频",{"id":57,"name":58,"github_repo":59,"description_zh":60,"stars":61,"difficulty_score":10,"last_commit_at":62,"category_tags":63,"status":16},3128,"ragflow","infiniflow\u002Fragflow","RAGFlow 是一款领先的开源检索增强生成（RAG）引擎，旨在为大语言模型构建更精准、可靠的上下文层。它巧妙地将前沿的 RAG 技术与智能体（Agent）能力相结合，不仅支持从各类文档中高效提取知识，还能让模型基于这些知识进行逻辑推理和任务执行。\n\n在大模型应用中，幻觉问题和知识滞后是常见痛点。RAGFlow 通过深度解析复杂文档结构（如表格、图表及混合排版），显著提升了信息检索的准确度，从而有效减少模型“胡编乱造”的现象，确保回答既有据可依又具备时效性。其内置的智能体机制更进一步，使系统不仅能回答问题，还能自主规划步骤解决复杂问题。\n\n这款工具特别适合开发者、企业技术团队以及 AI 研究人员使用。无论是希望快速搭建私有知识库问答系统，还是致力于探索大模型在垂直领域落地的创新者，都能从中受益。RAGFlow 提供了可视化的工作流编排界面和灵活的 API 接口，既降低了非算法背景用户的上手门槛，也满足了专业开发者对系统深度定制的需求。作为基于 Apache 2.0 协议开源的项目，它正成为连接通用大模型与行业专有知识之间的重要桥梁。",77062,"2026-04-04T04:44:48",[15,14,13,26,54],{"id":65,"github_repo":66,"name":67,"description_en":68,"description_zh":69,"ai_summary_zh":69,"readme_en":70,"readme_zh":71,"quickstart_zh":72,"use_case_zh":73,"hero_image_url":74,"owner_login":75,"owner_name":76,"owner_avatar_url":77,"owner_bio":78,"owner_company":79,"owner_location":79,"owner_email":80,"owner_twitter":75,"owner_website":81,"owner_url":82,"languages":79,"stars":83,"forks":84,"last_commit_at":85,"license":86,"difficulty_score":87,"env_os":88,"env_gpu":88,"env_ram":88,"env_deps":89,"category_tags":92,"github_topics":93,"view_count":113,"oss_zip_url":79,"oss_zip_packed_at":79,"status":16,"created_at":114,"updated_at":115,"faqs":116,"releases":147},822,"EthicalML\u002Fawesome-production-machine-learning","awesome-production-machine-learning","A curated list of awesome open source libraries to deploy, monitor, version and scale your machine learning","awesome-production-machine-learning 是一个专注于机器学习工程化的精选开源资源目录。它汇集了众多优秀的开源库，协助开发者完成模型的部署、监控、版本管理及规模化扩展。面对机器学习模型从实验室走向生产环境时的复杂挑战，这个列表有效解决了工具链碎片化和选型困难的问题。\n\n无论是机器学习工程师、数据科学家还是负责 AI 
系统落地的开发者，都能从中受益。内容覆盖范围极广，包括自动机器学习、数据管道优化、模型服务化、可解释性以及隐私安全等关键环节。特别值得一提的是，项目提供了便捷的搜索工具，帮助用户快速导航庞大的工具链。社区保持高频更新，每月发布新版本摘要，确保收录的技术始终处于行业前沿。对于正在构建稳定可靠 AI 应用团队而言，这不仅是工具箱，更是通往高效 MLOps 实践的路线图。","[![Awesome](https:\u002F\u002Fawesome.re\u002Fbadge.svg)](https:\u002F\u002Fawesome.re)\n[![X](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FX-%23000000?logo=X&logoColor=white)](https:\u002F\u002Ftwitter.com\u002FEthicalML)\n\n# Awesome Production Machine Learning\n\nThis repository contains a curated list of awesome open source libraries that will help you deploy, monitor, version, scale, and secure your production machine learning 🚀\n\nYou can keep up to date by watching this github repo to get a summary of the new production ML libraries added every month [via releases](https:\u002F\u002Fgithub.com\u002FEthicalML\u002Fawesome-production-machine-learning\u002Freleases) 🤩\n\nAdditionally, we provide a [search toolkit](https:\u002F\u002Fhuggingface.co\u002Fspaces\u002Fzhiminy\u002FAwesome-Production-Machine-Learning-Search) that helps you quickly navigate through the toolchain.\n\n## Quick links to sections on this page\n\n| | | |\n|-|-|-|\n| [🔧 AutoML](#automl) | [🧮 Computation & Communication Optimisation](#computation-and-communication-optimisation) | [🏷️ Data Annotation & Synthesis](#data-annotation-and-synthesis) |\n| [🧵 Data Pipeline](#data-pipeline) | [📓 Data Science Notebook](#data-science-notebook) | [💾 Data Storage Optimisation](#data-storage-optimisation) |\n| [💸 Data Stream Processing](#data-stream-processing) | [💪 Deployment & Serving](#deployment-and-serving) | [📈 Evaluation & Monitoring](#evaluation-and-monitoring) |\n| [🔍 Explainability & Fairness](#explainability-and-fairness) | [🎁 Feature Store](#feature-store) | [🔴 Industry-strength Anomaly Detection](#industry-strength-anomaly-detection) |\n| [👁️ Industry-strength Computer Vision](#industry-strength-computer-vision) | [🔥 Industry-strength Information Retrieval](#industry-strength-information-retrieval) | [🔠 
Industry-strength Natural Language Processing](#industry-strength-nlp) |\n| [🙌 Industry-strength Recommender System](#industry-strength-recommender-system) | [🍕 Industry-strength Reinforcement Learning](#industry-strength-reinforcement-learning) | [🤖 Industry-strength Robotics](#industry-strength-robotics) |\n| [📊 Industry-strength Visualisation](#industry-strength-visualisation) | [📅 Metadata Management](#metadata-management) | [📜 Model, Data & Experiment Management](#model-data-and-experiment-management) |\n| [🔩 Model Storage Optimisation](#model-storage-optimisation) | [🏁 Model Training & Orchestration](#model-training-and-orchestration) | [🔏 Privacy & Safety](#privacy-and-safety) |\n\n## Contributing to the list\n\nPlease review our [CONTRIBUTING.md](https:\u002F\u002Fgithub.com\u002FEthicalML\u002Fawesome-production-machine-learning\u002Fblob\u002Fmaster\u002FCONTRIBUTING.md) requirements when submitting a PR to help us keep the list clean and up-to-date - thank you to the community for supporting its steady growth 🚀\n\n\u003Cpicture>\n  \u003Csource\n    media=\"(prefers-color-scheme: dark)\"\n    srcset=\"\n      https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FEthicalML_awesome-production-machine-learning_readme_4d8efabc3db6.png&theme=dark\n    \"\n  \u002F>\n  \u003Csource\n    media=\"(prefers-color-scheme: light)\"\n    srcset=\"\n      https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FEthicalML_awesome-production-machine-learning_readme_4d8efabc3db6.png\n    \"\n  \u002F>\n  \u003Cimg\n    alt=\"Star History Chart\"\n    src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FEthicalML_awesome-production-machine-learning_readme_4d8efabc3db6.png\"\n  \u002F>\n\u003C\u002Fpicture>\n\n## 10 Min Video Overview\n\n\u003Ctable>\n  \u003Ctr>\n    \u003Ctd width=\"30%\">\n        This \u003Ca href=\"https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=Ynb6X0KZKxY\">10 minute video\u003C\u002Fa> provides an overview of the motivations for machine learning 
operations as well as a high level overview on some of the tools in this repo. This \u003Ca href=\"https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=NycftytgPnk\">newer video\u003C\u002Fa> covers an updated 2024 version of the state of MLOps.\n    \u003C\u002Ftd>\n    \u003Ctd width=\"70%\">\n        \u003Ca href=\"https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=Ynb6X0KZKxY\">\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FEthicalML_awesome-production-machine-learning_readme_5a15cf128666.png\">\u003C\u002Fa>\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n\u003C\u002Ftable>\n\n## Want to receive recurrent updates on this repo and other advancements?\n\n\u003Ctable>\n  \u003Ctr>\n    \u003Ctd width=\"30%\">\n         You can join the \u003Ca href=\"https:\u002F\u002Fethical.institute\u002Fmle.html\">Machine Learning Engineer\u003C\u002Fa> newsletter. Join over 70,000 ML professionals and enthusiasts who receive weekly curated articles & tutorials on production Machine Learning.\n    \u003C\u002Ftd>\n    \u003Ctd width=\"70%\">\n        \u003Ca href=\"https:\u002F\u002Fethical.institute\u002Fmle.html\">\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FEthicalML_awesome-production-machine-learning_readme_bf9a0a64abe8.png\">\u003C\u002Fa>\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd width=\"30%\">\n         Also check out the \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FEthicalML\u002Fawesome-production-genai\u002F\">Awesome Production GenAI\u003C\u002Fa> List, where we aim to map a curated list of awesome open source libraries to deploy, monitor, version and scale your generative artificial intelligence applications and systems.\n    \u003C\u002Ftd>\n    \u003Ctd width=\"70%\">\n        \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FEthicalML\u002Fawesome-production-genai\u002F\">\u003Cimg 
src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FEthicalML_awesome-production-machine-learning_readme_6cb3f367f959.jpg\">\u003C\u002Fa>\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n\u003C\u002Ftable>\n\n# Main Content\n\n## AutoML\n* [AIDE](https:\u002F\u002Fgithub.com\u002FWecoAI\u002Faideml) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FWecoAI\u002Faideml.svg?cacheSeconds=86400) - AIDE is an open-source ML engineering agent that uses a tree search algorithm to autonomously explore, implement, and evaluate solution strategies for machine learning tasks.\n* [AutoGluon](https:\u002F\u002Fgithub.com\u002Fautogluon\u002Fautogluon) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fautogluon\u002Fautogluon.svg?cacheSeconds=86400) - Automated feature, model, and hyperparameter selection for tabular, image, and text data on top of popular machine learning libraries (Scikit-Learn, LightGBM, CatBoost, PyTorch, MXNet).\n* [Autokeras](https:\u002F\u002Fgithub.com\u002Fkeras-team\u002Fautokeras) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fkeras-team\u002Fautokeras.svg?cacheSeconds=86400) - AutoML library for Keras based on [\"Auto-Keras: Efficient Neural Architecture Search with Network Morphism\"](https:\u002F\u002Farxiv.org\u002Fabs\u002F1806.10282).\n* [auto-sklearn](https:\u002F\u002Fgithub.com\u002Fautoml\u002Fauto-sklearn) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fautoml\u002Fauto-sklearn.svg?cacheSeconds=86400) - Framework to automate algorithm and hyperparameter tuning for sklearn.\n* [Ax](https:\u002F\u002Fgithub.com\u002Ffacebook\u002FAx) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffacebook\u002FAx.svg?cacheSeconds=86400) - Ax is an accessible, general-purpose platform for understanding, managing, deploying, and automating adaptive experiments.\n* [BoTorch](https:\u002F\u002Fgithub.com\u002Fmeta-pytorch\u002Fbotorch) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmeta-pytorch\u002Fbotorch.svg?cacheSeconds=86400) - BoTorch is a library for Bayesian Optimization built on PyTorch.\n* [EvalML](https:\u002F\u002Fgithub.com\u002Falteryx\u002Fevalml) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Falteryx\u002Fevalml.svg?cacheSeconds=86400) - EvalML is an AutoML library which builds, optimizes, and evaluates machine learning pipelines using domain-specific objective functions.\n* [Feature Engine](https:\u002F\u002Fgithub.com\u002Ffeature-engine\u002Ffeature_engine) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffeature-engine\u002Ffeature_engine.svg?cacheSeconds=86400) - Feature-engine is a Python library that contains several transformers to engineer features for use in machine learning models.\n* [Featuretools](https:\u002F\u002Fgithub.com\u002Falteryx\u002Ffeaturetools) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Falteryx\u002Ffeaturetools.svg?cacheSeconds=86400) - An open source framework for automated feature engineering.\n* [FLAML](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FFLAML) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmicrosoft\u002FFLAML.svg?cacheSeconds=86400) - FLAML is a fast library for automated machine learning & tuning.\n* [HEBO](https:\u002F\u002Fgithub.com\u002Fhuawei-noah\u002FHEBO) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuawei-noah\u002FHEBO.svg?cacheSeconds=86400) - Set of open-source hyperparameter optimization frameworks, including the winning submission to the [NeurIPS 2020 Black-Box Optimisation Challenge](https:\u002F\u002Fbbochallenge.com\u002Fleaderboard) tested on hyperparameter tuning tasks. 
\n* [Katib](https:\u002F\u002Fgithub.com\u002Fkubeflow\u002Fkatib) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fkubeflow\u002Fkatib.svg?cacheSeconds=86400) - A Kubernetes-based system for Hyperparameter Tuning and Neural Architecture Search.\n* [keras-tuner](https:\u002F\u002Fgithub.com\u002Fkeras-team\u002Fkeras-tuner) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fkeras-team\u002Fkeras-tuner.svg?cacheSeconds=86400) - Keras Tuner is an easy-to-use, distributable hyperparameter optimisation framework that solves the pain points of performing a hyperparameter search. Keras Tuner makes it easy to define a search space and leverage included algorithms to find the best hyperparameter values.\n* [Optuna](https:\u002F\u002Fgithub.com\u002Foptuna\u002Foptuna) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Foptuna\u002Foptuna.svg?cacheSeconds=86400) - Optuna is an automatic hyperparameter optimisation software framework, particularly designed for machine learning.\n* [OSS Vizier](https:\u002F\u002Fgithub.com\u002Fgoogle\u002Fvizier) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle\u002Fvizier.svg?cacheSeconds=86400) - OSS Vizier is a Python-based service for black-box optimisation and research, one of the first hyperparameter tuning services designed to work at scale.\n* [Perpetual](https:\u002F\u002Fgithub.com\u002Fperpetual-ml\u002Fperpetual) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fperpetual-ml\u002Fperpetual.svg?cacheSeconds=86400) - A gradient boosting machine that doesn't need hyperparameter optimization, with a simple budget parameter to control model complexity.\n* [TPOT](https:\u002F\u002Fgithub.com\u002Fepistasislab\u002Ftpot) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fepistasislab\u002Ftpot.svg?cacheSeconds=86400) - Automation of sklearn pipeline creation (including feature selection, pre-processor, etc.).\n* 
[tsfresh](https:\u002F\u002Fgithub.com\u002Fblue-yonder\u002Ftsfresh) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fblue-yonder\u002Ftsfresh.svg?cacheSeconds=86400) - Automatic extraction of relevant features from time series.\n\n## Computation and Communication Optimisation\n\n* [Accelerate](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Faccelerate) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Faccelerate.svg?cacheSeconds=86400) - Accelerate abstracts exactly and only the boilerplate code related to multi-GPU\u002FTPU\u002Fmixed-precision and leaves the rest of your code unchanged.\n* [Adapters](https:\u002F\u002Fgithub.com\u002Fadapter-hub\u002Fadapters) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fadapter-hub\u002Fadapters.svg?cacheSeconds=86400) - Adapters is a unified library for parameter-efficient and modular transfer learning.\n* [BitBLAS](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FBitBLAS) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmicrosoft\u002FBitBLAS.svg?cacheSeconds=86400) - BitBLAS is a library to support mixed-precision BLAS operations on GPUs\n* [Colossal-AI](https:\u002F\u002Fgithub.com\u002Fhpcaitech\u002FColossalAI) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhpcaitech\u002FColossalAI.svg?cacheSeconds=86400) - A unified deep learning system for big model era, which helps users to efficiently and quickly deploy large AI model training and inference.\n* [Composer](https:\u002F\u002Fgithub.com\u002Fmosaicml\u002Fcomposer) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmosaicml\u002Fcomposer.svg?cacheSeconds=86400) - Composer is a PyTorch library that enables you to train neural networks faster, at lower cost, and to higher accuracy.\n* [CuDF](https:\u002F\u002Fgithub.com\u002Frapidsai\u002Fcudf) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Frapidsai\u002Fcudf.svg?cacheSeconds=86400) - 
Built based on the Apache Arrow columnar memory format, cuDF is a GPU DataFrame library for loading, joining, aggregating, filtering, and otherwise manipulating data.\n* [CuML](https:\u002F\u002Fgithub.com\u002Frapidsai\u002Fcuml) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Frapidsai\u002Fcuml.svg?cacheSeconds=86400) - cuML is a suite of libraries that implement machine learning algorithms and mathematical primitives functions that share compatible APIs with other RAPIDS projects.\n* [CuPy](https:\u002F\u002Fgithub.com\u002Fcupy\u002Fcupy) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcupy\u002Fcupy.svg?cacheSeconds=86400) - An implementation of NumPy-compatible multi-dimensional array on CUDA. CuPy consists of the core multi-dimensional array class, cupy.ndarray, and many functions on it.\n* [DEAP](https:\u002F\u002Fgithub.com\u002FDEAP\u002Fdeap) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FDEAP\u002Fdeap.svg?cacheSeconds=86400) - A novel evolutionary computation framework for rapid prototyping and testing of ideas. It seeks to make algorithms explicit and data structures transparent. It works in perfect harmony with parallelisation mechanisms such as multiprocessing and SCOOP.\n* [DeepEP](https:\u002F\u002Fgithub.com\u002Fdeepseek-ai\u002FDeepEP) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdeepseek-ai\u002FDeepEP.svg?cacheSeconds=86400) - DeepEP is a communication library tailored for Mixture-of-Experts (MoE) and expert parallelism (EP). It provides high-throughput and low-latency all-to-all GPU kernels, which are also known as MoE dispatch and combine. 
The library also supports low-precision operations, including FP8.\n* [DGL](https:\u002F\u002Fgithub.com\u002Fdmlc\u002Fdgl) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdmlc\u002Fdgl.svg?cacheSeconds=86400) - DGL is an easy-to-use, high performance and scalable Python package for deep learning on graphs.\n* [DLRover](https:\u002F\u002Fgithub.com\u002Fintelligent-machine-learning\u002Fdlrover) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fintelligent-machine-learning\u002Fdlrover.svg?cacheSeconds=86400) - DLRover makes the distributed training of large AI models easy, stable, fast and green.\n* [Dask](https:\u002F\u002Fgithub.com\u002Fdask\u002Fdask) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdask\u002Fdask.svg?cacheSeconds=86400) - Distributed parallel processing framework for Pandas and NumPy computations.\n* [DeepSpeed](https:\u002F\u002Fgithub.com\u002Fdeepspeedai\u002FDeepSpeed) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdeepspeedai\u002FDeepSpeed.svg?cacheSeconds=86400) - DeepSpeed is a deep learning optimization library that makes distributed training and inference easy, efficient, and effective.\n* [FlagGems](https:\u002F\u002Fgithub.com\u002FFlagOpen\u002FFlagGems) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FFlagOpen\u002FFlagGems.svg?cacheSeconds=86400) - FlagGems is a high-performance general operator library implemented in OpenAI Triton. 
It builds on a collection of backend neutral kernels that aims to accelerate LLM training and inference across diverse hardware platforms.\n* [Flashlight](https:\u002F\u002Fgithub.com\u002Fflashlight\u002Fflashlight) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fflashlight\u002Fflashlight.svg?cacheSeconds=86400) - A fast, flexible machine learning library written entirely in C++ from the Facebook AI Research and the creators of Torch, TensorFlow, Eigen and Deep Speech.\n* [Flax](https:\u002F\u002Fgithub.com\u002Fgoogle\u002Fflax) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle\u002Fflax.svg?cacheSeconds=86400) - A neural network library and ecosystem for JAX designed for flexibility.\n* [GPUStack](https:\u002F\u002Fgithub.com\u002Fgpustack\u002Fgpustack) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgpustack\u002Fgpustack.svg?cacheSeconds=86400) - GPUStack is an open-source GPU cluster manager for running AI models.\n* [Hivemind](https:\u002F\u002Fgithub.com\u002Flearning-at-home\u002Fhivemind) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flearning-at-home\u002Fhivemind.svg?cacheSeconds=86400) - Decentralized deep learning in PyTorch.\n* [Horovod](https:\u002F\u002Fgithub.com\u002Fhorovod\u002Fhorovod) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhorovod\u002Fhorovod.svg?cacheSeconds=86400) - Uber's distributed training framework for TensorFlow, Keras, and PyTorch.\n* [Jax](https:\u002F\u002Fgithub.com\u002Fjax-ml\u002Fjax) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjax-ml\u002Fjax.svg?cacheSeconds=86400) - Composable transformations of Python+NumPy programs: differentiate, vectorize, JIT to GPU\u002FTPU, and more.\n* [Kompute](https:\u002F\u002Fgithub.com\u002FKomputeProject\u002Fkompute) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FKomputeProject\u002Fkompute.svg?cacheSeconds=86400) - Blazing fast, lightweight and mobile phone-enabled Vulkan 
compute framework optimized for advanced GPU data processing usecases.\n* [Lava](https:\u002F\u002Fgithub.com\u002Flava-nc\u002Flava) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flava-nc\u002Flava.svg?cacheSeconds=86400) - Lava is an open source framework to develop applications for neuromorphic hardware architectures.\n* [Liger Kernel](https:\u002F\u002Fgithub.com\u002Flinkedin\u002FLiger-Kernel) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flinkedin\u002FLiger-Kernel.svg?cacheSeconds=86400) - Liger Kernel is a collection of Triton kernels designed specifically for LLM training.\n* [LightGBM](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FLightGBM) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmicrosoft\u002FLightGBM.svg?cacheSeconds=86400) - LightGBM is a gradient boosting framework that uses tree based learning algorithms.\n* [MLX](https:\u002F\u002Fgithub.com\u002Fml-explore\u002Fmlx) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fml-explore\u002Fmlx.svg?cacheSeconds=86400) - MLX is an array framework for machine learning on Apple silicon.\n* [Modin](https:\u002F\u002Fgithub.com\u002Fmodin-project\u002Fmodin) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmodin-project\u002Fmodin.svg?cacheSeconds=86400) - Speed up your Pandas workflows by changing a single line of code.\n* [NVIDIA TensorRT](https:\u002F\u002Fgithub.com\u002FNVIDIA\u002FTensorRT) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FNVIDIA\u002FTensorRT.svg?cacheSeconds=86400) - TensorRT is a C++ library for high-performance inference on NVIDIA GPUs and deep learning accelerators.\n* [Nevergrad](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fnevergrad) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffacebookresearch\u002Fnevergrad.svg?cacheSeconds=86400) - Nevergrad is a gradient-free optimisation platform.\n* 
[Norse](https:\u002F\u002Fgithub.com\u002Fnorse\u002Fnorse) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fnorse\u002Fnorse.svg?cacheSeconds=86400) - Norse aims to exploit the advantages of bio-inspired neural components, which are sparse and event-driven - a fundamental difference from artificial neural networks.\n* [Numba](https:\u002F\u002Fgithub.com\u002Fnumba\u002Fnumba) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fnumba\u002Fnumba.svg?cacheSeconds=86400)  - A compiler for Python array and numerical functions.\n* [Optimum](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Foptimum) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Foptimum.svg?cacheSeconds=86400) - Optimum is an extension of Transformers and Diffusers, providing a set of optimization tools enabling maximum efficiency to train and run models on targeted hardware while keeping things easy to use.\n* [PEFT](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fpeft) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Fpeft.svg?cacheSeconds=86400) - Parameter-Efficient Fine-Tuning (PEFT) methods enable efficient adaptation of pre-trained language models (PLMs) to various downstream applications without fine-tuning all the model's parameters.\n* [PaddlePaddle](https:\u002F\u002Fgithub.com\u002FPaddlePaddle\u002FPaddle) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FPaddlePaddle\u002FPaddle.svg?cacheSeconds=86400) - PaddlePaddle is a framework to perform large-scale deep network training, using data sources distributed across hundreds of nodes. 
\n* [PyG](https:\u002F\u002Fgithub.com\u002Fpyg-team\u002Fpytorch_geometric) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpyg-team\u002Fpytorch_geometric.svg?cacheSeconds=86400) - PyG (PyTorch Geometric) is a library built upon PyTorch to easily write and train Graph Neural Networks (GNNs) for a wide range of applications related to structured data.\n* [PyTorch Lightning](https:\u002F\u002Fgithub.com\u002FLightning-AI\u002Fpytorch-lightning) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FLightning-AI\u002Fpytorch-lightning.svg?cacheSeconds=86400) - PyTorch Lightning pretrains, finetunes and deploys AI models on multiple GPUs, TPUs with zero code changes.\n* [PyTorch](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Fpytorch) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpytorch\u002Fpytorch.svg?cacheSeconds=86400) - PyTorch is a library to develop and train neural network based deep learning models.\n* [Ray](https:\u002F\u002Fgithub.com\u002Fray-project\u002Fray) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fray-project\u002Fray.svg?cacheSeconds=86400) - Ray is a flexible, high-performance distributed execution framework for machine learning.\n* [SetFit](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fsetfit) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Fsetfit.svg?cacheSeconds=86400) - SetFit is an efficient and prompt-free framework for few-shot fine-tuning of Sentence Transformers.\n* [Sonnet](https:\u002F\u002Fgithub.com\u002Fgoogle-deepmind\u002Fsonnet) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle-deepmind\u002Fsonnet.svg?cacheSeconds=86400) - Sonnet is a library built on top of TensorFlow 2 designed to provide simple, composable abstractions for machine learning research.\n* [Streaming](https:\u002F\u002Fgithub.com\u002Fmosaicml\u002Fstreaming) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmosaicml\u002Fstreaming.svg?cacheSeconds=86400) - A Data Streaming Library for Efficient Neural Network Training.\n* [TensorFlow](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Ftensorflow) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftensorflow\u002Ftensorflow.svg?cacheSeconds=86400) - TensorFlow is a leading library designed for developing and deploying state-of-the-art  machine learning applications.\n* [ThunderKittens](https:\u002F\u002Fgithub.com\u002FHazyResearch\u002FThunderKittens) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FHazyResearch\u002FThunderKittens.svg?cacheSeconds=86400) ThunderKittens is a framework to make it easy to write fast deep learning kernels in CUDA.\n* [TorchOpt](https:\u002F\u002Fgithub.com\u002Fmetaopt\u002Ftorchopt) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmetaopt\u002Ftorchopt.svg?cacheSeconds=86400) - TorchOpt is an efficient library for differentiable optimization built upon PyTorch.\n* [Triton](https:\u002F\u002Fgithub.com\u002Ftriton-lang\u002Ftriton) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftriton-lang\u002Ftriton.svg?cacheSeconds=86400) - Triton is a language and compiler for writing highly efficient custom Deep-Learning primitives. The aim of Triton is to provide an open-source environment to write fast code at higher productivity than CUDA, but also with higher flexibility than other existing DSLs.\n* [Vaex](https:\u002F\u002Fgithub.com\u002Fvaexio\u002Fvaex) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvaexio\u002Fvaex.svg?cacheSeconds=86400) Vaex is a high performance Python library for lazy Out-of-Core DataFrames (similar to Pandas), to visualize and explore big tabular datasets. 
Vaex uses memory mapping, zero memory copy policy and lazy computations for best performance (no memory wasted).\n* [Vowpal Wabbit](https:\u002F\u002Fgithub.com\u002FVowpalWabbit\u002Fvowpal_wabbit) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FVowpalWabbit\u002Fvowpal_wabbit.svg?cacheSeconds=86400) Vowpal Wabbit is a machine learning system which pushes the frontier of machine learning with techniques such as online, hashing, allreduce, reductions, learning2search, active, and interactive learning.\n* [XGBoost](https:\u002F\u002Fgithub.com\u002Fdmlc\u002Fxgboost) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdmlc\u002Fxgboost.svg?cacheSeconds=86400) - XGBoost is an optimized distributed gradient boosting library designed to be highly efficient, flexible and portable.\n* [YDF](https:\u002F\u002Fgithub.com\u002Fgoogle\u002Fyggdrasil-decision-forests) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle\u002Fyggdrasil-decision-forests.svg?cacheSeconds=86400) - YDF (Yggdrasil Decision Forests) is a library to train, evaluate, interpret, and serve Random Forest, Gradient Boosted Decision Trees, CART and Isolation forest models.\n* [bitsandbytes](https:\u002F\u002Fgithub.com\u002Fbitsandbytes-foundation\u002Fbitsandbytes) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fbitsandbytes-foundation\u002Fbitsandbytes.svg?cacheSeconds=86400) - Bitsandbytes library is a lightweight Python wrapper around CUDA custom functions, in particular 8-bit optimizers, matrix multiplication (LLM.int8()), and 8 & 4-bit quantization functions.\n* [einops](https:\u002F\u002Fgithub.com\u002Farogozhnikov\u002Feinops) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Farogozhnikov\u002Feinops.svg?cacheSeconds=86400) - Flexible and powerful tensor operations for readable and reliable code.\n* [scikit-learn](https:\u002F\u002Fgithub.com\u002Fscikit-learn\u002Fscikit-learn) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fscikit-learn\u002Fscikit-learn.svg?cacheSeconds=86400) - Scikit-learn is a powerful machine learning library that provides a wide variety of modules for data access, data preparation and statistical model building. \n* [snnTorch](https:\u002F\u002Fgithub.com\u002Fjeshraghian\u002Fsnntorch) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjeshraghian\u002Fsnntorch.svg?cacheSeconds=86400) - snnTorch is a deep and online learning library with spiking neural networks.\n* [torchdistill](https:\u002F\u002Fgithub.com\u002Fyoshitomo-matsubara\u002Ftorchdistill) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fyoshitomo-matsubara\u002Ftorchdistill.svg?cacheSeconds=86400) - torchdistill offers various state-of-the-art knowledge distillation methods and enables you to design (new) experiments simply by editing a declarative yaml config file instead of Python code.\n* [torchkeras](https:\u002F\u002Fgithub.com\u002Flyhue1991\u002Ftorchkeras?tab=readme-ov-file) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flyhue1991\u002Ftorchkeras.svg?cacheSeconds=86400) - The torchkeras library is a simple tool for training neural networks in PyTorch in a Keras style.\n* [veScale](https:\u002F\u002Fgithub.com\u002Fvolcengine\u002FveScale) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvolcengine\u002FveScale.svg?cacheSeconds=86400) - veScale is a PyTorch native LLM training framework.\n* [yellowbrick](https:\u002F\u002Fgithub.com\u002FDistrictDataLabs\u002Fyellowbrick) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FDistrictDataLabs\u002Fyellowbrick.svg?cacheSeconds=86400) - yellowbrick provides matplotlib-based model evaluation plots for scikit-learn and other machine learning libraries.\n\n## Data Annotation and Synthesis\n* [Argilla](https:\u002F\u002Fgithub.com\u002Fargilla-io\u002Fargilla) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fargilla-io\u002Fargilla.svg?cacheSeconds=86400) - Argilla helps domain experts and data teams to build better NLP datasets in less time.\n* [cleanlab](https:\u002F\u002Fgithub.com\u002Fcleanlab\u002Fcleanlab) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcleanlab\u002Fcleanlab.svg?cacheSeconds=86400) - Python library for data-centric AI. Can automatically: find mislabeled data, detect outliers, estimate consensus + annotator-quality for multi-annotator datasets, suggest which data is best to (re)label next.\n* [COCO Annotator](https:\u002F\u002Fgithub.com\u002Fjsbroks\u002Fcoco-annotator) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjsbroks\u002Fcoco-annotator.svg?cacheSeconds=86400) - Web-based image segmentation tool for object detection, localization and keypoints.\n* [CVAT](https:\u002F\u002Fgithub.com\u002Fcvat-ai\u002Fcvat) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcvat-ai\u002Fcvat.svg?cacheSeconds=86400) - CVAT (Computer Vision Annotation Tool) is OpenCV's web-based annotation tool for both videos and images for computer vision algorithms.\n* [Doccano](https:\u002F\u002Fgithub.com\u002Fdoccano\u002Fdoccano) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdoccano\u002Fdoccano.svg?cacheSeconds=86400) - Open source text annotation tool for humans, providing functionality for sentiment analysis, named entity recognition, and machine translation.\n* [Gretel Synthetics](https:\u002F\u002Fgithub.com\u002Fgretelai\u002Fgretel-synthetics) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgretelai\u002Fgretel-synthetics.svg?cacheSeconds=86400) - Gretel Synthetics is a synthetic data generator for structured and unstructured text, featuring differentially private learning.\n* [Label Studio](https:\u002F\u002Fgithub.com\u002FHumanSignal\u002Flabel-studio) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FHumanSignal\u002Flabel-studio.svg?cacheSeconds=86400) - Multi-domain data labeling and annotation tool with standardized output format.\n* [NeMo Curator](https:\u002F\u002Fgithub.com\u002FNVIDIA\u002FNeMo-Curator) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FNVIDIA\u002FNeMo-Curator.svg?cacheSeconds=86400) - NeMo Curator is a GPU-accelerated framework for efficient large language model data curation.\n* [refinery](https:\u002F\u002Fgithub.com\u002Fcode-kern-ai\u002Frefinery) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcode-kern-ai\u002Frefinery.svg?cacheSeconds=86400) - The data scientist's open-source choice to scale, assess and maintain natural language data.\n* [SDV](https:\u002F\u002Fgithub.com\u002Fsdv-dev\u002FSDV) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fsdv-dev\u002FSDV.svg?cacheSeconds=86400) - Synthetic Data Vault (SDV) is a Synthetic Data Generation ecosystem of libraries that allows users to easily learn single-table, multi-table and timeseries datasets to later on generate new Synthetic Data that has the same format and statistical properties as the original dataset.\n* [Semantic Segmentation Editor](https:\u002F\u002Fgithub.com\u002FHitachi-Automotive-And-Industry-Lab\u002Fsemantic-segmentation-editor) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FHitachi-Automotive-And-Industry-Lab\u002Fsemantic-segmentation-editor.svg?cacheSeconds=86400) - Hitachi's Open source tool for labelling camera and LIDAR data.\n* [synthcity](https:\u002F\u002Fgithub.com\u002Fvanderschaarlab\u002Fsynthcity) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvanderschaarlab\u002Fsynthcity.svg?cacheSeconds=86400) - synthcity is a library for generating and evaluating synthetic tabular data.\n* [TabGAN](https:\u002F\u002Fgithub.com\u002FDiyago\u002FTabular-data-generation) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FDiyago\u002FTabular-data-generation.svg?cacheSeconds=86400) - Synthetic tabular data generation using GANs (CTGAN), Diffusion Models, and LLMs with adversarial filtering, privacy metrics, and sklearn integration.\n* [ViPE](https:\u002F\u002Fgithub.com\u002Fnv-tlabs\u002Fvipe) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fnv-tlabs\u002Fvipe.svg?cacheSeconds=86400) - ViPE is a spatial AI tool for annotating camera poses and dense depth maps from raw videos.\n* [YData Synthetic](https:\u002F\u002Fgithub.com\u002Fydataai\u002Fydata-synthetic) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fydataai\u002Fydata-synthetic.svg?cacheSeconds=86400) - YData Synthetic is a package to generate synthetic tabular and time-series data leveraging the state of the art generative models.\n\n## Data Pipeline\n* [Apache Airflow](https:\u002F\u002Fgithub.com\u002Fapache\u002Fairflow) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fairflow.svg?cacheSeconds=86400) - Data Pipeline framework built in Python, including scheduler, DAG definition and a UI for visualisation.\n* [Apache Nifi](https:\u002F\u002Fgithub.com\u002Fapache\u002Fnifi) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fnifi.svg?cacheSeconds=86400) - Apache NiFi was made for dataflow. 
It supports highly configurable directed graphs of data routing, transformation, and system mediation logic.\n* [Apache Oozie](https:\u002F\u002Fgithub.com\u002Fapache\u002Foozie) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Foozie.svg?cacheSeconds=86400) - Workflow scheduler for Hadoop jobs.\n* [Argo Workflows](https:\u002F\u002Fgithub.com\u002Fargoproj\u002Fargo-workflows) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fargoproj\u002Fargo-workflows.svg?cacheSeconds=86400) - Argo Workflows is an open source container-native workflow engine for orchestrating parallel jobs on Kubernetes. Argo Workflows is implemented as a Kubernetes CRD (Custom Resource Definition).\n* [Couler](https:\u002F\u002Fgithub.com\u002Fcouler-proj\u002Fcouler) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcouler-proj\u002Fcouler.svg?cacheSeconds=86400) - Unified interface for constructing and managing machine learning workflows on different workflow engines, such as Argo Workflows, Tekton Pipelines, and Apache Airflow.\n* [DataTrove](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fdatatrove) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Fdatatrove.svg?cacheSeconds=86400) - DataTrove is a library to process, filter and deduplicate text data at a very large scale.\n* [Dagster](https:\u002F\u002Fgithub.com\u002Fdagster-io\u002Fdagster) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdagster-io\u002Fdagster.svg?cacheSeconds=86400) - A data orchestrator for machine learning, analytics, and ETL.\n* [DBT](https:\u002F\u002Fgithub.com\u002Fdbt-labs\u002Fdbt-core) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdbt-labs\u002Fdbt-core.svg?cacheSeconds=86400) - ETL tool for running transformations inside data warehouses.\n* [Flyte](https:\u002F\u002Fgithub.com\u002Fflyteorg\u002Fflyte) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fflyteorg\u002Fflyte.svg?cacheSeconds=86400) - Lyft's Cloud Native Machine Learning and Data Processing Platform - [(Demo)](https:\u002F\u002Fyoutu.be\u002FKdUJGSP1h9U?t=1451).\n* [Genie](https:\u002F\u002Fgithub.com\u002FNetflix\u002Fgenie) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FNetflix\u002Fgenie.svg?cacheSeconds=86400) - Job orchestration engine to interface and trigger the execution of jobs from Hadoop-based systems.\n* [Hamilton](https:\u002F\u002Fgithub.com\u002Fdagworks-inc\u002Fhamilton) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdagworks-inc\u002Fhamilton.svg?cacheSeconds=86400) - Hamilton is a micro-orchestration framework for defining dataflows. Runs anywhere python runs (e.g. jupyter, fastAPI, spark, ray, dask). Brings software engineering best practices without you knowing it. Use it to define feature engineering transforms, end-to-end model pipelines, and LLM workflows. It complements macro-orchestration systems (e.g. kedro, luigi, airflow, dbt, etc.) as it replaces the code within those macro tasks. 
Comes with a self-hostable UI that captures lineage & provenance, execution telemetry & data summaries, and builds a self-populating catalog; usable in development as well as production.\n* [Instill VDP](https:\u002F\u002Fgithub.com\u002Finstill-ai\u002Finstill-core) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Finstill-ai\u002Finstill-core.svg?cacheSeconds=86400) - Instill VDP (Versatile Data Pipeline) aims to streamline the data processing pipelines from inception to completion.\n* [Instructor](https:\u002F\u002Fgithub.com\u002Finstructor-ai\u002Finstructor) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Finstructor-ai\u002Finstructor.svg?cacheSeconds=86400) - Instructor makes it easy to get structured data like JSON from LLMs like GPT-3.5, GPT-4, GPT-4-Vision, and open-source models.\n* [Kedro](https:\u002F\u002Fgithub.com\u002Fkedro-org\u002Fkedro) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fkedro-org\u002Fkedro.svg?cacheSeconds=86400) - Kedro is a workflow development tool that helps you build data pipelines that are robust, scalable, deployable, reproducible and versioned.\n* [Luigi](https:\u002F\u002Fgithub.com\u002Fspotify\u002Fluigi) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fspotify\u002Fluigi.svg?cacheSeconds=86400) - Luigi is a Python module that helps you build complex pipelines of batch jobs, handling dependency resolution, workflow management, visualisation, etc.\n* [Metaflow](https:\u002F\u002Fgithub.com\u002FNetflix\u002Fmetaflow) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FNetflix\u002Fmetaflow.svg?cacheSeconds=86400) - A framework for data scientists to easily build and manage real-life data science projects.\n* [Pachyderm](https:\u002F\u002Fgithub.com\u002Fpachyderm\u002Fpachyderm) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpachyderm\u002Fpachyderm.svg?cacheSeconds=86400) - Open source distributed processing framework built 
on Kubernetes focused mainly on dynamic building of production machine learning pipelines - [(Video)](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=LamKVhe2RSM).\n* [Ploomber](https:\u002F\u002Fgithub.com\u002Fploomber\u002Fploomber) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fploomber\u002Fploomber.svg?cacheSeconds=86400) - The fastest way to build data pipelines. Develop iteratively, deploy anywhere.\n* [Pixeltable](https:\u002F\u002Fgithub.com\u002Fpixeltable\u002Fpixeltable) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpixeltable\u002Fpixeltable.svg?cacheSeconds=86400) – Open-source Python library providing declarative, incremental data infrastructure for building and managing multimodal AI workloads.\n* [Prefect Core](https:\u002F\u002Fgithub.com\u002FPrefectHQ\u002Fprefect) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FPrefectHQ\u002Fprefect.svg?cacheSeconds=86400) - Workflow management system that makes it easy to take your data pipelines and add semantics like retries, logging, dynamic mapping, caching, failure notifications, and more.\n* [SeqIO](https:\u002F\u002Fgithub.com\u002Fgoogle\u002Fseqio) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle\u002Fseqio.svg?cacheSeconds=86400) - SeqIO is a library for processing sequential data to be fed into downstream sequence models.\n* [Snakemake](https:\u002F\u002Fgithub.com\u002Fsnakemake\u002Fsnakemake) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fsnakemake\u002Fsnakemake.svg?cacheSeconds=86400) - Workflow management system for reproducible and scalable data analyses.\n* [Towhee](https:\u002F\u002Fgithub.com\u002Ftowhee-io\u002Ftowhee) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftowhee-io\u002Ftowhee.svg?cacheSeconds=86400) - General-purpose machine learning pipeline for generating embedding vectors using one or many ML models.\n* 
[unstructured](https:\u002F\u002Fgithub.com\u002FUnstructured-IO\u002Funstructured) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FUnstructured-IO\u002Funstructured.svg?cacheSeconds=86400) - unstructured streamlines and optimizes the data processing workflow for LLMs, ingesting and pre-processing images and text documents, such as PDFs, HTML, Word docs, and many more. \n* [ZenML](https:\u002F\u002Fgithub.com\u002Fzenml-io\u002Fzenml) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fzenml-io\u002Fzenml.svg?cacheSeconds=86400) - ZenML is an extensible, open-source MLOps framework to create reproducible ML pipelines with a focus on automated metadata tracking, caching, and many integrations to other tools.\n\n## Data Science Notebook\n* [Apache Zeppelin](https:\u002F\u002Fgithub.com\u002Fapache\u002Fzeppelin) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fzeppelin.svg?cacheSeconds=86400) - Web-based notebook that enables data-driven, interactive data analytics and collaborative documents with SQL, Scala and more.\n* [Deepnote](https:\u002F\u002Fgithub.com\u002Fdeepnote\u002Fdeepnote) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdeepnote\u002Fdeepnote.svg?cacheSeconds=86400) - Deepnote is a drop-in replacement for Jupyter with an AI-first design, sleek UI, new blocks, and native data integrations. 
Use Python, R, and SQL locally in your favorite IDE, then scale to Deepnote cloud for real-time collaboration, Deepnote agent, and deployable data apps.\n* [Jupyter Notebooks](https:\u002F\u002Fgithub.com\u002Fjupyter\u002Fnotebook) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjupyter\u002Fnotebook.svg?cacheSeconds=86400) - Web interface python sandbox environments for reproducible development\n* [Marimo](https:\u002F\u002Fgithub.com\u002Fmarimo-team\u002Fmarimo) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmarimo-team\u002Fmarimo.svg?cacheSeconds=86400) - Reactive Python notebook — run reproducible experiments, execute as a script, deploy as an app, and version with git.\n* [Papermill](https:\u002F\u002Fgithub.com\u002Fnteract\u002Fpapermill) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fnteract\u002Fpapermill.svg?cacheSeconds=86400) - Papermill is a library for parameterizing notebooks and executing them like Python scripts.\n* [Polynote](https:\u002F\u002Fgithub.com\u002Fpolynote\u002Fpolynote) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpolynote\u002Fpolynote.svg?cacheSeconds=86400) - Polynote is an experimental polyglot notebook environment. Currently, it supports Scala and Python (with or without Spark), SQL, and Vega.\n* [RMarkdown](https:\u002F\u002Fgithub.com\u002Frstudio\u002Frmarkdown) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Frstudio\u002Frmarkdown.svg?cacheSeconds=86400) - The rmarkdown package is a next generation implementation of R Markdown based on Pandoc.\n* [Stencila](https:\u002F\u002Fgithub.com\u002Fstencila\u002Fstencila) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fstencila\u002Fstencila.svg?cacheSeconds=86400) - Stencila is a platform for creating, collaborating on, and sharing data driven content. 
Content that is transparent and reproducible.\n* [Voilà](https:\u002F\u002Fgithub.com\u002Fvoila-dashboards\u002Fvoila) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvoila-dashboards\u002Fvoila.svg?cacheSeconds=86400) - Voilà turns Jupyter notebooks into standalone web applications that can e.g. be used as dashboards.\n* [.NET Interactive](https:\u002F\u002Fgithub.com\u002Fdotnet\u002Finteractive) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdotnet\u002Finteractive.svg?cacheSeconds=86400) - .NET Interactive takes the power of .NET and embeds it into your interactive experiences.\n\n## Data Storage Optimisation\n* [AIStore](https:\u002F\u002Fgithub.com\u002FNVIDIA\u002Faistore) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FNVIDIA\u002Faistore.svg?cacheSeconds=86400) - AIStore is a lightweight object storage system with the capability to linearly scale out with each added storage node and a special focus on petascale deep learning.\n* [Alluxio](https:\u002F\u002Fgithub.com\u002FAlluxio\u002Falluxio) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FAlluxio\u002Falluxio.svg?cacheSeconds=86400) - A virtual distributed storage system that bridges the gap between computation frameworks and storage systems.\n* [Apache Arrow](https:\u002F\u002Fgithub.com\u002Fapache\u002Farrow) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Farrow.svg?cacheSeconds=86400) - In-memory columnar representation of data compatible with Pandas, Hadoop-based systems, etc.\n* [Apache Druid](https:\u002F\u002Fgithub.com\u002Fapache\u002Fdruid) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fdruid.svg?cacheSeconds=86400) - A high performance real-time analytics database. 
Check this [article](https:\u002F\u002Ftowardsdatascience.com\u002Fintroduction-to-druid-4bf285b92b5a) for introduction.\n* [Apache Hudi](https:\u002F\u002Fgithub.com\u002Fapache\u002Fhudi) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fhudi.svg?cacheSeconds=86400) - Hudi is a transactional data lake platform that brings core warehouse and database functionality directly to a data lake. Hudi is great for streaming workloads, and also allows creation of efficient incremental batch pipelines. Supports popular query engines including Spark, Flink, Presto, Trino, Hive, etc. More info [here](https:\u002F\u002Fhudi.apache.org\u002F).\n* [Apache Iceberg](https:\u002F\u002Fgithub.com\u002Fapache\u002Ficeberg) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Ficeberg.svg?cacheSeconds=86400) - Iceberg is an ACID-compliant, high-performance format built for huge analytic tables (containing tens of petabytes of data), and it brings the reliability and simplicity of SQL tables to big data, while making it possible for engines like Spark, Trino, Flink, Presto, Hive and Impala to safely work with the same tables, at the same time. 
More info [here](https:\u002F\u002Ficeberg.apache.org\u002F).\n* [Apache Ignite](https:\u002F\u002Fgithub.com\u002Fapache\u002Fignite) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fignite.svg?cacheSeconds=86400) - A memory-centric distributed database, caching, and processing platform for transactional, analytical, and streaming workloads delivering in-memory speeds at petabyte scale - [Demo](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=Xt4PWQ__YPw).\n* [Apache Parquet](https:\u002F\u002Fgithub.com\u002Fapache\u002Fparquet-java) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fparquet-java.svg?cacheSeconds=86400) - On-disk columnar representation of data compatible with Pandas, Hadoop-based systems, etc..\n* [Apache Pinot](https:\u002F\u002Fgithub.com\u002Fapache\u002Fpinot) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fpinot.svg?cacheSeconds=86400) - A realtime distributed OLAP datastore. Comparison of the open source OLAP systems for big data: ClickHouse, Druid, and Pinot is found [here](https:\u002F\u002Fmedium.com\u002F@leventov\u002Fcomparison-of-the-open-source-olap-systems-for-big-data-clickhouse-druid-and-pinot-8e042a5ed1c7).\n* [Casibase](https:\u002F\u002Fgithub.com\u002Fcasibase\u002Fcasibase) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcasibase\u002Fcasibase.svg?cacheSeconds=86400) - Casibase is a LangChain-like RAG (Retrieval-Augmented Generation) knowledge database with web UI and Enterprise SSO.\n* [Chroma](https:\u002F\u002Fgithub.com\u002Fchroma-core\u002Fchroma) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fchroma-core\u002Fchroma.svg?cacheSeconds=86400) - Chroma is an open-source embedding database.\n* [ClickHouse](https:\u002F\u002Fgithub.com\u002FClickHouse\u002FClickHouse) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FClickHouse\u002FClickHouse.svg?cacheSeconds=86400) - ClickHouse is an open 
source column oriented database management system.\n* [Delta Lake](https:\u002F\u002Fgithub.com\u002Fdelta-io\u002Fdelta) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdelta-io\u002Fdelta.svg?cacheSeconds=86400) - Delta Lake is a storage layer that brings scalable, ACID transactions to Apache Spark and other big-data engines.\n* [EdgeDB](https:\u002F\u002Fgithub.com\u002Fgeldata\u002Fgel) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgeldata\u002Fgel.svg?cacheSeconds=86400) - Gel supercharges Postgres with a modern data model, graph queries, Auth & AI solutions, and much more.\n* [GPTCache](https:\u002F\u002Fgithub.com\u002Fzilliztech\u002FGPTCache) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fzilliztech\u002FGPTCache.svg?cacheSeconds=86400) - GPTCache is a library for creating semantic cache for large language model queries.\n* [InfluxDB](https:\u002F\u002Fgithub.com\u002Finfluxdata\u002Finfluxdb) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Finfluxdata\u002Finfluxdb.svg?cacheSeconds=86400) Scalable datastore for metrics, events, and real-time analytics.\n* [Milvus](https:\u002F\u002Fgithub.com\u002Fmilvus-io\u002Fmilvus) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmilvus-io\u002Fmilvus.svg?cacheSeconds=86400) Milvus is a cloud-native, open-source vector database built to manage embedding vectors generated by machine learning models and neural networks.\n* [Marqo](https:\u002F\u002Fgithub.com\u002Fmarqo-ai\u002Fmarqo) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmarqo-ai\u002Fmarqo.svg?cacheSeconds=86400) Marqo is an end-to-end vector search engine.\n* [pgvector](https:\u002F\u002Fgithub.com\u002Fpgvector\u002Fpgvector) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpgvector\u002Fpgvector.svg?cacheSeconds=86400) pgvector helps with vector similarity search for Postgres.\n* 
[PostgresML](https:\u002F\u002Fgithub.com\u002Fpostgresml\u002Fpostgresml) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpostgresml\u002Fpostgresml.svg?cacheSeconds=86400) PostgresML is a machine learning extension for PostgreSQL that enables you to perform training and inference on text and tabular data using SQL queries.\n* [Redis](https:\u002F\u002Fgithub.com\u002Fredis\u002Fredis) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fredis\u002Fredis.svg?cacheSeconds=86400) Redis is an open-source, in-memory data store that supports vector similarity search, making it suitable for AI\u002FML applications such as semantic search and recommendation systems.\n* [Safetensors](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fsafetensors) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Fsafetensors.svg?cacheSeconds=86400) Simple, safe way to store and distribute tensors.\n* [TimescaleDB](https:\u002F\u002Fgithub.com\u002Ftimescale\u002Ftimescaledb) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftimescale\u002Ftimescaledb.svg?cacheSeconds=86400) An open-source time-series SQL database optimized for fast ingest and complex queries packaged as a PostgreSQL extension - [(Video)](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=zbjub8BQPyE).\n* [Weaviate](https:\u002F\u002Fgithub.com\u002Fweaviate\u002Fweaviate) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fweaviate\u002Fweaviate.svg?cacheSeconds=86400) - A low-latency vector search engine (GraphQL, RESTful) with out-of-the-box support for different media types. 
Modules include Semantic Search, Q&A, Classification, Customizable Models (PyTorch\u002FTensorFlow\u002FKeras), and more.\n* [Zarr](https:\u002F\u002Fgithub.com\u002Fzarr-developers\u002Fzarr-python) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fzarr-developers\u002Fzarr-python.svg?cacheSeconds=86400) - Python implementation of chunked, compressed, N-dimensional arrays designed for use in parallel computing.\n\n## Data Stream Processing\n* [Apache Beam](https:\u002F\u002Fgithub.com\u002Fapache\u002Fbeam) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fbeam.svg?cacheSeconds=86400) Apache Beam is a unified programming model for Batch and Streaming.\n* [Apache Flink](https:\u002F\u002Fgithub.com\u002Fapache\u002Fflink) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fflink.svg?cacheSeconds=86400) - Open source stream processing framework with powerful stream and batch processing capabilities.\n* [Apache Kafka](https:\u002F\u002Fgithub.com\u002Fapache\u002Fkafka) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fkafka.svg?cacheSeconds=86400) - Kafka client library for building applications and microservices where the input and output are stored in kafka clusters.\n* [Apache Samza](https:\u002F\u002Fgithub.com\u002Fapache\u002Fsamza) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fsamza.svg?cacheSeconds=86400) - Distributed stream processing framework. 
It uses Apache Kafka for messaging, and Apache Hadoop YARN to provide fault tolerance, processor isolation, security, and resource management.\n* [Apache Spark](https:\u002F\u002Fgithub.com\u002Fapache\u002Fspark) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fspark.svg?cacheSeconds=86400) - Micro-batch processing for streams using the apache spark framework as a backend supporting stateful exactly-once semantics.\n* [Bytewax](https:\u002F\u002Fgithub.com\u002Fbytewax\u002Fbytewax) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fbytewax\u002Fbytewax.svg?cacheSeconds=86400) - Flexible Python-centric stateful stream processing framework built on top of Rust engine.\n* [FastStream](https:\u002F\u002Fgithub.com\u002Fairtai\u002Ffaststream) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fairtai\u002Ffaststream.svg?cacheSeconds=86400) - A modern broker-agnostic streaming Python framework supporting Apache Kafka, RabbitMQ and NATS protocols, inspired by FastAPI and easily integratable with other web frameworks.\n* [MOA](https:\u002F\u002Fgithub.com\u002FWaikato\u002Fmoa) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FWaikato\u002Fmoa.svg?cacheSeconds=86400) - MOA (Massive Online Analysis) is an open source framework for Big Data stream mining.\n* [MosaicML Streaming](https:\u002F\u002Fgithub.com\u002Fmosaicml\u002Fstreaming) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmosaicml\u002Fstreaming.svg?cacheSeconds=86400) - Fast, deterministic streaming of large datasets from cloud storage for distributed model training.\n* [RisingWave](https:\u002F\u002Fgithub.com\u002Frisingwavelabs\u002Frisingwave) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Frisingwavelabs\u002Frisingwave.svg?cacheSeconds=86400) - A distributed SQL streaming database that unifies stream processing and low-latency serving, ideal for building and serving features for online machine 
learning.\n* [TensorStore](https:\u002F\u002Fgithub.com\u002Fgoogle\u002Ftensorstore) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle\u002Ftensorstore.svg?cacheSeconds=86400) - Library for reading and writing large multi-dimensional arrays.\n\n\n## Deployment and Serving\n* [Agenta](https:\u002F\u002Fgithub.com\u002FAgenta-AI\u002Fagenta) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FAgenta-AI\u002Fagenta.svg?cacheSeconds=86400) - Agenta provides end-to-end tools for the entire LLMOps workflow: building (LLM playground, evaluation), deploying (prompt and configuration management), and monitoring (LLM observability and tracing).\n* [AirLLM](https:\u002F\u002Fgithub.com\u002Flyogavin\u002Fairllm) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flyogavin\u002Fairllm.svg?cacheSeconds=86400) - AirLLM optimizes inference memory usage, allowing 70B large language models to run inference on a single 4GB GPU card without quantization, distillation and pruning.\n* [AITemplate](https:\u002F\u002Fgithub.com\u002Ffacebookincubator\u002FAITemplate) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffacebookincubator\u002FAITemplate.svg?cacheSeconds=86400) - AITemplate (AIT) is a Python framework that transforms deep neural networks into CUDA (NVIDIA GPU) \u002F HIP (AMD GPU) C++ code for lightning-fast inference serving.\n* [BentoML](https:\u002F\u002Fgithub.com\u002Fbentoml\u002FBentoML) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fbentoml\u002FBentoML.svg?cacheSeconds=86400) - BentoML is an open source framework for high performance ML model serving.\n* [BISHENG](https:\u002F\u002Fgithub.com\u002Fdataelement\u002Fbisheng) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdataelement\u002Fbisheng.svg?cacheSeconds=86400) - BISHENG is an open LLM application devops platform, focusing on enterprise scenarios.\n* [DeepDetect](https:\u002F\u002Fgithub.com\u002Fjolibrain\u002Fdeepdetect) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjolibrain\u002Fdeepdetect.svg?cacheSeconds=86400) - Machine Learning production server for TensorFlow, XGBoost and Caffe models written in C++ and maintained by Jolibrain.\n* [Dynamo](https:\u002F\u002Fgithub.com\u002Fai-dynamo\u002Fdynamo) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fai-dynamo\u002Fdynamo.svg?cacheSeconds=86400) - NVIDIA Dynamo is a high-throughput, low-latency inference framework designed for serving generative AI and reasoning models in multi-node distributed environments.\n* [exo](https:\u002F\u002Fgithub.com\u002Fexo-explore\u002Fexo) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fexo-explore\u002Fexo.svg?cacheSeconds=86400) - exo helps you run your AI cluster at home with everyday devices.\n* [Genkit](https:\u002F\u002Fgithub.com\u002Ffirebase\u002Fgenkit) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffirebase\u002Fgenkit.svg?cacheSeconds=86400) - Genkit is an open source framework for building AI-powered apps with familiar code-centric patterns. Genkit makes it easy to develop, integrate, and test AI features with observability and evaluations.\n* [Inference](https:\u002F\u002Fgithub.com\u002Froboflow\u002Finference) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Froboflow\u002Finference.svg?cacheSeconds=86400) - A fast, production-ready inference server for computer vision supporting deployment of many popular model architectures and fine-tuned models. With Inference, you can deploy models such as YOLOv5, YOLOv8, CLIP, SAM, and CogVLM on your own hardware using Docker.\n* [Infinity](https:\u002F\u002Fgithub.com\u002Fmichaelfeil\u002Finfinity) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmichaelfeil\u002Finfinity.svg?cacheSeconds=86400) - Infinity is a high-throughput, low-latency REST API for serving text-embeddings, reranking models and clip. 
\n* [IPEX-LLM](https:\u002F\u002Fgithub.com\u002Fintel\u002Fipex-llm) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fintel\u002Fipex-llm.svg?cacheSeconds=86400) - IPEX-LLM is a PyTorch library for running LLM on Intel CPU and GPU (e.g., local PC with iGPU, discrete GPU such as Arc, Flex and Max) with very low latency.\n* [LiteLLM](https:\u002F\u002Fgithub.com\u002FBerriAI\u002Flitellm) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FBerriAI\u002Flitellm.svg?cacheSeconds=86400) - LiteLLM is a Python SDK, Proxy Server (LLM Gateway) to call 100+ LLM APIs in OpenAI format - Bedrock, Azure, OpenAI, VertexAI, Cohere, Anthropic, Sagemaker, HuggingFace, Replicate, Groq.\n* [LitServe](https:\u002F\u002Fgithub.com\u002FLightning-AI\u002FLitServe) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FLightning-AI\u002FLitServe.svg?cacheSeconds=86400) - LitServe is a flexible serving engine for AI models built on FastAPI. It supports custom inference engines for models, agents, multi-modal systems, RAG, and complex ML pipelines.\n* [Jina-serve](https:\u002F\u002Fgithub.com\u002Fjina-ai\u002Fserve) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjina-ai\u002Fserve.svg?cacheSeconds=86400) - Jina-serve is a framework for building and deploying AI services that communicate via gRPC, HTTP and WebSockets.\n* [Kiln](https:\u002F\u002Fgithub.com\u002Fkiln-ai\u002Fkiln) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fkiln-ai\u002Fkiln.svg?cacheSeconds=86400) - Kiln is an OSS tool for fine-tuning LLM models, synthetic data generation, and collaborating on datasets.\n* [KServe](https:\u002F\u002Fgithub.com\u002Fkserve\u002Fkserve) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fkserve\u002Fkserve.svg?cacheSeconds=86400) - KServe provides a Kubernetes Custom Resource Definition for serving predictive and generative ML.\n* 
[KTransformers](https:\u002F\u002Fgithub.com\u002Fkvcache-ai\u002Fktransformers) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fkvcache-ai\u002Fktransformers.svg?cacheSeconds=86400) - KTransformers is a flexible framework for experiencing cutting-edge LLM inference optimizations.\n* [Langtrace](https:\u002F\u002Fgithub.com\u002FScale3-Labs\u002Flangtrace) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FScale3-Labs\u002Flangtrace.svg?cacheSeconds=86400) - Langtrace is an open-source, Open Telemetry based end-to-end observability tool for LLM applications, providing real-time tracing, evaluations and metrics for popular LLMs, LLM frameworks, vectorDBs and more.\n* [Lepton AI](https:\u002F\u002Fgithub.com\u002Fleptonai\u002Fleptonai) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fleptonai\u002Fleptonai.svg?cacheSeconds=86400) - LeptonAI Python library allows you to build an AI service from Python code with ease.\n* [LightLLM](https:\u002F\u002Fgithub.com\u002FModelTC\u002Flightllm) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FModelTC\u002Flightllm.svg?cacheSeconds=86400) - LightLLM is a Python-based LLM (Large Language Model) inference and serving framework, notable for its lightweight design, easy scalability, and high-speed performance.\n* [llama.cpp](https:\u002F\u002Fgithub.com\u002Fggml-org\u002Fllama.cpp) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fggml-org\u002Fllama.cpp.svg?cacheSeconds=86400) - llama.cpp is an open source software library that performs inference on various large language models such as Llama.\n* [llmfit](https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FAlexsJones\u002Fllmfit.svg?cacheSeconds=86400) - A terminal tool that right-sizes LLM models to your system's RAM, CPU, and GPU. 
Detects your hardware, scores each model across quality, speed, fit, and context dimensions, and tells you which ones will actually run well on your machine.\n* [LMDeploy](https:\u002F\u002Fgithub.com\u002FInternLM\u002Flmdeploy) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FInternLM\u002Flmdeploy.svg?cacheSeconds=86400) - LMDeploy is a toolkit for compressing, deploying, and serving LLM.\n* [LM Studio](https:\u002F\u002Fgithub.com\u002Flmstudio-ai\u002Flms) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flmstudio-ai\u002Flms.svg?cacheSeconds=86400) - LM Studio is a tool for deploying LLM models locally on the computer, even on a relatively modest machine, provided it meets the minimum requirements.\n* [LocalAI](https:\u002F\u002Fgithub.com\u002Fmudler\u002FLocalAI) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmudler\u002FLocalAI.svg?cacheSeconds=86400) - LocalAI is a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing.\n* [MindsDB](https:\u002F\u002Fgithub.com\u002Fmindsdb\u002Fmindsdb) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmindsdb\u002Fmindsdb.svg?cacheSeconds=86400) - MindsDB is the platform to create, serve, and fine-tune models in real-time from your database, vector store, and application data.\n* [mini-sglang](https:\u002F\u002Fgithub.com\u002Fsgl-project\u002Fmini-sglang) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fsgl-project\u002Fmini-sglang.svg?cacheSeconds=86400) - mini-sglang is a lightweight and efficient serving framework for large language models.\n* [MLRun](https:\u002F\u002Fgithub.com\u002Fmlrun\u002Fmlrun) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmlrun\u002Fmlrun.svg?cacheSeconds=86400) - MLRun is an open MLOps framework for quickly building and managing continuous ML and generative AI applications across their lifecycle.\n* 
[MLServer](https:\u002F\u002Fgithub.com\u002FSeldonIO\u002Fmlserver) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FSeldonIO\u002Fmlserver.svg?cacheSeconds=86400) - An inference server for your machine learning models, including support for multiple frameworks, multi-model serving and more.\n* [Model Runner](https:\u002F\u002Fgithub.com\u002Fdocker\u002Fmodel-runner) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdocker\u002Fmodel-runner.svg?cacheSeconds=86400) - Docker Model Runner makes it easy to manage, run, and serve AI models using Docker, supporting LLMs and other AI models pulled directly from Docker Hub or any OCI-compliant registry.\n* [Mosec](https:\u002F\u002Fgithub.com\u002Fmosecorg\u002Fmosec) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmosecorg\u002Fmosec.svg?cacheSeconds=86400) - A rust-powered and multi-stage pipelined model server which offers dynamic batching and more. Super easy to implement and deploy as micro-services.\n* [nano-vllm](https:\u002F\u002Fgithub.com\u002FGeeeekExplorer\u002Fnano-vllm) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FGeeeekExplorer\u002Fnano-vllm.svg?cacheSeconds=86400) - nano-vllm is a lightweight vLLM implementation built from scratch, providing fast offline inference with optimization techniques such as prefix caching, tensor parallelism, and CUDA graph.\n* [nndeploy](https:\u002F\u002Fgithub.com\u002Fnndeploy\u002Fnndeploy) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fnndeploy\u002Fnndeploy.svg?cacheSeconds=86400) - An Easy-to-Use and High-Performance AI deployment framework.\n* [Nuclio](https:\u002F\u002Fgithub.com\u002Fnuclio\u002Fnuclio) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fnuclio\u002Fnuclio.svg?cacheSeconds=86400) - A high-performance \"serverless\" framework focused on data, I\u002FO, and compute-intensive workloads. 
It is well integrated with popular data science tools, such as Jupyter and Kubeflow; supports a variety of data and streaming sources; and supports execution over CPUs and GPUs.\n* [OpenLLM](https:\u002F\u002Fgithub.com\u002Fbentoml\u002FOpenLLM) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fbentoml\u002FOpenLLM.svg?cacheSeconds=86400) - OpenLLM allows developers to run any open-source LLMs (Llama 3.1, Qwen2, Phi3 and more) or custom models as OpenAI-compatible APIs with a single command.\n* [OpenVINO](https:\u002F\u002Fgithub.com\u002Fopenvinotoolkit\u002Fopenvino) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fopenvinotoolkit\u002Fopenvino.svg?cacheSeconds=86400) - OpenVINO is an open-source toolkit for optimizing and deploying AI inference.\n* [Open WebUI](https:\u002F\u002Fgithub.com\u002Fopen-webui\u002Fopen-webui) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fopen-webui\u002Fopen-webui.svg?cacheSeconds=86400) - Open WebUI is an extensible, feature-rich, and user-friendly self-hosted AI platform designed to operate entirely offline. 
It supports various LLM runners like Ollama and OpenAI-compatible APIs, with built-in inference engine for RAG, making it a powerful AI deployment solution.\n* [OptiLLM](https:\u002F\u002Fgithub.com\u002Falgorithmicsuperintelligence\u002Foptillm) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Falgorithmicsuperintelligence\u002Foptillm.svg?cacheSeconds=86400) - OptiLLM is an OpenAI API-compatible optimizing inference proxy that implements 20+ state-of-the-art techniques to dramatically improve LLM accuracy and performance on reasoning tasks - without requiring any model training or fine-tuning.\n* [PowerInfer](https:\u002F\u002Fgithub.com\u002FSJTU-IPADS\u002FPowerInfer) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FSJTU-IPADS\u002FPowerInfer.svg?cacheSeconds=86400) - PowerInfer is a CPU\u002FGPU LLM inference engine leveraging activation locality for your device.\n* [Prompt2Model](https:\u002F\u002Fgithub.com\u002Fneulab\u002Fprompt2model) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fneulab\u002Fprompt2model.svg?cacheSeconds=86400) - Prompt2Model is a system that takes a natural language task description (like the prompts used for LLMs such as ChatGPT) to train a small special-purpose model that is conducive for deployment.\n* [RamaLama](https:\u002F\u002Fgithub.com\u002Fcontainers\u002Framalama) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcontainers\u002Framalama.svg?cacheSeconds=86400) - RamaLama is an open-source tool that simplifies the local use and serving of AI models for inference through OCI containers, eliminating the need to configure the host system.\n* [RunAnywhere](https:\u002F\u002Fgithub.com\u002FRunanywhereAI\u002Frunanywhere-sdks) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FRunanywhereAI\u002Frunanywhere-sdks.svg?cacheSeconds=86400) - RunAnywhere is a production-ready SDK for running AI models (LLMs, speech-to-text, text-to-speech) on-device for iOS, 
Android, React Native, and Flutter - enabling private, offline, and fast mobile AI applications.\n* [Seldon Core](https:\u002F\u002Fgithub.com\u002FSeldonIO\u002Fseldon-core) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FSeldonIO\u002Fseldon-core.svg?cacheSeconds=86400) - Open source platform for deploying and monitoring machine learning models in Kubernetes - [(Video)](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=pDlapGtecbY).\n* [SGLang](https:\u002F\u002Fgithub.com\u002Fsgl-project\u002Fsglang) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fsgl-project\u002Fsglang.svg?cacheSeconds=86400) - SGLang is a fast serving framework for large language models and vision language models.\n* [SkyPilot](https:\u002F\u002Fgithub.com\u002Fskypilot-org\u002Fskypilot) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fskypilot-org\u002Fskypilot.svg?cacheSeconds=86400) - SkyPilot is a framework for running LLMs, AI, and batch jobs on any cloud, offering maximum cost savings, highest GPU availability, and managed execution.\n* [Tensorflow Serving](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Fserving) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftensorflow\u002Fserving.svg?cacheSeconds=86400) - High-performance framework to serve TensorFlow models via the gRPC protocol, able to handle 100k requests per second per core.\n* [text-generation-inference](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Ftext-generation-inference) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Ftext-generation-inference.svg?cacheSeconds=86400) - Large Language Model Text Generation Inference.\n* [TorchServe](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Fserve) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpytorch\u002Fserve.svg?cacheSeconds=86400) - TorchServe is a flexible and easy to use tool for serving PyTorch models.\n* 
[torchtune](https:\u002F\u002Fgithub.com\u002Fmeta-pytorch\u002Ftorchtune) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmeta-pytorch\u002Ftorchtune.svg?cacheSeconds=86400) - torchtune is a PyTorch library for easily authoring, post-training, and experimenting with LLMs.\n* [Transformer Lab](https:\u002F\u002Fgithub.com\u002Ftransformerlab\u002Ftransformerlab-app) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftransformerlab\u002Ftransformerlab-app.svg?cacheSeconds=86400) - Transformer Lab is an open-source LLM workspace for finetuning, evaluating, exporting, and testing models locally across inference engines and platforms.\n* [Triton Inference Server](https:\u002F\u002Fgithub.com\u002Ftriton-inference-server\u002Fserver) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftriton-inference-server\u002Fserver.svg?cacheSeconds=86400) - Triton is a high performance open source serving software to deploy AI models from any framework on GPU & CPU while maximizing utilization.\n* [Vercel AI](https:\u002F\u002Fgithub.com\u002Fvercel\u002Fai) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvercel\u002Fai.svg?cacheSeconds=86400) - Vercel AI is a TypeScript toolkit designed to help you build AI-powered applications using popular frameworks like Next.js, React, Svelte, Vue and runtimes like Node.js.\n* [Vespa](https:\u002F\u002Fgithub.com\u002Fvespa-engine\u002Fvespa) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvespa-engine\u002Fvespa.svg?cacheSeconds=86400) - Search, make inferences in and organize vectors, tensors, text and structured data, at serving time and any scale.\n* [vLLM](https:\u002F\u002Fgithub.com\u002Fvllm-project\u002Fvllm) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvllm-project\u002Fvllm.svg?cacheSeconds=86400) - vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs.\n\n\n## Evaluation and Monitoring\n* 
[AlpacaEval](https:\u002F\u002Fgithub.com\u002Ftatsu-lab\u002Falpaca_eval) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftatsu-lab\u002Falpaca_eval.svg?cacheSeconds=86400) - AlpacaEval is an automatic evaluator for instruction-following language models.\n* [ANN-Benchmarks](https:\u002F\u002Fgithub.com\u002Ferikbern\u002Fann-benchmarks) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ferikbern\u002Fann-benchmarks.svg?cacheSeconds=86400) - ANN-Benchmarks is a benchmarking environment for approximate nearest neighbor search algorithms.\n* [ARES](https:\u002F\u002Fgithub.com\u002Fstanford-futuredata\u002FARES) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fstanford-futuredata\u002FARES.svg?cacheSeconds=86400) - ARES is a framework for automatically evaluating Retrieval-Augmented Generation (RAG) models.\n* [BEIR](https:\u002F\u002Fgithub.com\u002Fbeir-cellar\u002Fbeir) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fbeir-cellar\u002Fbeir.svg?cacheSeconds=86400) - BEIR is a heterogeneous benchmark containing diverse IR tasks. 
It also provides a common and easy framework for evaluation of your NLP-based retrieval models within the benchmark.\n* [Code Generation LM Evaluation Harness](https:\u002F\u002Fgithub.com\u002Fbigcode-project\u002Fbigcode-evaluation-harness) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fbigcode-project\u002Fbigcode-evaluation-harness.svg?cacheSeconds=86400) - Code Generation LM Evaluation Harness is a framework for the evaluation of code generation models.\n* [COMET](https:\u002F\u002Fgithub.com\u002FUnbabel\u002FCOMET) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FUnbabel\u002FCOMET.svg?cacheSeconds=86400) - COMET is an open-source framework for machine learning evaluation.\n* [C-Eval](https:\u002F\u002Fgithub.com\u002Fhkust-nlp\u002Fceval) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhkust-nlp\u002Fceval.svg?cacheSeconds=86400) - C-Eval is a comprehensive Chinese evaluation suite for foundation models.\n* [Deepchecks](https:\u002F\u002Fgithub.com\u002Fdeepchecks\u002Fdeepchecks) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdeepchecks\u002Fdeepchecks.svg?cacheSeconds=86400) - Deepchecks is a holistic open-source solution for all of your AI & ML validation needs, enabling you to test your data and models from research to production thoroughly.\n* [DeepEval](https:\u002F\u002Fgithub.com\u002Fconfident-ai\u002Fdeepeval) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fconfident-ai\u002Fdeepeval.svg?cacheSeconds=86400) - DeepEval is a simple-to-use, open-source evaluation framework for LLM applications.\n* [DomainBed](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002FDomainBed) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffacebookresearch\u002FDomainBed.svg?cacheSeconds=86400) - DomainBed is a test suite containing benchmark datasets and algorithms for domain generalization.\n* [EvalAI](https:\u002F\u002Fgithub.com\u002FCloud-CV\u002FEvalAI) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FCloud-CV\u002FEvalAI.svg?cacheSeconds=86400) - EvalAI is an open-source platform for evaluating and comparing AI algorithms at scale.\n* [Evalchemy](https:\u002F\u002Fgithub.com\u002Fmlfoundations\u002Fevalchemy) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmlfoundations\u002Fevalchemy.svg?cacheSeconds=86400) - Evalchemy is a unified and easy-to-use toolkit for evaluating post-trained language models.\n* [EvalPlus](https:\u002F\u002Fgithub.com\u002Fevalplus\u002Fevalplus) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fevalplus\u002Fevalplus.svg?cacheSeconds=86400) - EvalPlus is a robust evaluation framework for LLM4Code, featuring expanded HumanEval+ and MBPP+ benchmarks, efficiency assessment (EvalPerf), and a secure, extensible evaluation toolkit.\n* [Evals](https:\u002F\u002Fgithub.com\u002Fopenai\u002Fevals) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fopenai\u002Fevals.svg?cacheSeconds=86400) - Evals is a framework for evaluating OpenAI models and an open-source registry of benchmarks.\n* [EvalScope](https:\u002F\u002Fgithub.com\u002Fmodelscope\u002Fevalscope) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmodelscope\u002Fevalscope.svg?cacheSeconds=86400) - EvalScope is a streamlined and customizable framework for efficient large model evaluation and performance benchmarking.\n* [Evaluate](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fevaluate) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Fevaluate.svg?cacheSeconds=86400) - Evaluate is a library that makes evaluating and comparing models and reporting their performance easier and more standardized.\n* [Evidently](https:\u002F\u002Fgithub.com\u002Fevidentlyai\u002Fevidently) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fevidentlyai\u002Fevidently.svg?cacheSeconds=86400) - Evidently is an open-source framework to 
evaluate, test and monitor ML and LLM-powered systems.\n* [GAOKAO-Bench](https:\u002F\u002Fgithub.com\u002FOpenLMLab\u002FGAOKAO-Bench) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FOpenLMLab\u002FGAOKAO-Bench.svg?cacheSeconds=86400) - GAOKAO-Bench is an evaluation framework that uses Chinese National College Entrance Examination (GAOKAO) questions as a dataset to assess large models' language comprehension and logical reasoning abilities.\n* [Giskard](https:\u002F\u002Fgithub.com\u002FGiskard-AI\u002Fgiskard)![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FGiskard-AI\u002Fgiskard.svg?cacheSeconds=86400) - Giskard is an open-source Python library that automatically detects performance, bias & security issues in AI applications.\n* [guidellm](https:\u002F\u002Fgithub.com\u002Fvllm-project\u002Fguidellm) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvllm-project\u002Fguidellm.svg?cacheSeconds=86400) - guidellm is a benchmarking and performance evaluation tool for large language model inference systems.\n* [HumanEval](https:\u002F\u002Fgithub.com\u002Fopenai\u002Fhuman-eval)![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fopenai\u002Fhuman-eval.svg?cacheSeconds=86400) - HumanEval is a benchmark for evaluating the functional correctness of code generation models using Python programming problems with unit tests.\n* [Helicone](https:\u002F\u002Fgithub.com\u002FHelicone\u002Fhelicone) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FHelicone\u002Fhelicone.svg?cacheSeconds=86400) - Helicone is the all-in-one, open-source LLM developer platform.\n* [HELM](https:\u002F\u002Fgithub.com\u002Fstanford-crfm\u002Fhelm) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fstanford-crfm\u002Fhelm.svg?cacheSeconds=86400) - HELM (Holistic Evaluation of Language Models) provides tools for the holistic evaluation of language models, including standardized datasets, a unified API for 
various models, diverse metrics, robustness, and fairness perturbations, a prompt construction framework, and a proxy server for unified model access.\n* [Inspect](https:\u002F\u002Fgithub.com\u002FUKGovernmentBEIS\u002Finspect_ai) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FUKGovernmentBEIS\u002Finspect_ai.svg?cacheSeconds=86400) - Inspect is a framework for large language model evaluations.\n* [JiWER](https:\u002F\u002Fgithub.com\u002Fjitsi\u002Fjiwer) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjitsi\u002Fjiwer.svg?cacheSeconds=86400) - JiWER is a simple and fast python package to evaluate an automatic speech recognition system. \n* [Laminar](https:\u002F\u002Fgithub.com\u002Flmnr-ai\u002Flmnr) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flmnr-ai\u002Flmnr.svg?cacheSeconds=86400) - Laminar is an open-source platform to trace, evaluate, label, and analyze LLM data for AI products.\n* [Langfuse](https:\u002F\u002Fgithub.com\u002Flangfuse\u002Flangfuse) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flangfuse\u002Flangfuse.svg?cacheSeconds=86400) - Langfuse is an observability & analytics solution for LLM-based applications.\n* [LangTest](https:\u002F\u002Fgithub.com\u002FJohnSnowLabs\u002Flangtest) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FJohnSnowLabs\u002Flangtest.svg?cacheSeconds=86400) - LangTest is a comprehensive evaluation toolkit for NLP models.\n* [Language Model Evaluation Harness](https:\u002F\u002Fgithub.com\u002FEleutherAI\u002Flm-evaluation-harness) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FEleutherAI\u002Flm-evaluation-harness.svg?cacheSeconds=86400) - Language Model Evaluation Harness is a framework to test generative language models on a large number of different evaluation tasks.\n* [LangWatch](https:\u002F\u002Fgithub.com\u002Flangwatch\u002Flangwatch) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flangwatch\u002Flangwatch.svg?cacheSeconds=86400) - LangWatch is a visual interface for DSPy and a complete LLM Ops platform for monitoring, experimenting, measuring and improving LLM pipelines, with a fair-code distribution model.\n* [LightEval](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Flighteval) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Flighteval.svg?cacheSeconds=86400) - LightEval is a lightweight LLM evaluation suite.\n* [LLMPerf](https:\u002F\u002Fgithub.com\u002Fray-project\u002Fllmperf) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fray-project\u002Fllmperf.svg?cacheSeconds=86400) - LLMPerf is a tool for evaluating the performance of LLM APIs.\n* [lmms-eval](https:\u002F\u002Fgithub.com\u002FEvolvingLMMs-Lab\u002Flmms-eval) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FEvolvingLMMs-Lab\u002Flmms-eval.svg?cacheSeconds=86400) - lmms-eval is an evaluation framework meticulously crafted for consistent and efficient evaluation of LMM.\n* [Melting Pot](https:\u002F\u002Fgithub.com\u002Fgoogle-deepmind\u002Fmeltingpot) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle-deepmind\u002Fmeltingpot.svg?cacheSeconds=86400) - Melting Pot is a suite of test scenarios for multi-agent reinforcement learning.\n* [Meta-World](https:\u002F\u002Fgithub.com\u002FFarama-Foundation\u002FMetaworld) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FFarama-Foundation\u002FMetaworld.svg?cacheSeconds=86400) - Meta-World is an open-source simulated benchmark for meta-reinforcement learning and multi-task learning consisting of 50 distinct robotic manipulation tasks.\n* [mir_eval](https:\u002F\u002Fgithub.com\u002Fmir-evaluation\u002Fmir_eval) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmir-evaluation\u002Fmir_eval.svg?cacheSeconds=86400) - mir_eval is a Python library which provides 
a transparent, standardized, and straightforward way to evaluate Music Information Retrieval systems.\n* [MLPerf Inference](https:\u002F\u002Fgithub.com\u002Fmlcommons\u002Finference) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmlcommons\u002Finference.svg?cacheSeconds=86400) - MLPerf Inference is a benchmark suite for measuring how fast systems can run models in a variety of deployment scenarios.\n* [Massive Text Embedding Benchmark](https:\u002F\u002Fgithub.com\u002Fembeddings-benchmark\u002Fmteb) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fembeddings-benchmark\u002Fmteb.svg?cacheSeconds=86400) - Massive Text Embedding Benchmark (MTEB) is a comprehensive evaluation framework that assesses the performance of text embedding models across diverse tasks and languages, encompassing 8 embedding tasks, 58 datasets, and 112 languages.\n* [NannyML](https:\u002F\u002Fgithub.com\u002FNannyML\u002Fnannyml) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FNannyML\u002Fnannyml.svg?cacheSeconds=86400) - NannyML is a library that allows you to estimate post-deployment model performance (without access to targets), detect data drift, and intelligently link data drift alerts back to changes in model performance.\n* [OGB](https:\u002F\u002Fgithub.com\u002Fsnap-stanford\u002Fogb) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fsnap-stanford\u002Fogb.svg?cacheSeconds=86400) - The Open Graph Benchmark (OGB) is a collection of benchmark datasets, data loaders, and evaluators for graph machine learning.\n* [Ollama Grid Search](https:\u002F\u002Fgithub.com\u002Fdezoito\u002Follama-grid-search) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdezoito\u002Follama-grid-search.svg?cacheSeconds=86400) - Ollama Grid Search automates the process of selecting the best models, prompts, or inference parameters for a given use-case, allowing you to iterate over their combinations and to visually inspect the results.\n* 
[OpenCompass](https:\u002F\u002Fgithub.com\u002Fopen-compass\u002FOpenCompass) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fopen-compass\u002FOpenCompass.svg?cacheSeconds=86400) - OpenCompass is an LLM evaluation platform, supporting a wide range of models (LLaMA, LLaMa2, ChatGLM2, ChatGPT, Claude, etc) over 50+ datasets.\n* [OpenLIT](https:\u002F\u002Fgithub.com\u002Fopenlit\u002Fopenlit) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fopenlit\u002Fopenlit.svg?cacheSeconds=86400) - OpenLIT is an open-source AI engineering platform that simplifies LLM workflows with observability, monitoring, guardrails, evaluations, and seamless integrations. \n* [OpenLLMetry](https:\u002F\u002Fgithub.com\u002Ftraceloop\u002Fopenllmetry) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftraceloop\u002Fopenllmetry.svg?cacheSeconds=86400) - OpenLLMetry provides developers with deep visibility into Large Language Model applications through performance monitoring, execution tracing, and debugging capabilities.\n* [Opik](https:\u002F\u002Fgithub.com\u002Fcomet-ml\u002Fopik) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcomet-ml\u002Fopik.svg?cacheSeconds=86400) - Opik is an open-source platform for evaluating, testing and monitoring LLM applications.\n* [Overcooked-AI](https:\u002F\u002Fgithub.com\u002FHumanCompatibleAI\u002Fovercooked_ai) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FHumanCompatibleAI\u002Fovercooked_ai.svg?cacheSeconds=86400) - Overcooked-AI is a benchmark environment for fully cooperative human-AI task performance, based on the wildly popular video game Overcooked.\n* [Phoenix](https:\u002F\u002Fgithub.com\u002FArize-ai\u002Fphoenix) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FArize-ai\u002Fphoenix.svg?cacheSeconds=86400) - Phoenix is an open-source AI observability platform designed for experimentation, evaluation, and troubleshooting.\n* 
[PromptBench](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Fpromptbench) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmicrosoft\u002Fpromptbench.svg?cacheSeconds=86400) - PromptBench is a unified evaluation framework for large language models.\n* [Promptfoo](https:\u002F\u002Fgithub.com\u002Fpromptfoo\u002Fpromptfoo) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpromptfoo\u002Fpromptfoo.svg?cacheSeconds=86400) - LLM red teaming and evaluation framework for testing jailbreaks, prompt injection, and other vulnerabilities with CI\u002FCD integration. \n* [Prometheus-Eval](https:\u002F\u002Fgithub.com\u002Fprometheus-eval\u002Fprometheus-eval) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fprometheus-eval\u002Fprometheus-eval.svg?cacheSeconds=86400) - Prometheus-Eval is a collection of tools for training, evaluating, and using language models specialized in evaluating other language models.\n* [RagaAI Catalyst](https:\u002F\u002Fgithub.com\u002Fraga-ai-hub\u002FRagaAI-Catalyst) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fraga-ai-hub\u002FRagaAI-Catalyst.svg?cacheSeconds=86400) - RagaAI Catalyst is a comprehensive platform designed to enhance the management and optimization of LLM projects.\n* [Ragas](https:\u002F\u002Fgithub.com\u002Fexplodinggradients\u002Fragas) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fexplodinggradients\u002Fragas.svg?cacheSeconds=86400) - Ragas is a framework to evaluate RAG pipelines.\n* [RewardBench](https:\u002F\u002Fgithub.com\u002Fallenai\u002Freward-bench) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fallenai\u002Freward-bench.svg?cacheSeconds=86400) - RewardBench is a benchmark designed to evaluate the capabilities and safety of reward models.\n* [RLBench](https:\u002F\u002Fgithub.com\u002Fstepjam\u002FRLBench) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fstepjam\u002FRLBench.svg?cacheSeconds=86400) 
- RLBench is an ambitious large-scale benchmark and learning environment designed to facilitate research in a number of vision-guided manipulation research areas, including: reinforcement learning, imitation learning, multi-task learning, geometric computer vision, and in particular, few-shot learning.\n* [SimplerEnv](https:\u002F\u002Fgithub.com\u002Fsimpler-env\u002FSimplerEnv) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fsimpler-env\u002FSimplerEnv.svg?cacheSeconds=86400) - SimplerEnv is a collection of simulated manipulation policy evaluation environments for real robot setups.\n* [SwanLab](https:\u002F\u002Fgithub.com\u002FSwanHubX\u002FSwanLab) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FSwanHubX\u002FSwanLab.svg?cacheSeconds=86400) - SwanLab is an AI training tracking and visualization tool.\n* [Speech-to-Text Benchmark](https:\u002F\u002Fgithub.com\u002FPicovoice\u002Fspeech-to-text-benchmark) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FPicovoice\u002Fspeech-to-text-benchmark.svg?cacheSeconds=86400) - Speech-to-Text Benchmark is a minimalist and extensible framework for benchmarking different speech-to-text engines.\n* [TensorFlow Model Analysis](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Fmodel-analysis) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftensorflow\u002Fmodel-analysis.svg?cacheSeconds=86400) - TensorFlow Model Analysis (TFMA) is a library for evaluating TensorFlow models on large amounts of data in a distributed manner, using the same metrics defined in their trainer.\n* [TorchBench](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Fbenchmark) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpytorch\u002Fbenchmark.svg?cacheSeconds=86400) - TorchBench is a collection of open source benchmarks used to evaluate PyTorch performance.\n* [TruLens](https:\u002F\u002Fgithub.com\u002Ftruera\u002Ftrulens) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftruera\u002Ftrulens.svg?cacheSeconds=86400) - TruLens provides a set of tools for evaluating and tracking LLM experiments.\n* [TrustLLM](https:\u002F\u002Fgithub.com\u002FHowieHwong\u002FTrustLLM) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FHowieHwong\u002FTrustLLM.svg?cacheSeconds=86400) - TrustLLM is a comprehensive framework to evaluate the trustworthiness of large language models, which includes principles, surveys, and benchmarks.\n* [VBench](https:\u002F\u002Fgithub.com\u002FVchitect\u002FVBench) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FVchitect\u002FVBench.svg?cacheSeconds=86400) - VBench is a comprehensive benchmark suite for video generative models.\n* [VLMEvalKit](https:\u002F\u002Fgithub.com\u002Fopen-compass\u002FVLMEvalKit) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fopen-compass\u002FVLMEvalKit.svg?cacheSeconds=86400) - VLMEvalKit is an open-source evaluation toolkit of large vision-language models (LVLMs).\n\n## Explainability and Fairness\n* [Aequitas](https:\u002F\u002Fgithub.com\u002Fdssg\u002Faequitas) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdssg\u002Faequitas.svg?cacheSeconds=86400) - An open-source bias audit toolkit for data scientists, machine learning researchers, and policymakers to audit machine learning models for discrimination and bias, and to make informed and equitable decisions around developing and deploying predictive risk-assessment tools.\n* [AI Explainability 360](https:\u002F\u002Fgithub.com\u002FTrusted-AI\u002FAIX360) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FTrusted-AI\u002FAIX360.svg?cacheSeconds=86400) - Interpretability and explainability of data and machine learning models including a comprehensive set of algorithms that cover different dimensions of explanations along with proxy explainability metrics.\n* [AI Fairness 
360](https:\u002F\u002Fgithub.com\u002FTrusted-AI\u002FAIF360) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FTrusted-AI\u002FAIF360.svg?cacheSeconds=86400) - A comprehensive set of fairness metrics for datasets and machine learning models, explanations for these metrics, and algorithms to mitigate bias in datasets and models.\n* [Alibi](https:\u002F\u002Fgithub.com\u002FSeldonIO\u002Falibi) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FSeldonIO\u002Falibi.svg?cacheSeconds=86400) - Alibi is an open source Python library aimed at machine learning model inspection and interpretation. The initial focus on the library is on black-box, instance based model explanations.\n* [captum](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Fcaptum) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpytorch\u002Fcaptum.svg?cacheSeconds=86400) - model interpretability and understanding library for PyTorch developed by Facebook. It contains general purpose implementations of integrated gradients, saliency maps, smoothgrad, vargrad and others for PyTorch models.\n* [Fairlearn](https:\u002F\u002Fgithub.com\u002Ffairlearn\u002Ffairlearn) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffairlearn\u002Ffairlearn.svg?cacheSeconds=86400) - Fairlearn is a python toolkit to assess and mitigate unfairness in machine learning models.\n* [InterpretML](https:\u002F\u002Fgithub.com\u002Finterpretml\u002Finterpret) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Finterpretml\u002Finterpret.svg?cacheSeconds=86400) - InterpretML is an open-source package for training interpretable models and explaining blackbox systems.\n* [Lightly](https:\u002F\u002Fgithub.com\u002Flightly-ai\u002Flightly) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flightly-ai\u002Flightly.svg?cacheSeconds=86400) - A python framework for self-supervised learning on images. 
The learned representations can be used to analyze the distribution in unlabeled data and rebalance datasets.\n* [LOFO Importance](https:\u002F\u002Fgithub.com\u002Faerdem4\u002Flofo-importance) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Faerdem4\u002Flofo-importance.svg?cacheSeconds=86400) - LOFO (Leave One Feature Out) Importance calculates the importances of a set of features based on a metric of choice, for a model of choice, by iteratively removing each feature from the set, and evaluating the performance of the model, with a validation scheme of choice, based on the chosen metric.\n* [mljar-supervised](https:\u002F\u002Fgithub.com\u002Fmljar\u002Fmljar-supervised) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmljar\u002Fmljar-supervised.svg?cacheSeconds=86400) - A Python package for AutoML on tabular data with feature engineering, hyper-parameters tuning, explanations and automatic documentation.\n* [Quantus](https:\u002F\u002Fgithub.com\u002Funderstandable-machine-intelligence-lab\u002FQuantus) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Funderstandable-machine-intelligence-lab\u002FQuantus.svg?cacheSeconds=86400) - Quantus is an eXplainable AI toolkit for responsible evaluation of neural network explanations\n* [SHAP](https:\u002F\u002Fgithub.com\u002Fshap\u002Fshap) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fshap\u002Fshap.svg?cacheSeconds=86400) - SHapley Additive exPlanations is a unified approach to explain the output of any machine learning model.\n* [SHAPash](https:\u002F\u002Fgithub.com\u002FMAIF\u002Fshapash) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FMAIF\u002Fshapash.svg?cacheSeconds=86400) - Shapash is a Python library that provides several types of visualization that display explicit labels that everyone can understand.\n* [WhatIf](https:\u002F\u002Fgithub.com\u002Fpair-code\u002Fwhat-if-tool) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpair-code\u002Fwhat-if-tool.svg?cacheSeconds=86400) - An easy-to-use interface for expanding understanding of a black-box classification or regression ML model.\n\n## Feature Store\n* [FEAST](https:\u002F\u002Fgithub.com\u002Ffeast-dev\u002Ffeast)  ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffeast-dev\u002Ffeast.svg?cacheSeconds=86400) - Feast (Feature Store) is an open source feature store for machine learning. Feast is the fastest path to manage existing infrastructure to productionize analytic data for model training and online inference.\n* [Featureform](https:\u002F\u002Fgithub.com\u002Ffeatureform\u002Ffeatureform) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffeatureform\u002Ffeatureform.svg?cacheSeconds=86400) - A virtual featurestore. Plug-&-play with your existing infra. Data Scientist approved. Discovery, Governance, Lineage, & Collaboration just a pip install away. Supports pandas, Python, spark, SQL + integrations with major cloud vendors. 
\n* [Hopsworks Feature Store](https:\u002F\u002Fgithub.com\u002Flogicalclocks\u002Fhopsworks) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flogicalclocks\u002Fhopsworks.svg?cacheSeconds=86400) - Offline\u002FOnline Feature Store for ML [(Video)](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=N1BjPk1smdg).\n\n## Industry-strength Anomaly Detection\n* [Alibi Detect](https:\u002F\u002Fgithub.com\u002FSeldonIO\u002Falibi-detect) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FSeldonIO\u002Falibi-detect.svg?cacheSeconds=86400) - alibi-detect is a Python package focused on outlier, adversarial and concept drift detection.\n* [Darts](https:\u002F\u002Fgithub.com\u002Funit8co\u002Fdarts) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Funit8co\u002Fdarts.svg?cacheSeconds=86400) - Darts is a library for user-friendly forecasting and anomaly detection on time series.\n* [Deequ](https:\u002F\u002Fgithub.com\u002Fawslabs\u002Fdeequ) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fawslabs\u002Fdeequ.svg?cacheSeconds=86400) - A library built on top of Apache Spark for defining \"unit tests for data\", which measure data quality in large datasets.\n* [PyOD](https:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fyzhao062\u002Fpyod.svg?cacheSeconds=86400) - A Python Toolbox for Scalable Outlier Detection (Anomaly Detection).\n* [TFDV](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Fdata-validation) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftensorflow\u002Fdata-validation.svg?cacheSeconds=86400) - TFDV (Tensorflow Data Validation) is a library for exploring and validating machine learning data.\n\n## Industry Strength Computer Vision\n* [CameraTraps](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FCameraTraps) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmicrosoft\u002FCameraTraps.svg?cacheSeconds=86400) - 
CameraTraps (PyTorch Wildlife) is a collaborative deep learning framework for wildlife image analysis, providing detection and classification models trained on large-scale camera trap datasets.\n* [Deep Lake](https:\u002F\u002Fgithub.com\u002Factiveloopai\u002Fdeeplake) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Factiveloopai\u002Fdeeplake.svg?cacheSeconds=86400) - Deep Lake is a data infrastructure optimized for computer vision.\n* [DeepForest](https:\u002F\u002Fgithub.com\u002Fweecology\u002FDeepForest) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fweecology\u002FDeepForest.svg?cacheSeconds=86400) - DeepForest is a Python package for training and predicting individual tree crowns and species from airborne RGB imagery using deep learning.\n* [Detectron2](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fdetectron2) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffacebookresearch\u002Fdetectron2.svg?cacheSeconds=86400) - Detectron2 is Facebook AI Research's next generation library that provides state-of-the-art detection and segmentation algorithms.\n* [KerasCV](https:\u002F\u002Fgithub.com\u002Fkeras-team\u002Fkeras-cv) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fkeras-team\u002Fkeras-cv.svg?cacheSeconds=86400) - KerasCV is a library of modular computer vision oriented Keras components.\n* [Kornia](https:\u002F\u002Fgithub.com\u002Fkornia\u002Fkornia) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fkornia\u002Fkornia.svg?cacheSeconds=86400) - Kornia is a differentiable computer vision library built on PyTorch that provides a rich set of differentiable image processing and geometric vision algorithms.\n* [LAVIS](https:\u002F\u002Fgithub.com\u002Fsalesforce\u002FLAVIS) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fsalesforce\u002FLAVIS.svg?cacheSeconds=86400) - LAVIS is a deep learning library for LAnguage-and-VISion intelligence research and 
applications.\n* [libcom](https:\u002F\u002Fgithub.com\u002Fbcmi\u002Flibcom) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fbcmi\u002Flibcom.svg?cacheSeconds=86400) - libcom is an image composition toolbox.\n* [LightlyTrain](https:\u002F\u002Fgithub.com\u002Flightly-ai\u002Flightly-train) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flightly-ai\u002Flightly-train.svg?cacheSeconds=86400) - Pretrain computer vision models on unlabeled data for industrial applications.\n* [MMCV](https:\u002F\u002Fgithub.com\u002Fopen-mmlab\u002Fmmcv) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fopen-mmlab\u002Fmmcv.svg?cacheSeconds=86400) - MMCV is a foundational computer vision library from OpenMMLab that provides essential functionalities like image and video processing, data transformation and augmentation, CNN architectures, and optimized CUDA operations.\n* [SuperGradients](https:\u002F\u002Fgithub.com\u002FDeci-AI\u002Fsuper-gradients) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FDeci-AI\u002Fsuper-gradients.svg?cacheSeconds=86400) - SuperGradients is an open-source library for training PyTorch-based computer vision models.\n* [supervision](https:\u002F\u002Fgithub.com\u002Froboflow\u002Fsupervision) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Froboflow\u002Fsupervision.svg?cacheSeconds=86400) - Supervision is a Python library designed for efficient computer vision pipeline management, providing tools for annotation, visualization, and monitoring of models.\n* [VideoSys](https:\u002F\u002Fgithub.com\u002FNUS-HPC-AI-Lab\u002FVideoSys) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FNUS-HPC-AI-Lab\u002FVideoSys.svg?cacheSeconds=86400) - VideoSys supports many diffusion models with our various acceleration techniques, enabling these models to run faster and consume less memory.\n\n## Industry Strength Information Retrieval\n* 
[AutoRAG](https:\u002F\u002Fgithub.com\u002FMarker-Inc-Korea\u002FAutoRAG) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FMarker-Inc-Korea\u002FAutoRAG.svg?cacheSeconds=86400) - AutoRAG is a RAG AutoML tool for automatically finding an optimal RAG pipeline for your data.\n* [BGE](https:\u002F\u002Fgithub.com\u002FFlagOpen\u002FFlagEmbedding) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FFlagOpen\u002FFlagEmbedding.svg?cacheSeconds=86400) - BGE builds one-stop retrieval toolkit for search and RAG.\n* [Cognita](https:\u002F\u002Fgithub.com\u002Ftruefoundry\u002Fcognita) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftruefoundry\u002Fcognita.svg?cacheSeconds=86400) - Cognita is a RAG framework for building modular and production-ready applications.\n* [DocArray](https:\u002F\u002Fgithub.com\u002Fdocarray\u002Fdocarray) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdocarray\u002Fdocarray.svg?cacheSeconds=86400) - DocArray is a library for nested, unstructured, multimodal data in transit, including text, image, audio, video, 3D mesh, etc. 
It allows deep-learning engineers to efficiently process, embed, search, recommend, store, and transfer multimodal data with a Pythonic API.\n* [EmbedAnything](https:\u002F\u002Fgithub.com\u002FStarlightSearch\u002FEmbedAnything) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FStarlightSearch\u002FEmbedAnything.svg?cacheSeconds=86400) - EmbedAnything is a minimalist, lightweight, and high-performance embedding pipeline built in Rust for generating embeddings from text, images, audio, PDFs, and other media, with support for dense, sparse, ONNX, and late-interaction embeddings.\n* [Faiss](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Ffaiss) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffacebookresearch\u002Ffaiss.svg?cacheSeconds=86400) - Faiss is a library for efficient similarity search and clustering of dense vectors.\n* [fastRAG](https:\u002F\u002Fgithub.com\u002FIntelLabs\u002FfastRAG) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FIntelLabs\u002FfastRAG.svg?cacheSeconds=86400) - fastRAG is a research framework for efficient and optimized retrieval augmented generative pipelines, incorporating state-of-the-art LLMs and Information Retrieval.\n* [GraphRAG](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Fgraphrag) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmicrosoft\u002Fgraphrag.svg?cacheSeconds=86400) - GraphRAG is a data pipeline and transformation suite that is designed to extract meaningful, structured data from unstructured text using the power of LLMs.\n* [HippoRAG](https:\u002F\u002Fgithub.com\u002FOSU-NLP-Group\u002FHippoRAG) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FOSU-NLP-Group\u002FHippoRAG.svg?cacheSeconds=86400) - HippoRAG is a novel retrieval augmented generation (RAG) framework inspired by the neurobiology of human long-term memory that enables LLMs to continuously integrate knowledge across external documents.\n* [JamAI 
Base](https:\u002F\u002Fgithub.com\u002FEmbeddedLLM\u002FJamAIBase) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FEmbeddedLLM\u002FJamAIBase.svg?cacheSeconds=86400) - JamAI Base is an open-source RAG (Retrieval-Augmented Generation) backend platform that integrates an embedded database (SQLite) and an embedded vector database (LanceDB) with managed memory and RAG capabilities. It features built-in LLM, vector embeddings, and reranker orchestration and management, all accessible through a convenient, intuitive, spreadsheet-like UI and a simple REST API.\n* [LangExtract](https:\u002F\u002Fgithub.com\u002Fgoogle\u002Flangextract) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle\u002Flangextract.svg?cacheSeconds=86400) - LangExtract is a Python library that uses LLMs to extract structured information from unstructured text documents based on user-defined instructions. It processes materials such as clinical notes or reports, identifying and organizing key details while ensuring the extracted data corresponds to the source text.\n* [LightRAG](https:\u002F\u002Fgithub.com\u002FHKUDS\u002FLightRAG) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FHKUDS\u002FLightRAG.svg?cacheSeconds=86400) - A simple and fast retrieval-augmented generation framework.\n* [llmware](https:\u002F\u002Fgithub.com\u002Fllmware-ai\u002Fllmware) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fllmware-ai\u002Fllmware.svg?cacheSeconds=86400) - llmware provides a unified framework for building LLM-based applications (e.g, RAG, Agents), using small, specialized models that can be deployed privately, integrated with enterprise knowledge sources safely and securely, and cost-effectively tuned and adapted for any business process.\n* [Mem0](https:\u002F\u002Fgithub.com\u002Fmem0ai\u002Fmem0) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmem0ai\u002Fmem0.svg?cacheSeconds=86400) - Mem0 enhances AI assistants and 
agents with an intelligent memory layer, enabling personalized AI interactions.\n* [NGT](https:\u002F\u002Fgithub.com\u002Fyahoojapan\u002FNGT) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fyahoojapan\u002FNGT.svg?cacheSeconds=86400) - NGT provides commands and a library for performing high-speed approximate nearest neighbor searches against a large volume of data in high dimensional vector data space.\n* [NMSLIB](https:\u002F\u002Fgithub.com\u002Fnmslib\u002Fnmslib) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fnmslib\u002Fnmslib.svg?cacheSeconds=86400) - Non-Metric Space Library (NMSLIB): An efficient similarity search library and a toolkit for evaluation of k-NN methods for generic non-metric spaces.\n* [Qdrant](https:\u002F\u002Fgithub.com\u002Fqdrant\u002Fqdrant) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fqdrant\u002Fqdrant.svg?cacheSeconds=86400) - An open source vector similarity search engine with extended filtering support.\n* [R2R](https:\u002F\u002Fgithub.com\u002FSciPhi-AI\u002FR2R) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FSciPhi-AI\u002FR2R.svg?cacheSeconds=86400) - R2R (RAG to Riches) is a comprehensive platform for building, deploying, and scaling RAG applications with hybrid search, multimodal support, and advanced observability.\n* [RAGFlow](https:\u002F\u002Fgithub.com\u002Finfiniflow\u002Fragflow) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Finfiniflow\u002Fragflow.svg?cacheSeconds=86400) - RAGFlow is a RAG engine based on deep document understanding.\n* [RAGxplorer](https:\u002F\u002Fgithub.com\u002Fgabrielchua\u002FRAGxplorer) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgabrielchua\u002FRAGxplorer.svg?cacheSeconds=86400) - RAGxplorer is a tool to build RAG visualisations.\n* [RAG-FiT](https:\u002F\u002Fgithub.com\u002FIntelLabs\u002FRAG-FiT) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FIntelLabs\u002FRAG-FiT.svg?cacheSeconds=86400) - RAG-FiT is a library designed to improve LLMs ability to use external information by fine-tuning models on specially created RAG-augmented datasets.\n* [TextWorld](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FTextWorld) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmicrosoft\u002FTextWorld.svg?cacheSeconds=86400) - TextWorld is a text-based game generator and extensible sandbox learning environment for training and testing reinforcement learning (RL) agents.\n* [Vanna](https:\u002F\u002Fgithub.com\u002Fvanna-ai\u002Fvanna) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvanna-ai\u002Fvanna.svg?cacheSeconds=86400) - Vanna is a RAG framework for SQL generation and related functionality.\n\n## Industry Strength Natural Language Processing\n* [aisuite](https:\u002F\u002Fgithub.com\u002Fandrewyng\u002Faisuite) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fandrewyng\u002Faisuite.svg?cacheSeconds=86400) - aisuite is a simple, unified interface to multiple generative AI providers.\n* [Align-Anything](https:\u002F\u002Fgithub.com\u002FPKU-Alignment\u002Falign-anything) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FPKU-Alignment\u002Falign-anything.svg?cacheSeconds=86400) - Align-Anything aims to align any modality large models (any-to-any models), including LLMs, VLMs, and others, with human intentions and values\n* [BERTopic](https:\u002F\u002Fgithub.com\u002FMaartenGr\u002FBERTopic) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FMaartenGr\u002FBERTopic.svg?cacheSeconds=86400) - BERTopic is a topic modeling technique that leverages transformers and c-TF-IDF to create dense clusters allowing for easily interpretable topics whilst keeping important words in the topic descriptions.\n* [Burr](https:\u002F\u002Fgithub.com\u002Fdagworks-inc\u002Fburr) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdagworks-inc\u002Fburr.svg?cacheSeconds=86400) - Burr helps you develop applications that make decisions (chatbot, agent, simulation). It comes with production-ready features (telemetry, persistence, deployment, etc.) and the open-source, free, and local-first Burr UI.\n* [CodeTF](https:\u002F\u002Fgithub.com\u002Fsalesforce\u002FCodeTF) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fsalesforce\u002FCodeTF.svg?cacheSeconds=86400) - CodeTF is a one-stop Python transformer-based library for code large language models (Code LLMs) and code intelligence, provides a seamless interface for training and inferencing on code intelligence tasks like code summarization, translation, code generation and so on. \n* [Dify](https:\u002F\u002Fgithub.com\u002Flanggenius\u002Fdify) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flanggenius\u002Fdify.svg?cacheSeconds=86400) - Dify is an open-source LLM app development platform whose intuitive interface combines agentic AI workflow, RAG pipeline, agent capabilities, model management, observability features and more, letting you quickly go from prototype to production.\n* [dspy](https:\u002F\u002Fgithub.com\u002Fstanfordnlp\u002Fdspy) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fstanfordnlp\u002Fdspy.svg?cacheSeconds=86400) - A framework for programming with foundation models.\n* [Dust](https:\u002F\u002Fgithub.com\u002Fdust-tt\u002Fdust) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdust-tt\u002Fdust.svg?cacheSeconds=86400) - Dust assists in the design and deployment of large language model apps.\n* [ESPnet](https:\u002F\u002Fgithub.com\u002Fespnet\u002Fespnet) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fespnet\u002Fespnet.svg?cacheSeconds=86400) - ESPnet is an end-to-end speech processing toolkit.\n* [FastChat](https:\u002F\u002Fgithub.com\u002Flm-sys\u002FFastChat) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flm-sys\u002FFastChat.svg?cacheSeconds=86400) - FastChat is an open platform for training, serving, and evaluating large language model based chatbots.\n* [Flair](https:\u002F\u002Fgithub.com\u002FflairNLP\u002Fflair) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FflairNLP\u002Fflair.svg?cacheSeconds=86400) - Simple framework for state-of-the-art NLP developed by Zalando which builds directly on PyTorch.\n* [Gensim](https:\u002F\u002Fgithub.com\u002Fpiskvorky\u002Fgensim) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpiskvorky\u002Fgensim.svg?cacheSeconds=86400) - Gensim is a Python library for topic modelling, document indexing and similarity retrieval with large corpora.\n* [gpt-fast](https:\u002F\u002Fgithub.com\u002Fmeta-pytorch\u002Fgpt-fast) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmeta-pytorch\u002Fgpt-fast.svg?cacheSeconds=86400) - Simple and efficient pytorch-native transformer text generation.\n* [h2oGPT](https:\u002F\u002Fgithub.com\u002Fh2oai\u002Fh2ogpt) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fh2oai\u002Fh2ogpt.svg?cacheSeconds=86400) - h2oGPT is an open source generative AI, gives organizations like yours the power to own large language models while preserving your data ownership.\n* [Haystack](https:\u002F\u002Fgithub.com\u002Fdeepset-ai\u002Fhaystack) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdeepset-ai\u002Fhaystack.svg?cacheSeconds=86400) - Haystack is an open source NLP framework to interact with your data using Transformer models and LLMs (GPT-3 and alike). 
Haystack offers production-ready tools to quickly build ChatGPT-like question answering, semantic search, text generation, and more.\n* [Interactive Composition Explorer](https:\u002F\u002Fgithub.com\u002Foughtinc\u002Fice) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Foughtinc\u002Fice.svg?cacheSeconds=86400) - ICE is a Python library and trace visualizer for language model programs.\n* [Jan](https:\u002F\u002Fgithub.com\u002Fjanhq\u002Fjan) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjanhq\u002Fjan.svg?cacheSeconds=86400) - Jan is an open-source ChatGPT alternative that runs 100% offline on your computer, allowing you to download and run LLMs locally with full control and privacy.\n* [Lamini](https:\u002F\u002Fgithub.com\u002Flamini-ai\u002Flamini) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flamini-ai\u002Flamini.svg?cacheSeconds=86400) - Lamini is an LLM engine for rapidly customizing models.\n* [LangChain](https:\u002F\u002Fgithub.com\u002Flangchain-ai\u002Flangchain) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flangchain-ai\u002Flangchain.svg?cacheSeconds=86400) - LangChain assists in building applications with LLMs through composability.\n* [LlamaIndex](https:\u002F\u002Fgithub.com\u002Frun-llama\u002Fllama_index) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Frun-llama\u002Fllama_index.svg?cacheSeconds=86400) - LlamaIndex (GPT Index) is a data framework for your LLM application.\n* [LLaMA](https:\u002F\u002Fgithub.com\u002Fmeta-llama\u002Fllama) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmeta-llama\u002Fllama.svg?cacheSeconds=86400) - LLaMA is intended as a minimal, hackable and readable example to load LLaMA (arXiv) models and run inference.\n* [LLaMA-Factory](https:\u002F\u002Fgithub.com\u002Fhiyouga\u002FLLaMA-Factory) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhiyouga\u002FLLaMA-Factory.svg?cacheSeconds=86400) - 
LLaMA-Factory makes it easy to fine-tune 100+ large language models with zero-code CLI and Web UI\n* [LLaMA2-Accessory](https:\u002F\u002Fgithub.com\u002FAlpha-VLLM\u002FLLaMA2-Accessory) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FAlpha-VLLM\u002FLLaMA2-Accessory.svg?cacheSeconds=86400) - LLaMA2-Accessory is an open-source toolkit for pretraining, finetuning and deployment of Large Language Models (LLMs) and multimodal LLMs.\n* [LLMBox](https:\u002F\u002Fgithub.com\u002FRUCAIBox\u002FLLMBox) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FRUCAIBox\u002FLLMBox.svg?cacheSeconds=86400) - LLMBox is a comprehensive library for implementing LLMs, including a unified training pipeline and comprehensive model evaluation.\n* [LMFlow](https:\u002F\u002Fgithub.com\u002FOptimalScale\u002FLMFlow) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FOptimalScale\u002FLMFlow.svg?cacheSeconds=86400) - LMFlow is an extensible, convenient, and efficient toolbox for finetuning large machine learning models.\n* [Megatron-LM](https:\u002F\u002Fgithub.com\u002FNVIDIA\u002FMegatron-LM) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FNVIDIA\u002FMegatron-LM.svg?cacheSeconds=86400) - Megatron-LM is a highly optimized and efficient library for training large language models.\n* [MindNLP](https:\u002F\u002Fgithub.com\u002Fmindspore-lab\u002Fmindnlp) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmindspore-lab\u002Fmindnlp.svg?cacheSeconds=86400) - MindNLP is an easy-to-use and high-performance NLP and LLM framework based on MindSpore, compatible with models and datasets of Huggingface.\n* [MLC LLM](https:\u002F\u002Fgithub.com\u002Fmlc-ai\u002Fmlc-llm) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmlc-ai\u002Fmlc-llm.svg?cacheSeconds=86400) - MLC LLM is a universal solution that allows any language models to be deployed natively on a diverse set of hardware backends and native 
applications, plus a productive framework for everyone to further optimize model performance for their own use cases.\n* [mlx-lm](https:\u002F\u002Fgithub.com\u002Fml-explore\u002Fmlx-lm) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fml-explore\u002Fmlx-lm.svg?cacheSeconds=86400) - MLX LM is a Python package for generating text and fine-tuning large language models on Apple silicon with MLX, featuring integration with Hugging Face Hub and support for quantization and distributed inference.\n* [Ollama](https:\u002F\u002Fgithub.com\u002Follama\u002Follama) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Follama\u002Follama.svg?cacheSeconds=86400) - Get up and running with large language models, locally.\n* [olmOCR](https:\u002F\u002Fgithub.com\u002Fallenai\u002Folmocr) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fallenai\u002Folmocr.svg?cacheSeconds=86400) - olmOCR is a toolkit for training language models to work with PDF documents in the wild.\n* [PaddleNLP](https:\u002F\u002Fgithub.com\u002FPaddlePaddle\u002FPaddleNLP) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FPaddlePaddle\u002FPaddleNLP.svg?cacheSeconds=86400) - PaddleNLP is a Large Language Model (LLM) development suite based on the PaddlePaddle deep learning framework, supporting efficient large model training, lossless compression, and high-performance inference on various hardware devices.\n* [PyLLMs](https:\u002F\u002Fgithub.com\u002Fkagisearch\u002Fpyllms) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fkagisearch\u002Fpyllms.svg?cacheSeconds=86400) - PyLLMs is a minimal Python library to connect to various Language Models (LLMs) with a built-in model performance benchmark.\n* [Semantic Kernel](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Fsemantic-kernel) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmicrosoft\u002Fsemantic-kernel.svg?cacheSeconds=86400) - Semantic Kernel is an SDK that 
integrates Large Language Models (LLMs) like OpenAI, Azure OpenAI, and Hugging Face with conventional programming languages like C#, Python, and Java. Semantic Kernel achieves this by allowing you to define plugins that can be chained together in just a few lines of code.\n* [Sentence Transformers](https:\u002F\u002Fgithub.com\u002FUKPLab\u002Fsentence-transformers) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FUKPLab\u002Fsentence-transformers.svg?cacheSeconds=86400) - Sentence Transformers provides an easy method to compute dense vector representations for sentences, paragraphs, and images.\n* [SpaCy](https:\u002F\u002Fgithub.com\u002Fexplosion\u002FspaCy) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fexplosion\u002FspaCy.svg?cacheSeconds=86400) - spaCy is a library for advanced Natural Language Processing in Python and Cython.\n* [SWIFT](https:\u002F\u002Fgithub.com\u002Fmodelscope\u002Fms-swift) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmodelscope\u002Fms-swift.svg?cacheSeconds=86400) - SWIFT is a scalable lightweight infrastructure for deep learning model fine-tuning.\n* [Tensorflow Lingvo](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Flingvo) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftensorflow\u002Flingvo.svg?cacheSeconds=86400) - A framework for building neural networks in Tensorflow, particularly sequence models.\n* [Tensorflow Text](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Ftext) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftensorflow\u002Ftext.svg?cacheSeconds=86400) - TensorFlow Text provides a collection of text related classes and ops ready to use with TensorFlow 2.0.\n* [ToolBench](https:\u002F\u002Fgithub.com\u002FOpenBMB\u002FToolBench) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FOpenBMB\u002FToolBench.svg?cacheSeconds=86400) - ToolBench is an open platform for training, serving, and evaluating large language 
models for tool learning.\n* [Transformers](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Ftransformers) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Ftransformers.svg?cacheSeconds=86400) - Huggingface's library of state-of-the-art pretrained models for Natural Language Processing (NLP).\n\n## Industry Strength Recommender System\n* [EasyRec](https:\u002F\u002Fgithub.com\u002Falibaba\u002FEasyRec) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Falibaba\u002FEasyRec.svg?cacheSeconds=86400) - EasyRec is a framework for large scale recommendation algorithms.\n* [Gorse](https:\u002F\u002Fgithub.com\u002Fgorse-io\u002Fgorse) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgorse-io\u002Fgorse.svg?cacheSeconds=86400) - Gorse aims to be a universal open-source recommender system that can be quickly introduced into a wide variety of online services.\n* [Merlin](https:\u002F\u002Fgithub.com\u002FNVIDIA-Merlin\u002FMerlin) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FNVIDIA-Merlin\u002FMerlin.svg?cacheSeconds=86400) - NVIDIA Merlin is an open source library providing end-to-end GPU-accelerated recommender systems, from feature engineering and preprocessing to training deep learning models and running inference in production.\n* [Recommenders](https:\u002F\u002Fgithub.com\u002Frecommenders-team\u002Frecommenders) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Frecommenders-team\u002Frecommenders.svg?cacheSeconds=86400) - Recommenders contains benchmark and best practices for building recommendation systems, provided as Jupyter notebooks.\n* [TorchRec](https:\u002F\u002Fgithub.com\u002Fmeta-pytorch\u002Ftorchrec) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmeta-pytorch\u002Ftorchrec.svg?cacheSeconds=86400) - TorchRec is a PyTorch domain library built to provide common sparsity and parallelism primitives needed for large-scale recommender systems 
(RecSys).\n\n## Industry Strength Reinforcement Learning\n* [Acme](https:\u002F\u002Fgithub.com\u002Fgoogle-deepmind\u002Facme) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle-deepmind\u002Facme.svg?cacheSeconds=86400) - Acme is a library of reinforcement learning (RL) building blocks that strives to expose simple, efficient, and readable agents.\n* [AReaL](https:\u002F\u002Fgithub.com\u002FinclusionAI\u002FAReaL) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FinclusionAI\u002FAReaL.svg?cacheSeconds=86400) - AReaL is a reinforcement learning library.\n* [ChatLearn](https:\u002F\u002Fgithub.com\u002Falibaba\u002FChatLearn) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Falibaba\u002FChatLearn.svg?cacheSeconds=86400) - ChatLearn is a flexible and efficient reinforcement learning training framework for large language models, supporting distributed training engines (FSDP2, Megatron) and inference engines (vLLM, SGLang) with modern RL algorithms such as GRPO and GSPO.\n* [CleanRL](https:\u002F\u002Fgithub.com\u002Fvwxyzjn\u002Fcleanrl) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvwxyzjn\u002Fcleanrl.svg?cacheSeconds=86400) - CleanRL is a Deep Reinforcement Learning library that provides high-quality single-file implementation with research-friendly features. 
The implementation is clean and simple, yet we can scale it to run thousands of experiments using AWS Batch.\n* [CompilerGym](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002FCompilerGym) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffacebookresearch\u002FCompilerGym.svg?cacheSeconds=86400) - CompilerGym is a library of easy to use and performant reinforcement learning environments for compiler tasks.\n* [d3rlpy](https:\u002F\u002Fgithub.com\u002Ftakuseno\u002Fd3rlpy) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftakuseno\u002Fd3rlpy.svg?cacheSeconds=86400) - d3rlpy is an offline deep reinforcement learning library for practitioners and researchers.\n* [D4RL](https:\u002F\u002Fgithub.com\u002FFarama-Foundation\u002FD4RL) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FFarama-Foundation\u002FD4RL.svg?cacheSeconds=86400) - D4RL is an open-source benchmark for offline reinforcement learning.\n* [Dopamine](https:\u002F\u002Fgithub.com\u002Fgoogle\u002Fdopamine) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle\u002Fdopamine.svg?cacheSeconds=86400) - Dopamine is a research framework for fast prototyping of reinforcement learning algorithms. 
It aims to fill the need for a small, easily grokked codebase in which users can freely experiment with wild ideas (speculative research).\n* [EvoTorch](https:\u002F\u002Fgithub.com\u002Fnnaisense\u002Fevotorch) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fnnaisense\u002Fevotorch.svg?cacheSeconds=86400) - EvoTorch is an open source evolutionary computation library developed at NNAISENSE, built on top of PyTorch.\n* [FinRL](https:\u002F\u002Fgithub.com\u002FAI4Finance-Foundation\u002FFinRL) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FAI4Finance-Foundation\u002FFinRL.svg?cacheSeconds=86400) - FinRL is the first open-source framework to demonstrate the great potential of financial reinforcement learning.\n* [Gymnasium](https:\u002F\u002Fgithub.com\u002FFarama-Foundation\u002FGymnasium) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FFarama-Foundation\u002FGymnasium.svg?cacheSeconds=86400) - Gymnasium is an open source Python library for developing and comparing reinforcement learning algorithms by providing a standard API to communicate between learning algorithms and environments, as well as a standard set of environments compliant with that API.\n* [Gymnasium-Robotics](https:\u002F\u002Fgithub.com\u002FFarama-Foundation\u002FGymnasium-Robotics) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FFarama-Foundation\u002FGymnasium-Robotics.svg?cacheSeconds=86400) - Gymnasium-Robotics contains a collection of Reinforcement Learning robotic environments that use the Gymnasium API. 
The environments run with the MuJoCo physics engine and the maintained mujoco python bindings.\n* [Jumanji](https:\u002F\u002Fgithub.com\u002Finstadeepai\u002Fjumanji) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Finstadeepai\u002Fjumanji.svg?cacheSeconds=86400) - Jumanji is a suite of Reinforcement Learning (RL) environments written in JAX providing clean, hardware-accelerated environments for industry-driven research.\n* [MARLlib](https:\u002F\u002Fgithub.com\u002FReplicable-MARL\u002FMARLlib) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FReplicable-MARL\u002FMARLlib.svg?cacheSeconds=86400) - MARLlib is a comprehensive Multi-Agent Reinforcement Learning algorithm library based on RLlib. It provides MARL research community with a unified platform for building, training, and evaluating MARL algorithms.\n* [Mava](https:\u002F\u002Fgithub.com\u002Finstadeepai\u002FMava) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Finstadeepai\u002FMava.svg?cacheSeconds=86400) - Mava is a framework for distributed multi-agent reinforcement learning in JAX.\n* [Melting Pot](https:\u002F\u002Fgithub.com\u002Fgoogle-deepmind\u002Fmeltingpot) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle-deepmind\u002Fmeltingpot.svg?cacheSeconds=86400) - Melting Pot is a suite of test scenarios for multi-agent reinforcement learning.\n* [MetaDrive](https:\u002F\u002Fgithub.com\u002Fmetadriverse\u002Fmetadrive) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmetadriverse\u002Fmetadrive.svg?cacheSeconds=86400) - MetaDrive is a driving simulator that composes diverse driving scenarios for generalizable RL.\n* [Minigrid](https:\u002F\u002Fgithub.com\u002FFarama-Foundation\u002FMinigrid) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FFarama-Foundation\u002FMinigrid.svg?cacheSeconds=86400) - The Minigrid library contains a collection of discrete grid-world environments to conduct research on 
Reinforcement Learning. The environments follow the Gymnasium standard API and they are designed to be lightweight, fast, and easily customizable.\n* [MiniWorld](https:\u002F\u002Fgithub.com\u002FFarama-Foundation\u002FMiniworld) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FFarama-Foundation\u002FMiniworld.svg?cacheSeconds=86400) - MiniWorld is a minimalistic 3D interior environment simulator for reinforcement learning & robotics research.\n* [ML-Agents](https:\u002F\u002Fgithub.com\u002FUnity-Technologies\u002Fml-agents) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FUnity-Technologies\u002Fml-agents.svg?cacheSeconds=86400) - ML-Agents is an open-source project that enables games and simulations to serve as environments for training reinforcement learning intelligent agents.\n* [MLGym](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002FMLGym) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffacebookresearch\u002FMLGym.svg?cacheSeconds=86400) - MLGym is a gym environment enabling research on reinforcement learning (RL) algorithms for training such agents for ML tasks.\n* [MushroomRL](https:\u002F\u002Fgithub.com\u002FMushroomRL\u002Fmushroom-rl) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FMushroomRL\u002Fmushroom-rl.svg?cacheSeconds=86400) - MushroomRL is a Python reinforcement learning (RL) library whose modularity allows to easily use well-known Python libraries for tensor computation (e.g. PyTorch, Tensorflow) and RL benchmarks (e.g. 
OpenAI Gym, PyBullet, Deepmind Control Suite).\n* [OmniSafe](https:\u002F\u002Fgithub.com\u002FPKU-Alignment\u002Fomnisafe) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FPKU-Alignment\u002Fomnisafe.svg?cacheSeconds=86400) - OmniSafe is an infrastructural framework designed to accelerate safe reinforcement learning (RL) research.\n* [OpenRLHF](https:\u002F\u002Fgithub.com\u002FOpenRLHF\u002FOpenRLHF) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FOpenRLHF\u002FOpenRLHF.svg?cacheSeconds=86400) - OpenRLHF is an open-source framework for reinforcement learning from human feedback (RLHF).\n* [PARL](https:\u002F\u002Fgithub.com\u002FPaddlePaddle\u002FPARL) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FPaddlePaddle\u002FPARL.svg?cacheSeconds=86400) - PARL is a flexible and high-efficient reinforcement learning framework.\n* [PettingZoo](https:\u002F\u002Fgithub.com\u002FFarama-Foundation\u002FPettingZoo) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FFarama-Foundation\u002FPettingZoo.svg?cacheSeconds=86400) - PettingZoo is a Python library for conducting research in multi-agent reinforcement learning, akin to a multi-agent version of Gymnasium.\n* [ranx](https:\u002F\u002Fgithub.com\u002FAmenRa\u002Franx) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FAmenRa\u002Franx.svg?cacheSeconds=86400) - ranx is a library of fast ranking evaluation metrics implemented in Python, leveraging Numba for high-speed vector operations and automatic parallelization.\n* [RL4CO](https:\u002F\u002Fgithub.com\u002Fai4co\u002Frl4co) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fai4co\u002Frl4co.svg?cacheSeconds=86400) - RL4CO is a PyTorch library for all things reinforcement learning for combinatorial optimization (CO).\n* [RL2](https:\u002F\u002Fgithub.com\u002FChenmienTan\u002FRL2) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FChenmienTan\u002FRL2.svg?cacheSeconds=86400) - RL2 is a reinforcement learning library.\n* [RLinf](https:\u002F\u002Fgithub.com\u002FRLinf\u002FRLinf) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FRLinf\u002FRLinf.svg?cacheSeconds=86400) - RLinf is a reinforcement learning library.\n* [ROLL](https:\u002F\u002Fgithub.com\u002Falibaba\u002FROLL) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Falibaba\u002FROLL.svg?cacheSeconds=86400) - ROLL is a reinforcement learning library.\n* [skrl](https:\u002F\u002Fgithub.com\u002FToni-SM\u002Fskrl) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FToni-SM\u002Fskrl.svg?cacheSeconds=86400) - skrl is an open-source modular library for Reinforcement Learning written in Python (using PyTorch) and designed with a focus on readability, simplicity, and transparency of algorithm implementation.\n* [slime](https:\u002F\u002Fgithub.com\u002FTHUDM\u002Fslime) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FTHUDM\u002Fslime.svg?cacheSeconds=86400) - slime is an LLM post-training framework for RL Scaling.\n* [Stable Baselines](https:\u002F\u002Fgithub.com\u002FDLR-RM\u002Fstable-baselines3) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FDLR-RM\u002Fstable-baselines3.svg?cacheSeconds=86400) - A fork of OpenAI Baselines, implementations of reinforcement learning algorithms.\n* [TF-Agents](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Fagents) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftensorflow\u002Fagents.svg?cacheSeconds=86400) - A reliable, scalable and easy to use TensorFlow library for contextual bandits and reinforcement learning.\n* [TorchRL](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Frl) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpytorch\u002Frl.svg?cacheSeconds=86400) - TorchRL is an open-source Reinforcement Learning (RL) library 
for PyTorch.\n* [TRL](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Ftrl) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Ftrl.svg?cacheSeconds=86400) - Train transformer language models with reinforcement learning. \n* [veRL](https:\u002F\u002Fgithub.com\u002Fvolcengine\u002FveRL) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvolcengine\u002FveRL.svg?cacheSeconds=86400) - veRL (HybridFlow) is a flexible, efficient and industrial-level RL(HF) training framework designed for LLMs. \n\n## Industry Strength Robotics\n* [AI2-THOR](https:\u002F\u002Fgithub.com\u002Fallenai\u002Fai2thor) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fallenai\u002Fai2thor.svg?cacheSeconds=86400) - AI2-THOR is a near photo-realistic interactable framework for AI agents.\n* [Habitat-Sim](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fhabitat-sim) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffacebookresearch\u002Fhabitat-sim.svg?cacheSeconds=86400) - Habitat-Sim is a flexible, high-performance 3D simulator for Embodied AI research.\n* [IsaacLab](https:\u002F\u002Fgithub.com\u002Fisaac-sim\u002FIsaacLab) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fisaac-sim\u002FIsaacLab.svg?cacheSeconds=86400) - IsaacLab is a unified and modular framework for robot learning that leverages NVIDIA Isaac Sim.\n* [robosuite](https:\u002F\u002Fgithub.com\u002FARISE-Initiative\u002Frobosuite) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FARISE-Initiative\u002Frobosuite.svg?cacheSeconds=86400) - robosuite is a simulation framework powered by the MuJoCo physics engine for robot learning.\n* [RoboVerse](https:\u002F\u002Fgithub.com\u002FRoboVerseOrg\u002FRoboVerse) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FRoboVerseOrg\u002FRoboVerse.svg?cacheSeconds=86400) - RoboVerse is a comprehensive robotics simulation platform with diverse environments.\n\n## 
Industry Strength Visualisation\n* [Apache ECharts](https:\u002F\u002Fgithub.com\u002Fapache\u002Fecharts) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fecharts.svg?cacheSeconds=86400) - Apache ECharts is a powerful, interactive charting and data visualization library for browser.\n* [Apache Superset](https:\u002F\u002Fgithub.com\u002Fapache\u002Fsuperset) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fsuperset.svg?cacheSeconds=86400) - A modern, enterprise-ready business intelligence web application.\n* [Bokeh](https:\u002F\u002Fgithub.com\u002Fbokeh\u002Fbokeh) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fbokeh\u002Fbokeh.svg?cacheSeconds=86400) - Bokeh is an interactive visualization library for Python that enables beautiful and meaningful visual presentation of data in modern web browsers.\n* [Bread Dataset Viewer](https:\u002F\u002Fgithub.com\u002FBread-Technologies\u002Fmle_vscode_extension) - A VS Code extension for viewing and exploring large machine learning datasets (CSV, JSON, Parquet, etc.) 
directly within the editor without the IDE crashing.\n* [Bread WandB Viewer](https:\u002F\u002Fgithub.com\u002FBread-Technologies\u002Fbread_wandb_viewer_extension) - A VS Code extension to view Weights & Biases experiments, logs, and artifacts within the IDE, eliminating the need to switch to the web UI & preserving data privacy by being 100% offline.\n* [Data Formulator](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Fdata-formulator) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmicrosoft\u002Fdata-formulator.svg?cacheSeconds=86400) - Transform data and create rich visualizations iteratively with AI.\n* [ggplot2](https:\u002F\u002Fgithub.com\u002Ftidyverse\u002Fggplot2) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftidyverse\u002Fggplot2.svg?cacheSeconds=86400) - An implementation of the grammar of graphics for R.\n* [gradio](https:\u002F\u002Fgithub.com\u002Fgradio-app\u002Fgradio) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgradio-app\u002Fgradio.svg?cacheSeconds=86400) - Quickly create and share demos of models - by only writing Python. Debug models interactively in your browser, get feedback from collaborators, and generate public links without deploying anything.\n* [Kangas](https:\u002F\u002Fgithub.com\u002Fcomet-ml\u002Fkangas) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcomet-ml\u002Fkangas.svg?cacheSeconds=86400) - Kangas is a tool for exploring, analyzing, and visualizing large-scale multimedia data. 
It provides a straightforward Python API for logging large tables of data, along with an intuitive visual interface for performing complex queries against your dataset.\n* [matplotlib](https:\u002F\u002Fgithub.com\u002Fmatplotlib\u002Fmatplotlib) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmatplotlib\u002Fmatplotlib.svg?cacheSeconds=86400) - A Python 2D plotting library which produces publication-quality figures in a variety of hardcopy formats and interactive environments across platforms.\n* [Netron](https:\u002F\u002Fgithub.com\u002Flutzroeder\u002Fnetron) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flutzroeder\u002Fnetron.svg?cacheSeconds=86400) - Netron is a viewer for neural network, deep learning and machine learning models.\n* [Perspective](https:\u002F\u002Fgithub.com\u002Ffinos\u002Fperspective) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffinos\u002Fperspective.svg?cacheSeconds=86400) - Streaming pivot visualization via WebAssembly.\n* [Plotly](https:\u002F\u002Fgithub.com\u002Fplotly\u002Fplotly.py) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fplotly\u002Fplotly.py.svg?cacheSeconds=86400) - An interactive, open source, and browser-based graphing library for Python.\n* [Redash](https:\u002F\u002Fgithub.com\u002Fgetredash\u002Fredash) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgetredash\u002Fredash.svg?cacheSeconds=86400) - Redash is an open source visualisation framework that is built to allow easy access to big datasets leveraging multiple backends.\n* [Rerun](https:\u002F\u002Fgithub.com\u002Frerun-io\u002Frerun) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Frerun-io\u002Frerun.svg?cacheSeconds=86400) - Rerun is an open-source SDK for logging, storing, querying, and visualizing multimodal data, designed for robotics, computer vision, and spatial AI.\n* [seaborn](https:\u002F\u002Fgithub.com\u002Fmwaskom\u002Fseaborn) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmwaskom\u002Fseaborn.svg?cacheSeconds=86400) - Seaborn is a Python visualization library based on matplotlib. It provides a high-level interface for drawing attractive statistical graphics.\n* [Spotlight](https:\u002F\u002Fgithub.com\u002FRenumics\u002Fspotlight) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FRenumics\u002Fspotlight.svg?cacheSeconds=86400) - Spotlight helps you to identify critical data segments and model failure modes. It enables you to build and maintain reliable machine learning models by curating high-quality datasets.\n* [Streamlit](https:\u002F\u002Fgithub.com\u002Fstreamlit\u002Fstreamlit) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fstreamlit\u002Fstreamlit.svg?cacheSeconds=86400) - Streamlit lets you create apps for your machine learning projects with deceptively simple Python scripts. It supports hot-reloading, so your app updates live as you edit and save your file.\n* [tensorboardX](https:\u002F\u002Fgithub.com\u002Flanpa\u002FtensorboardX) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flanpa\u002FtensorboardX.svg?cacheSeconds=86400) - Write TensorBoard events with simple function call.\n* [TensorBoard](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Ftensorboard) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftensorflow\u002Ftensorboard.svg?cacheSeconds=86400) - TensorBoard is a visualization toolkit for machine learning experimentation that makes it easy to host, track, and share ML experiments.\n* [Transformer Explainer](https:\u002F\u002Fgithub.com\u002Fpoloclub\u002Ftransformer-explainer) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpoloclub\u002Ftransformer-explainer.svg?cacheSeconds=86400) - Transformer Explainer is an interactive visualization tool designed to help anyone learn how Transformer-based models like GPT work.\n* 
[Vega-Altair](https:\u002F\u002Fgithub.com\u002Fvega\u002Faltair) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvega\u002Faltair.svg?cacheSeconds=86400) - Vega-Altair is a declarative statistical visualization library for Python.\n* [ydata-profiling](https:\u002F\u002Fgithub.com\u002Fydataai\u002Fydata-profiling) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fydataai\u002Fydata-profiling.svg?cacheSeconds=86400) - ydata-profiling provides a one-line Exploratory Data Analysis (EDA) experience in a consistent and fast solution.\n\n## Metadata Management\n* [Amundsen](https:\u002F\u002Fgithub.com\u002Famundsen-io\u002Famundsen) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Famundsen-io\u002Famundsen.svg?cacheSeconds=86400) - Amundsen is a metadata driven application for improving the productivity of data analysts, data scientists and engineers when interacting with data.\n* [Apache Atlas](https:\u002F\u002Fgithub.com\u002Fapache\u002Fatlas) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fatlas.svg?cacheSeconds=86400) - Apache Atlas framework is an extensible set of core foundational governance services – enabling enterprises to effectively and efficiently meet their compliance requirements within Hadoop and allows integration with the whole enterprise data ecosystem.\n* [DataHub](https:\u002F\u002Fgithub.com\u002Fdatahub-project\u002Fdatahub) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdatahub-project\u002Fdatahub.svg?cacheSeconds=86400) - DataHub is LinkedIn's generalized metadata search & discovery tool.\n* [Marquez](https:\u002F\u002Fgithub.com\u002FMarquezProject\u002Fmarquez) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FMarquezProject\u002Fmarquez.svg?cacheSeconds=86400) - Marquez is an open source metadata service for the collection, aggregation, and visualization of a data ecosystem's metadata.\n* 
[Metacat](https:\u002F\u002Fgithub.com\u002FNetflix\u002Fmetacat) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FNetflix\u002Fmetacat.svg?cacheSeconds=86400) - Metacat is a unified metadata exploration API service. Metacat focuses on solving these problems: 1) federated views of metadata systems; 2) arbitrary metadata storage about data sets; 3) metadata discovery.\n* [ML Metadata](https:\u002F\u002Fgithub.com\u002Fgoogle\u002Fml-metadata) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle\u002Fml-metadata.svg?cacheSeconds=86400) - a library for recording and retrieving metadata associated with ML developer and data scientist workflows.\n\n## Model, Data and Experiment Management\n* [Aim](https:\u002F\u002Fgithub.com\u002Faimhubio\u002Faim) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Faimhubio\u002Faim.svg?cacheSeconds=86400) - A super-easy way to record, search and compare AI experiments.\n* [ClearML](https:\u002F\u002Fgithub.com\u002Fclearml\u002Fclearml) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fclearml\u002Fclearml.svg?cacheSeconds=86400) - Auto-Magical Experiment Manager & Version Control for AI (previously Trains).\n* [DataHub](https:\u002F\u002Fgithub.com\u002Fdatahub-project\u002Fdatahub) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdatahub-project\u002Fdatahub.svg?cacheSeconds=86400) - DataHub is an open-source data catalog for the modern data stack.\n* [Dolt](https:\u002F\u002Fgithub.com\u002Fdolthub\u002Fdolt) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdolthub\u002Fdolt.svg?cacheSeconds=86400) - Dolt is a SQL database that you can fork, clone, branch, merge, push and pull just like a git repository.\n* [DVC](https:\u002F\u002Fgithub.com\u002Fiterative\u002Fdvc) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fiterative\u002Fdvc.svg?cacheSeconds=86400) - DVC (Data Version Control) is a git fork that allows for version 
management of models.\n* [HuggingFace Model Downloader](https:\u002F\u002Fgithub.com\u002Fbodaay\u002FHuggingFaceModelDownloader) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fbodaay\u002FHuggingFaceModelDownloader.svg?cacheSeconds=86400) - HuggingFace Model Downloader is a utility tool for downloading models and datasets from the HuggingFace website. It offers multithreaded downloading for LFS files and ensures the integrity of downloaded models with SHA256 checksum verification.\n* [Keepsake](https:\u002F\u002Fgithub.com\u002Freplicate\u002Fkeepsake) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Freplicate\u002Fkeepsake.svg?cacheSeconds=86400) - Version control for machine learning.\n* [KitOps](https:\u002F\u002Fgithub.com\u002Fjozu-ai\u002Fkitops) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjozu-ai\u002Fkitops.svg?cacheSeconds=86400) - KitOps is an open and standards-based packaging and versioning system for AI\u002FML projects that works with all the AI\u002FML, development, and DevOps tools you are already using.\n* [lakeFS](https:\u002F\u002Fgithub.com\u002Ftreeverse\u002FlakeFS) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftreeverse\u002FlakeFS.svg?cacheSeconds=86400) - Repeatable, atomic and versioned data lake on top of object storage.\n* [MLflow](https:\u002F\u002Fgithub.com\u002Fmlflow\u002Fmlflow) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmlflow\u002Fmlflow.svg?cacheSeconds=86400) - Open source platform to manage the ML lifecycle, including experimentation, reproducibility and deployment.\n* [Neptune](https:\u002F\u002Fgithub.com\u002Fneptune-ai\u002Fneptune-client) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fneptune-ai\u002Fneptune-client.svg?cacheSeconds=86400) - Neptune is a scalable experiment tracker for teams that train foundation models.\n* [Polyaxon](https:\u002F\u002Fgithub.com\u002Fpolyaxon\u002Fpolyaxon) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpolyaxon\u002Fpolyaxon.svg?cacheSeconds=86400) - A platform for reproducible and scalable machine learning and deep learning on kubernetes - [(Video)](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=Iexwrka_hys).\n* [Quilt](https:\u002F\u002Fgithub.com\u002Fquiltdata\u002Fquilt) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fquiltdata\u002Fquilt.svg?cacheSeconds=86400) - Versioning, reproducibility and deployment of data and models.\n* [Sacred](https:\u002F\u002Fgithub.com\u002FIDSIA\u002Fsacred) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FIDSIA\u002Fsacred.svg?cacheSeconds=86400) - Tool to help you configure, organize, log and reproduce machine learning experiments.\n* [TerminusDB](https:\u002F\u002Fgithub.com\u002Fterminusdb\u002Fterminusdb) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fterminusdb\u002Fterminusdb.svg?cacheSeconds=86400) - A graph database management system that stores data like git.\n* [Weights & Biases](https:\u002F\u002Fgithub.com\u002Fwandb\u002Fwandb) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fwandb\u002Fwandb.svg?cacheSeconds=86400) - Weights & Biases is a tool for machine learning experiment tracking, dataset versioning, hyperparameter search, visualization, and collaboration.\n\n## Model Training and Orchestration\n\n* [AutoTrain Advanced](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fautotrain-advanced) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Fautotrain-advanced.svg?cacheSeconds=86400) - AutoTrain Advanced is a no-code solution that allows you to train machine learning models in just a few clicks.\n* [Avalanche](https:\u002F\u002Fgithub.com\u002FContinualAI\u002Favalanche) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FContinualAI\u002Favalanche.svg?cacheSeconds=86400) - Avalanche is an end-to-end Continual Learning library to provide a shared 
and collaborative open-source (MIT licensed) codebase for fast prototyping, training and reproducible evaluation of continual learning algorithms.\n* [Axolotl](https:\u002F\u002Fgithub.com\u002Faxolotl-ai-cloud\u002Faxolotl) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Faxolotl-ai-cloud\u002Faxolotl.svg?cacheSeconds=86400) - Axolotl is a tool designed to streamline the fine-tuning of various AI models, offering support for multiple configurations and architectures.\n* [BindsNET](https:\u002F\u002Fgithub.com\u002FBindsNET\u002Fbindsnet) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FBindsNET\u002Fbindsnet.svg?cacheSeconds=86400) - BindsNET is a spiking neural network simulation library geared towards the development of biologically inspired algorithms for machine learning.\n* [CML](https:\u002F\u002Fgithub.com\u002Fiterative\u002Fcml) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fiterative\u002Fcml.svg?cacheSeconds=86400) - Continuous Machine Learning (CML) is an open-source library for implementing continuous integration & delivery (CI\u002FCD) in machine learning projects.\n* [CoreNet](https:\u002F\u002Fgithub.com\u002Fapple\u002Fcorenet) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapple\u002Fcorenet.svg?cacheSeconds=86400) - CoreNet is a deep neural network toolkit that allows researchers and engineers to train standard and novel small and large-scale models for variety of tasks, including foundation models (e.g., CLIP and LLM), object classification, object detection, and semantic segmentation.\n* [Determined](https:\u002F\u002Fgithub.com\u002Fdetermined-ai\u002Fdetermined) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdetermined-ai\u002Fdetermined.svg?cacheSeconds=86400) - Deep learning training platform with integrated support for distributed training, hyperparameter tuning, and model management (supports Tensorflow and Pytorch).\n* 
[dstack](https:\u002F\u002Fgithub.com\u002Fdstackai\u002Fdstack) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdstackai\u002Fdstack.svg?cacheSeconds=86400) - dstack is an open-source container orchestrator that simplifies workload orchestration and drives GPU utilization for ML teams.\n* [envd](https:\u002F\u002Fgithub.com\u002Ftensorchord\u002Fenvd) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftensorchord\u002Fenvd.svg?cacheSeconds=86400) - Machine learning development environment for data science and AI\u002FML engineering teams.\n* [Fairseq](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Ffairseq) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffacebookresearch\u002Ffairseq.svg?cacheSeconds=86400) - Fairseq(-py) is a sequence modeling toolkit that allows researchers and developers to train custom models for translation, summarization, language modeling and other text generation tasks.\n* [Fire-Flyer File System](https:\u002F\u002Fgithub.com\u002Fdeepseek-ai\u002F3FS) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdeepseek-ai\u002F3FS.svg?cacheSeconds=86400) - The Fire-Flyer File System (3FS) is a high-performance distributed file system designed to address the challenges of AI training and inference workloads. 
It leverages modern SSDs and RDMA networks to provide a shared storage layer that simplifies development of distributed applications.\n* [H2O-3](https:\u002F\u002Fgithub.com\u002Fh2oai\u002Fh2o-3) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fh2oai\u002Fh2o-3.svg?cacheSeconds=86400) - Fast scalable Machine Learning platform for smarter applications: Deep Learning, Gradient Boosting & XGBoost, Random Forest, Generalized Linear Modeling (Logistic Regression, Elastic Net), K-Means, PCA, Stacked Ensembles, Automatic Machine Learning (AutoML), etc..\n* [Hopsworks](https:\u002F\u002Fgithub.com\u002Flogicalclocks\u002Fhopsworks) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flogicalclocks\u002Fhopsworks.svg?cacheSeconds=86400) - Hopsworks is a data-intensive platform for the design and operation of machine learning pipelines.\n* [Ignite](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Fignite) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpytorch\u002Fignite.svg?cacheSeconds=86400) - Ignite is a high-level library to help with training and evaluating neural networks in PyTorch flexibly and transparently.\n* [Kubeflow](https:\u002F\u002Fgithub.com\u002Fkubeflow\u002Fkubeflow) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fkubeflow\u002Fkubeflow.svg?cacheSeconds=86400) - A cloud-native platform for machine learning based on Google's internal machine learning pipelines.\n* [Ludwig](https:\u002F\u002Fgithub.com\u002Fludwig-ai\u002Fludwig) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fludwig-ai\u002Fludwig.svg?cacheSeconds=86400) - Ludwig is a low-code framework for building custom AI models like LLMs and other deep neural networks.\n* [MFTCoder](https:\u002F\u002Fgithub.com\u002Fcodefuse-ai\u002FMFTCoder) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcodefuse-ai\u002FMFTCoder.svg?cacheSeconds=86400) - MFTCoder is an open-source project of CodeFuse for accurate and 
efficient Multi-task Fine-tuning(MFT) on Large Language Models(LLMs), especially on Code-LLMs(large language model for code tasks).\n* [MLeap](https:\u002F\u002Fgithub.com\u002Fcombust\u002Fmleap) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcombust\u002Fmleap.svg?cacheSeconds=86400) - Standardisation of pipeline and model serialization for Spark, Tensorflow and sklearn.\n* [Nanotron](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fnanotron) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Fnanotron.svg?cacheSeconds=86400) - Nanotron provides distributed primitives to train a variety of models efficiently using 3D parallelism.\n* [NeMo](https:\u002F\u002Fgithub.com\u002FNVIDIA\u002FNeMo) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FNVIDIA\u002FNeMo.svg?cacheSeconds=86400) - NVIDIA NeMo is a scalable and cloud-native generative AI framework built for researchers and PyTorch developers working on Large Language Models (LLMs), Multimodal Models (MMs), Automatic Speech Recognition (ASR), Text to Speech (TTS), and Computer Vision (CV) domains. 
It is designed to help you efficiently create, customize, and deploy new generative AI models by leveraging existing code and pre-trained model checkpoints.\n* [Prime](https:\u002F\u002Fgithub.com\u002FPrimeIntellect-ai\u002Fprime) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FPrimeIntellect-ai\u002Fprime.svg?cacheSeconds=86400) - Prime is a framework for efficient, globally distributed training of AI models over the internet.\n* [PyCaret](https:\u002F\u002Fgithub.com\u002Fpycaret\u002Fpycaret) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpycaret\u002Fpycaret.svg?cacheSeconds=86400) - Low-code library for training and deploying models (scikit-learn, XGBoost, LightGBM, spaCy).\n* [Sematic](https:\u002F\u002Fgithub.com\u002Fsematic-ai\u002Fsematic) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fsematic-ai\u002Fsematic.svg?cacheSeconds=86400) - Platform to build resource-intensive pipelines with simple Python.\n* [Skaffold](https:\u002F\u002Fgithub.com\u002FGoogleContainerTools\u002Fskaffold) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FGoogleContainerTools\u002Fskaffold.svg?cacheSeconds=86400) - Skaffold is a command line tool that facilitates continuous development for Kubernetes applications. You can iterate on your application source code locally then deploy to local or remote Kubernetes clusters.\n* [TFX](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Ftfx) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftensorflow\u002Ftfx.svg?cacheSeconds=86400) - Tensorflow Extended (TFX) is a production oriented configuration framework for ML based on TensorFlow, incl. monitoring and model version management.\n* [unsloth](https:\u002F\u002Fgithub.com\u002Funslothai\u002Funsloth) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Funslothai\u002Funsloth.svg?cacheSeconds=86400) - Fine-tuning & Reinforcement Learning for LLMs. 
Train OpenAI gpt-oss, DeepSeek-R1, Qwen3, Gemma 3, TTS 2x faster with 70% less VRAM.\n\n## Model Storage Optimisation\n* [AutoAWQ](https:\u002F\u002Fgithub.com\u002Fcasper-hansen\u002FAutoAWQ) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcasper-hansen\u002FAutoAWQ.svg?cacheSeconds=86400) - AutoAWQ is an easy-to-use package for 4-bit quantized models.\n* [AutoGPTQ](https:\u002F\u002Fgithub.com\u002FAutoGPTQ\u002FAutoGPTQ) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FAutoGPTQ\u002FAutoGPTQ.svg?cacheSeconds=86400) - An easy-to-use LLMs quantization package with user-friendly apis, based on GPTQ algorithm.\n* [AWQ](https:\u002F\u002Fgithub.com\u002Fmit-han-lab\u002Fllm-awq) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmit-han-lab\u002Fllm-awq.svg?cacheSeconds=86400) - Activation-aware Weight Quantization for LLM Compression and Acceleration.\n* [GGML](https:\u002F\u002Fgithub.com\u002Fggml-org\u002Fggml) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fggml-org\u002Fggml.svg?cacheSeconds=86400) - GGML is a high-performance, tensor library for machine learning that enables efficient inference on CPUs, particularly optimized for large language models.\n* [neural-compressor](https:\u002F\u002Fgithub.com\u002Fintel\u002Fneural-compressor) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fintel\u002Fneural-compressor.svg?cacheSeconds=86400) - Intel® Neural Compressor aims to provide popular model compression techniques such as quantization, pruning (sparsity), distillation, and neural architecture search on mainstream frameworks.\n* [NNEF](https:\u002F\u002Fwww.khronos.org\u002Fnnef) - Neural Network Exchange Format (NNEF) is an open standard for representing neural network models to enable interoperability and portability across different machine learning frameworks and platforms.\n* [ONNX](https:\u002F\u002Fgithub.com\u002Fonnx\u002Fonnx) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fonnx\u002Fonnx.svg?cacheSeconds=86400) - ONNX (Open Neural Network Exchange) is an open-source format designed to facilitate interoperability and portability of machine learning models across different frameworks and platforms.\n* [PFA](https:\u002F\u002Fdmg.org\u002Fpfa) - PFA (Portable Format for Analytics) format is a standard for representing and exchanging predictive models and analytics workflows in a portable, JSON-based format.\n* [PMML](https:\u002F\u002Fdmg.org\u002Fpmml) - PMML (Predictive Model Markup Language) is an XML-based standard for representing and sharing predictive models between different applications.\n* [Quanto](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Foptimum-quanto) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Foptimum-quanto.svg?cacheSeconds=86400) - Quanto aims to simplify quantizing deep learning models.\n\n## Privacy and Safety\n* [AI Gateway](https:\u002F\u002Fgithub.com\u002Fportkey-ai\u002Fgateway) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fportkey-ai\u002Fgateway.svg?cacheSeconds=86400) - The AI Gateway is a blazing fast AI Gateway with integrated guardrails.\n* [AI Job Displacement Tracker](https:\u002F\u002Fgithub.com\u002Fnoahaust2\u002Fai-displacement-tracker) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fnoahaust2\u002Fai-displacement-tracker.svg?cacheSeconds=86400) - Structured, source-backed dataset tracking 96 AI-attributed workforce reductions (457K workers affected, 13 countries, 13 sectors). 
Every entry includes source URLs, attribution tier, and job functions.\n* [ART](https:\u002F\u002Fgithub.com\u002FTrusted-AI\u002Fadversarial-robustness-toolbox) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FTrusted-AI\u002Fadversarial-robustness-toolbox.svg?cacheSeconds=86400) - ART (Adversarial Robustness Toolbox) provides tools that enable developers and researchers to defend and evaluate Machine Learning models and applications against the adversarial threats of Evasion, Poisoning, Extraction, and Inference.\n* [CipherChat](https:\u002F\u002Fgithub.com\u002FRobustNLP\u002FCipherChat) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FRobustNLP\u002FCipherChat.svg?cacheSeconds=86400) - CipherChat is a framework to evaluate the generalization capability of safety alignment for LLMs.\n* [DeepTeam](https:\u002F\u002Fgithub.com\u002Fconfident-ai\u002Fdeepteam) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fconfident-ai\u002Fdeepteam.svg?cacheSeconds=86400) - DeepTeam is a simple-to-use, open-source LLM red teaming framework, for penetration testing and safeguarding large language model systems.\n* [FATE](https:\u002F\u002Fgithub.com\u002FFederatedAI\u002FFATE) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FFederatedAI\u002FFATE.svg?cacheSeconds=86400) - FATE (Federated AI Technology Enabler) is the world's first industrial grade federated learning open source framework to enable enterprises and institutions to collaborate on data while protecting data security and privacy.\n* [FedML](https:\u002F\u002Fgithub.com\u002FFedML-AI\u002FFedML) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FFedML-AI\u002FFedML.svg?cacheSeconds=86400) - FedML provides a research and production integrated edge-cloud platform for Federated\u002FDistributed Machine Learning at anywhere at any scale.\n* [Flower](https:\u002F\u002Fgithub.com\u002Fadap\u002Fflower) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fadap\u002Fflower.svg?cacheSeconds=86400) - Flower is a Federated Learning Framework with a unified approach. It enables the federation of any ML workload, with any ML framework, and any programming language.\n* [Google's Differential Privacy](https:\u002F\u002Fgithub.com\u002Fgoogle\u002Fdifferential-privacy) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle\u002Fdifferential-privacy.svg?cacheSeconds=86400) - This is a C++ library of ε-differentially private algorithms, which can be used to produce aggregate statistics over numeric data sets containing private or sensitive information.\n* [Guardrails](https:\u002F\u002Fgithub.com\u002Fguardrails-ai\u002Fguardrails) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fguardrails-ai\u002Fguardrails.svg?cacheSeconds=86400) - Guardrails is a package that lets a user add structure, type and quality guarantees to the outputs of large language models.\n* [NeMo Guardrails](https:\u002F\u002Fgithub.com\u002FNVIDIA\u002FNeMo-Guardrails) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FNVIDIA\u002FNeMo-Guardrails.svg?cacheSeconds=86400) - NeMo Guardrails is an open-source toolkit for easily adding programmable guardrails to LLM-based conversational systems.\n* [Opacus](https:\u002F\u002Fgithub.com\u002Fmeta-pytorch\u002Fopacus)  ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmeta-pytorch\u002Fopacus.svg?cacheSeconds=86400) - Opacus is a library that enables training PyTorch models with differential privacy. 
It supports training with minimal code changes required on the client, has little impact on training performance, and allows the client to online track the privacy budget expended at any given moment.\n* [OpenFL](https:\u002F\u002Fgithub.com\u002Fsecurefederatedai\u002Fopenfl)  ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fsecurefederatedai\u002Fopenfl.svg?cacheSeconds=86400) - OpenFL is a Python framework for Federated Learning. OpenFL is designed to be a flexible, extensible and easily learnable tool for data scientists. OpenFL is developed by Intel Internet of Things Group (IOTG) and Intel Labs.\n* [PySyft](https:\u002F\u002Fgithub.com\u002FOpenMined\u002FPySyft) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FOpenMined\u002FPySyft.svg?cacheSeconds=86400) - A Python library for secure, private Deep Learning. PySyft decouples private data from model training, using Multi-Party Computation (MPC) within PyTorch.\n* [Tensorflow Privacy](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Fprivacy) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftensorflow\u002Fprivacy.svg?cacheSeconds=86400) - A Python library that includes implementations of TensorFlow optimizers for training machine learning models with differential privacy.\n* [TF Encrypted](https:\u002F\u002Fgithub.com\u002Ftf-encrypted\u002Ftf-encrypted) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftf-encrypted\u002Ftf-encrypted.svg?cacheSeconds=86400) - A Framework for Confidential Machine Learning on Encrypted Data in TensorFlow.\n\n# Other Awesome Lists\n\n* [Awesome AI Regulation](https:\u002F\u002Fgithub.com\u002FEthicalML\u002Fawesome-artificial-intelligence-regulation) - Covers governance, compliance, and regulatory frameworks essential for responsible ML system deployment across different jurisdictions.\n* [Awesome Production GenAI](https:\u002F\u002Fgithub.com\u002FEthicalML\u002Fawesome-production-genai) - Focuses specifically on generative AI 
deployment, including LLM operations, prompt engineering, and GenAI-specific monitoring and safety tools.\n* [Awesome RAG Production](https:\u002F\u002Fgithub.com\u002FYigtwxx\u002FAwesome-RAG-Production) - Curated list of production-grade tools and best practices for building scalable RAG systems.\n","[![Awesome](https:\u002F\u002Fawesome.re\u002Fbadge.svg)](https:\u002F\u002Fawesome.re)\n[![X](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FX-%23000000?logo=X&logoColor=white)](https:\u002F\u002Ftwitter.com\u002FEthicalML)\n\n# 优秀生产级机器学习\n\n本仓库包含一份精心策划的开源库列表，将帮助您部署、监控、版本控制、扩展和保障您的生产级机器学习 🚀\n\n您可以通过关注此 GitHub 仓库来保持更新，每月通过 [发布](https:\u002F\u002Fgithub.com\u002FEthicalML\u002Fawesome-production-machine-learning\u002Freleases) 获取新增的生产级机器学习 (Machine Learning, ML) 库摘要 🤩\n\n此外，我们提供了一个 [搜索工具包](https:\u002F\u002Fhuggingface.co\u002Fspaces\u002Fzhiminy\u002FAwesome-Production-Machine-Learning-Search)，帮助您快速浏览工具链。\n\n## 本页各章节的快速链接\n\n| | | |\n|-|-|-|\n| [🔧 自动机器学习 (AutoML)](#automl) | [🧮 计算与通信优化 (Computation & Communication Optimisation)](#computation-and-communication-optimisation) | [🏷️ 数据标注与合成 (Data Annotation & Synthesis)](#data-annotation-and-synthesis) |\n| [🧵 数据管道 (Data Pipeline)](#data-pipeline) | [📓 数据科学笔记本 (Data Science Notebook)](#data-science-notebook) | [💾 数据存储优化 (Data Storage Optimisation)](#data-storage-optimisation) |\n| [💸 数据流处理 (Data Stream Processing)](#data-stream-processing) | [💪 部署与服务 (Deployment & Serving)](#deployment-and-serving) | [📈 评估与监控 (Evaluation & Monitoring)](#evaluation-and-monitoring) |\n| [🔍 可解释性与公平性 (Explainability & Fairness)](#explainability-and-fairness) | [🎁 特征存储 (Feature Store)](#feature-store) | [🔴 工业级异常检测 (Industry-strength Anomaly Detection)](#industry-strength-anomaly-detection) |\n| [👁️ 工业级计算机视觉 (Industry-strength Computer Vision)](#industry-strength-computer-vision) | [🔥 工业级信息检索 (Industry-strength Information Retrieval)](#industry-strength-information-retrieval) | [🔠 工业级自然语言处理 (Industry-strength NLP)](#industry-strength-nlp) 
|\n| [🙌 工业级推荐系统 (Industry-strength Recommender System)](#industry-strength-recommender-system) | [🍕 工业级强化学习 (Industry-strength Reinforcement Learning)](#industry-strength-reinforcement-learning) | [🤖 工业级机器人 (Industry-strength Robotics)](#industry-strength-robotics) |\n| [📊 工业级可视化 (Industry-strength Visualisation)](#industry-strength-visualisation) | [📅 元数据管理 (Metadata Management)](#metadata-management) | [📜 模型、数据与实验管理 (Model, Data & Experiment Management)](#model-data-and-experiment-management) |\n| [🔩 模型存储优化 (Model Storage Optimisation)](#model-storage-optimisation) | [🏁 模型训练与编排 (Model Training & Orchestration)](#model-training-and-orchestration) | [🔏 隐私与安全 (Privacy & Safety)](#privacy-and-safety) |\n\n## 贡献列表\n\n提交 PR 时请查看我们的 [CONTRIBUTING.md](https:\u002F\u002Fgithub.com\u002FEthicalML\u002Fawesome-production-machine-learning\u002Fblob\u002Fmaster\u002FCONTRIBUTING.md) 要求，以帮助我们保持列表整洁和最新 - 感谢社区支持其稳步增长 🚀\n\n\u003Cpicture>\n  \u003Csource\n    media=\"(prefers-color-scheme: dark)\"\n    srcset=\"\n      https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FEthicalML_awesome-production-machine-learning_readme_4d8efabc3db6.png&theme=dark\n    \"\n  \u002F>\n  \u003Csource\n    media=\"(prefers-color-scheme: light)\"\n    srcset=\"\n      https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FEthicalML_awesome-production-machine-learning_readme_4d8efabc3db6.png\n    \"\n  \u002F>\n  \u003Cimg\n    alt=\"Star History Chart\"\n    src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FEthicalML_awesome-production-machine-learning_readme_4d8efabc3db6.png\"\n  \u002F>\n\u003C\u002Fpicture>\n\n## 10 分钟视频概览\n\n\u003Ctable>\n  \u003Ctr>\n    \u003Ctd width=\"30%\">\n        这 \u003Ca href=\"https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=Ynb6X0KZKxY\">10 分钟视频\u003C\u002Fa> 提供了机器学习运维 (MLOps) 的动机概述，以及对本仓库中部分工具的高层次介绍。这 \u003Ca href=\"https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=NycftytgPnk\">较新的视频\u003C\u002Fa> 涵盖了更新后的 2024 年 MLOps 现状。\n    \u003C\u002Ftd>\n    \u003Ctd 
width=\"70%\">\n        \u003Ca href=\"https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=Ynb6X0KZKxY\">\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FEthicalML_awesome-production-machine-learning_readme_5a15cf128666.png\">\u003C\u002Fa>\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n\u003C\u002Ftable>\n\n## 想要接收关于此仓库及其他进展的定期更新？\n\n\u003Ctable>\n  \u003Ctr>\n    \u003Ctd width=\"30%\">\n         您可以加入 [Machine Learning Engineer](https:\u002F\u002Fethical.institute\u002Fmle.html) 通讯。加入超过 70,000 名机器学习专业人士和爱好者，他们每周收到关于生产级机器学习的精选文章和教程。\n    \u003C\u002Ftd>\n    \u003Ctd width=\"70%\">\n        \u003Ca href=\"https:\u002F\u002Fethical.institute\u002Fmle.html\">\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FEthicalML_awesome-production-machine-learning_readme_bf9a0a64abe8.png\">\u003C\u002Fa>\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd width=\"30%\">\n         同时请查看 [Awesome Production GenAI](https:\u002F\u002Fgithub.com\u002FEthicalML\u002Fawesome-production-genai\u002F) 列表，我们旨在映射一份精选的开源库列表，用于部署、监控、版本控制和扩展您的生成式人工智能 (Generative AI) 应用和系统。\n    \u003C\u002Ftd>\n    \u003Ctd width=\"70%\">\n        \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FEthicalML\u002Fawesome-production-genai\u002F\">\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FEthicalML_awesome-production-machine-learning_readme_6cb3f367f959.jpg\">\u003C\u002Fa>\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n\u003C\u002Ftable>\n\n# 主要内容\n\n## 自动机器学习 (AutoML)\n* [AIDE](https:\u002F\u002Fgithub.com\u002FWecoAI\u002Faideml) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FWecoAI\u002Faideml.svg?cacheSeconds=86400) - AIDE 是一个开源的机器学习工程代理，使用树搜索算法自主探索、实施和评估机器学习任务的解决方案策略。\n* [AutoGluon](https:\u002F\u002Fgithub.com\u002Fautogluon\u002Fautogluon) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fautogluon\u002Fautogluon.svg?cacheSeconds=86400) - 基于流行的机器学习库（Scikit-Learn, LightGBM, CatBoost, PyTorch, 
MXNet），为表格、图像和文本数据提供自动化的特征、模型和超参数选择。\n* [Autokeras](https:\u002F\u002Fgithub.com\u002Fkeras-team\u002Fautokeras) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fkeras-team\u002Fautokeras.svg?cacheSeconds=86400) - 基于 [\"Auto-Keras: Efficient Neural Architecture Search with Network Morphism\"](https:\u002F\u002Farxiv.org\u002Fabs\u002F1806.10282) 的 Keras 自动机器学习库。\n* [auto-sklearn](https:\u002F\u002Fgithub.com\u002Fautoml\u002Fauto-sklearn) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fautoml\u002Fauto-sklearn.svg?cacheSeconds=86400) - 用于自动化 sklearn 算法和超参数调优的框架。\n* [Ax](https:\u002F\u002Fgithub.com\u002Ffacebook\u002FAx) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffacebook\u002FAx.svg?cacheSeconds=86400) - Ax 是一个易于访问的通用平台，用于理解、管理、部署和自动化自适应实验。\n* [BoTorch](https:\u002F\u002Fgithub.com\u002Fmeta-pytorch\u002Fbotorch) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmeta-pytorch\u002Fbotorch.svg?cacheSeconds=86400) - BoTorch 是一个基于 PyTorch 构建的贝叶斯优化库。\n* [EvalML](https:\u002F\u002Fgithub.com\u002Falteryx\u002Fevalml) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Falteryx\u002Fevalml.svg?cacheSeconds=86400) - EvalML 是一个自动机器学习库，它使用领域特定目标函数来构建、优化和评估机器学习流水线。\n* [Feature Engine](https:\u002F\u002Fgithub.com\u002Ffeature-engine\u002Ffeature_engine) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffeature-engine\u002Ffeature_engine.svg?cacheSeconds=86400) - Feature-engine 是一个 Python 库，包含多个转换器，用于为机器学习模型构建特征。\n* [Featuretools](https:\u002F\u002Fgithub.com\u002Falteryx\u002Ffeaturetools) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Falteryx\u002Ffeaturetools.svg?cacheSeconds=86400) - 一个用于自动化特征工程的开源框架。\n* [FLAML](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FFLAML) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmicrosoft\u002FFLAML.svg?cacheSeconds=86400) - FLAML 是一个用于自动机器学习和调优的快速库。\n* 
[HEBO](https:\u002F\u002Fgithub.com\u002Fhuawei-noah\u002FHEBO) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuawei-noah\u002FHEBO.svg?cacheSeconds=86400) - 一组开源超参数优化框架，包括在超参数调优任务上测试的 [NeurIPS 2020 黑盒优化挑战](https:\u002F\u002Fbbochallenge.com\u002Fleaderboard) 获胜提交方案。 \n* [Katib](https:\u002F\u002Fgithub.com\u002Fkubeflow\u002Fkatib) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fkubeflow\u002Fkatib.svg?cacheSeconds=86400) - 一个基于 Kubernetes 的超参数调优和神经架构搜索系统。\n* [keras-tuner](https:\u002F\u002Fgithub.com\u002Fkeras-team\u002Fkeras-tuner) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fkeras-team\u002Fkeras-tuner.svg?cacheSeconds=86400) - Keras Tuner 是一个易于使用、可分发的超参数优化框架，解决了执行超参数搜索的痛点。Keras Tuner 使得定义搜索空间并利用内置算法找到最佳超参数值变得简单。\n* [Optuna](https:\u002F\u002Fgithub.com\u002Foptuna\u002Foptuna) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Foptuna\u002Foptuna.svg?cacheSeconds=86400) - Optuna 是一个自动超参数优化软件框架，特别针对机器学习设计。\n* [OSS Vizier](https:\u002F\u002Fgithub.com\u002Fgoogle\u002Fvizier) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle\u002Fvizier.svg?cacheSeconds=86400) - OSS Vizier 是一个基于 Python 的黑盒优化和研究服务，是首批专为大规模工作设计的超参数调优服务之一。\n* [Perpetual](https:\u002F\u002Fgithub.com\u002Fperpetual-ml\u002Fperpetual) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fperpetual-ml\u002Fperpetual.svg?cacheSeconds=86400) - 一种不需要超参数优化的梯度提升机，具有一个简单的预算参数来控制模型复杂度。\n* [TPOT](https:\u002F\u002Fgithub.com\u002Fepistasislab\u002Ftpot) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fepistasislab\u002Ftpot.svg?cacheSeconds=86400) - 自动化 sklearn 流水线创建（包括特征选择、预处理等）。\n* [tsfresh](https:\u002F\u002Fgithub.com\u002Fblue-yonder\u002Ftsfresh) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fblue-yonder\u002Ftsfresh.svg?cacheSeconds=86400) - 从时间序列中自动提取相关特征。\n\n## 计算与通信优化\n\n* [Accelerate](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Faccelerate) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Faccelerate.svg?cacheSeconds=86400) - Accelerate 抽象并仅抽象与多 GPU（图形处理器）\u002FTPU（张量处理单元）\u002Fmixed-precision（混合精度）相关的样板代码，并保持其余代码不变。\n* [Adapters](https:\u002F\u002Fgithub.com\u002Fadapter-hub\u002Fadapters) ![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FEthicalML_awesome-production-machine-learning_readme_f947e5cdc62b.png) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fadapter-hub\u002Fadapters.svg?cacheSeconds=86400) - Adapters 是一个用于参数高效和模块化 transfer learning（迁移学习）的统一库。\n* [BitBLAS](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FBitBLAS) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmicrosoft\u002FBitBLAS.svg?cacheSeconds=86400) - BitBLAS 是一个支持在 GPU 上进行 mixed-precision（混合精度）BLAS（基本线性代数子程序）操作的库。\n* [Colossal-AI](https:\u002F\u002Fgithub.com\u002Fhpcaitech\u002FColossalAI) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhpcaitech\u002FColossalAI.svg?cacheSeconds=86400) - 面向大模型时代的统一深度学习系统，帮助用户高效快速地部署大型 AI 模型训练和推理。\n* [Composer](https:\u002F\u002Fgithub.com\u002Fmosaicml\u002Fcomposer) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmosaicml\u002Fcomposer.svg?cacheSeconds=86400) - Composer 是一个 PyTorch 库，使您能够以更低的成本、更高的速度、更高的准确率训练神经网络。\n* [CuDF](https:\u002F\u002Fgithub.com\u002Frapidsai\u002Fcudf) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Frapidsai\u002Fcudf.svg?cacheSeconds=86400) - 基于 Apache Arrow 列式内存格式构建，cuDF 是一个 GPU DataFrame（数据框）库，用于加载、连接、聚合、过滤以及操纵数据。\n* [CuML](https:\u002F\u002Fgithub.com\u002Frapidsai\u002Fcuml) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Frapidsai\u002Fcuml.svg?cacheSeconds=86400) - cuML 是一套实现机器学习算法和数学原始函数的库，与其他 RAPIDS 项目共享兼容的 APIs（应用程序接口）。\n* [CuPy](https:\u002F\u002Fgithub.com\u002Fcupy\u002Fcupy) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcupy\u002Fcupy.svg?cacheSeconds=86400) - 在 CUDA 上实现的 NumPy 兼容的多维数组。CuPy 由核心多维数组类 cupy.ndarray 及其上的许多函数组成。\n* [DEAP](https:\u002F\u002Fgithub.com\u002FDEAP\u002Fdeap) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FDEAP\u002Fdeap.svg?cacheSeconds=86400) - 一种新颖的进化计算框架，用于快速原型设计和测试想法。它旨在使算法显式化并使数据结构透明化。它与 multiprocessing（多进程）和 SCOOP 等并行化机制完美协作。\n* [DeepEP](https:\u002F\u002Fgithub.com\u002Fdeepseek-ai\u002FDeepEP) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdeepseek-ai\u002FDeepEP.svg?cacheSeconds=86400) - DeepEP 是专为 Mixture-of-Experts（MoE，专家混合模型）和 expert parallelism（EP，专家并行）定制的通信库。它提供高吞吐量和低延迟的 all-to-all GPU 内核，也称为 MoE dispatch（分发）和 combine（合并）。该库还支持低精度操作，包括 FP8。\n* [DGL](https:\u002F\u002Fgithub.com\u002Fdmlc\u002Fdgl) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdmlc\u002Fdgl.svg?cacheSeconds=86400) - DGL 是一个易于使用、高性能且可扩展的 Python 包，用于图上的深度学习。\n* [DLRover](https:\u002F\u002Fgithub.com\u002Fintelligent-machine-learning\u002Fdlrover) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fintelligent-machine-learning\u002Fdlrover.svg?cacheSeconds=86400) - DLRover 使大型 AI 模型的分布式训练变得简单、稳定、快速和绿色。\n* [Dask](https:\u002F\u002Fgithub.com\u002Fdask\u002Fdask) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdask\u002Fdask.svg?cacheSeconds=86400) - Pandas 和 NumPy 计算的分布式并行处理框架。\n* [DeepSpeed](https:\u002F\u002Fgithub.com\u002Fdeepspeedai\u002FDeepSpeed) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdeepspeedai\u002FDeepSpeed.svg?cacheSeconds=86400) - DeepSpeed 是一个深度学习优化库，使分布式训练和推理变得简单、高效和有效。\n* [FlagGems](https:\u002F\u002Fgithub.com\u002FFlagOpen\u002FFlagGems) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FFlagOpen\u002FFlagGems.svg?cacheSeconds=86400) - FlagGems 是用 OpenAI Triton 实现的高性能通用算子库。它建立在一系列后端中立内核之上，旨在加速跨不同硬件平台的 LLM（大型语言模型）训练和推理。\n* 
[Flashlight](https:\u002F\u002Fgithub.com\u002Fflashlight\u002Fflashlight) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fflashlight\u002Fflashlight.svg?cacheSeconds=86400) - 一个完全用 C++ 编写的快速、灵活的机器学习库，来自 Facebook AI Research 以及 Torch、TensorFlow、Eigen 和 Deep Speech 的创作者。\n* [Flax](https:\u002F\u002Fgithub.com\u002Fgoogle\u002Fflax) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle\u002Fflax.svg?cacheSeconds=86400) - 为灵活性而设计的 JAX 神经网络库和生态系统。\n* [GPUStack](https:\u002F\u002Fgithub.com\u002Fgpustack\u002Fgpustack) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgpustack\u002Fgpustack.svg?cacheSeconds=86400) - GPUStack 是一个用于运行 AI 模型的开源 GPU 集群管理器。\n* [Hivemind](https:\u002F\u002Fgithub.com\u002Flearning-at-home\u002Fhivemind) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flearning-at-home\u002Fhivemind.svg?cacheSeconds=86400) - PyTorch 中的去中心化深度学习。\n* [Horovod](https:\u002F\u002Fgithub.com\u002Fhorovod\u002Fhorovod) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhorovod\u002Fhorovod.svg?cacheSeconds=86400) - Uber 的分布式训练框架，适用于 TensorFlow、Keras 和 PyTorch。\n* [Jax](https:\u002F\u002Fgithub.com\u002Fjax-ml\u002Fjax) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjax-ml\u002Fjax.svg?cacheSeconds=86400) - Python+NumPy 程序的组合转换：微分、向量化、JIT 到 GPU\u002FTPU 等。\n* [Kompute](https:\u002F\u002Fgithub.com\u002FKomputeProject\u002Fkompute) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FKomputeProject\u002Fkompute.svg?cacheSeconds=86400) - 极速、轻量且支持移动端的 Vulkan 计算框架，针对高级 GPU 数据处理用例进行了优化。\n* [Lava](https:\u002F\u002Fgithub.com\u002Flava-nc\u002Flava) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flava-nc\u002Flava.svg?cacheSeconds=86400) - Lava 是一个开源框架，用于开发适用于神经形态硬件架构的应用程序。\n* [Liger Kernel](https:\u002F\u002Fgithub.com\u002Flinkedin\u002FLiger-Kernel) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flinkedin\u002FLiger-Kernel.svg?cacheSeconds=86400) - Liger Kernel 是一组专门为 LLM 训练设计的 Triton 内核。\n* [LightGBM](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FLightGBM) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmicrosoft\u002FLightGBM.svg?cacheSeconds=86400) - LightGBM 是一种使用基于树的算法的梯度提升框架。\n* [MLX](https:\u002F\u002Fgithub.com\u002Fml-explore\u002Fmlx) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fml-explore\u002Fmlx.svg?cacheSeconds=86400) - MLX 是用于 Apple Silicon 机器学习的数组框架。\n* [Modin](https:\u002F\u002Fgithub.com\u002Fmodin-project\u002Fmodin) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmodin-project\u002Fmodin.svg?cacheSeconds=86400) - 通过更改一行代码来加速您的 Pandas 工作流。\n* [NVIDIA TensorRT](https:\u002F\u002Fgithub.com\u002FNVIDIA\u002FTensorRT) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FNVIDIA\u002FTensorRT.svg?cacheSeconds=86400) - TensorRT 是一个用于 NVIDIA GPU 和深度学习加速器上进行高性能推理的 C++ 库。\n* [Nevergrad](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fnevergrad) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffacebookresearch\u002Fnevergrad.svg?cacheSeconds=86400) - Nevergrad 是一个无梯度优化平台。\n* [Norse](https:\u002F\u002Fgithub.com\u002Fnorse\u002Fnorse) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fnorse\u002Fnorse.svg?cacheSeconds=86400) - Norse 旨在利用生物启发式神经组件的优势，它们是稀疏的和事件驱动的——这与人工神经网络有根本区别。\n* [Numba](https:\u002F\u002Fgithub.com\u002Fnumba\u002Fnumba) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fnumba\u002Fnumba.svg?cacheSeconds=86400) - 用于 Python 数组和数值函数的编译器。\n* [Optimum](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Foptimum) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Foptimum.svg?cacheSeconds=86400) - Optimum 是 Transformers 和 Diffusers 的扩展，提供一组优化工具，使在目标硬件上训练和运行模型达到最大效率，同时保持易用性。\n* 
[PEFT](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fpeft) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Fpeft.svg?cacheSeconds=86400) - Parameter-Efficient Fine-Tuning（PEFT，参数高效微调）方法使预训练语言模型（PLM，预训练语言模型）能够高效适应各种下游应用，而无需微调模型的所有参数。\n* [PaddlePaddle](https:\u002F\u002Fgithub.com\u002FPaddlePaddle\u002FPaddle) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FPaddlePaddle\u002FPaddle.svg?cacheSeconds=86400) - PaddlePaddle 是一个用于跨数百个节点分布的数据源进行大规模深度网络训练的框架。\n* [PyG](https:\u002F\u002Fgithub.com\u002Fpyg-team\u002Fpytorch_geometric) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpyg-team\u002Fpytorch_geometric.svg?cacheSeconds=86400) - PyG（PyTorch Geometric）是一个基于 PyTorch 的库，用于轻松编写和训练图神经网络（GNN，图神经网络），适用于广泛的与结构化数据相关的应用。\n* [PyTorch Lightning](https:\u002F\u002Fgithub.com\u002FLightning-AI\u002Fpytorch-lightning) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FLightning-AI\u002Fpytorch-lightning.svg?cacheSeconds=86400) - PyTorch Lightning 可在多个 GPU、TPU 上预训练、微调并部署 AI 模型，无需更改代码。\n* [PyTorch](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Fpytorch) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpytorch\u002Fpytorch.svg?cacheSeconds=86400) - PyTorch 是一个用于开发和训练基于神经网络的深度学习模型的库。\n* [Ray](https:\u002F\u002Fgithub.com\u002Fray-project\u002Fray) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fray-project\u002Fray.svg?cacheSeconds=86400) - Ray 是一个灵活、高性能的机器学习分布式执行框架。\n* [SetFit](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fsetfit) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Fsetfit.svg?cacheSeconds=86400) - SetFit 是一个高效且无需提示的 Sentence Transformers 少样本微调框架。\n* [Sonnet](https:\u002F\u002Fgithub.com\u002Fgoogle-deepmind\u002Fsonnet) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle-deepmind\u002Fsonnet.svg?cacheSeconds=86400) - Sonnet 是一个基于 TensorFlow 2 构建的库，旨在为机器学习研究提供简单、可组合的抽象。\n* 
[Streaming](https:\u002F\u002Fgithub.com\u002Fmosaicml\u002Fstreaming) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmosaicml\u002Fstreaming.svg?cacheSeconds=86400) - 用于高效神经网络训练的数据流式传输库。\n* [TensorFlow](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Ftensorflow) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftensorflow\u002Ftensorflow.svg?cacheSeconds=86400) - TensorFlow 是一个领先的库，旨在开发和部署最先进的机器学习应用程序。\n* [ThunderKittens](https:\u002F\u002Fgithub.com\u002FHazyResearch\u002FThunderKittens) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FHazyResearch\u002FThunderKittens.svg?cacheSeconds=86400) - ThunderKittens 是一个框架，使在 CUDA 中编写快速深度学习内核变得容易。\n* [TorchOpt](https:\u002F\u002Fgithub.com\u002Fmetaopt\u002Ftorchopt) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmetaopt\u002Ftorchopt.svg?cacheSeconds=86400) - TorchOpt 是一个基于 PyTorch 的高效可微优化库。\n* [Triton](https:\u002F\u002Fgithub.com\u002Ftriton-lang\u002Ftriton) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftriton-lang\u002Ftriton.svg?cacheSeconds=86400) - Triton 是一种语言和编译器，用于编写高效的自定义深度学习原语。Triton 的目标是提供一个开源环境，以比 CUDA 更高的生产力编写快速代码，同时也比其他现有的 DSL（领域特定语言）具有更高的灵活性。\n* [Vaex](https:\u002F\u002Fgithub.com\u002Fvaexio\u002Fvaex) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvaexio\u002Fvaex.svg?cacheSeconds=86400) - Vaex 是一个高性能 Python 库，用于惰性 Out-of-Core（外存）DataFrames（类似于 Pandas），用于可视化和探索大型表格数据集。Vaex 使用内存映射、零内存拷贝策略和惰性计算以获得最佳性能（不浪费内存）。\n* [Vowpal Wabbit](https:\u002F\u002Fgithub.com\u002FVowpalWabbit\u002Fvowpal_wabbit) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FVowpalWabbit\u002Fvowpal_wabbit.svg?cacheSeconds=86400) - Vowpal Wabbit 是一个机器学习系统，通过在线、哈希、allreduce、归约、learning2search、主动和交互式学习等技术推动机器学习的前沿。\n* [XGBoost](https:\u002F\u002Fgithub.com\u002Fdmlc\u002Fxgboost) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdmlc\u002Fxgboost.svg?cacheSeconds=86400) - XGBoost 是一个优化的分布式梯度提升库，设计为高效、灵活和便携。\n* 
[YDF](https:\u002F\u002Fgithub.com\u002Fgoogle\u002Fyggdrasil-decision-forests) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle\u002Fyggdrasil-decision-forests.svg?cacheSeconds=86400) - YDF（Yggdrasil Decision Forests）是一个用于训练、评估、解释和部署随机森林、梯度提升决策树、CART（分类与回归树）和孤立森林模型的库。\n* [bitsandbytes](https:\u002F\u002Fgithub.com\u002Fbitsandbytes-foundation\u002Fbitsandbytes) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fbitsandbytes-foundation\u002Fbitsandbytes.svg?cacheSeconds=86400) - Bitsandbytes 库是一个围绕 CUDA 自定义函数的轻量级 Python 封装，特别是 8 位优化器、矩阵乘法（LLM.int8()）和 8 & 4 位量化函数。\n* [einops](https:\u002F\u002Fgithub.com\u002Farogozhnikov\u002Feinops) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Farogozhnikov\u002Feinops.svg?cacheSeconds=86400) - 用于可读和可靠代码的灵活强大的张量操作。\n* [scikit-learn](https:\u002F\u002Fgithub.com\u002Fscikit-learn\u002Fscikit-learn) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fscikit-learn\u002Fscikit-learn.svg?cacheSeconds=86400) - Scikit-learn 是一个功能强大的机器学习库，提供广泛的数据访问、数据准备和统计模型构建模块。\n* [snnTorch](https:\u002F\u002Fgithub.com\u002Fjeshraghian\u002Fsnntorch) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjeshraghian\u002Fsnntorch.svg?cacheSeconds=86400) - snnTorch 是一个带有脉冲神经网络的深度和在线学习库。\n* [torchdistill](https:\u002F\u002Fgithub.com\u002Fyoshitomo-matsubara\u002Ftorchdistill) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fyoshitomo-matsubara\u002Ftorchdistill.svg?cacheSeconds=86400) - torchdistill 提供各种最先进的知识蒸馏方法，并允许您只需编辑声明式 yaml 配置文件而非 Python 代码即可设计（新的）实验。\n* [torchkeras](https:\u002F\u002Fgithub.com\u002Flyhue1991\u002Ftorchkeras?tab=readme-ov-file) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flyhue1991\u002Ftorchkeras.svg?cacheSeconds=86400) - torchkeras 库是一个简单的工具，用于以 Keras 风格在 PyTorch 中训练神经网络。\n* [veScale](https:\u002F\u002Fgithub.com\u002Fvolcengine\u002FveScale) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvolcengine\u002FveScale.svg?cacheSeconds=86400) - veScale 是一个 PyTorch 原生的 LLM 训练框架。\n* [yellowbrick](https:\u002F\u002Fgithub.com\u002FDistrictDataLabs\u002Fyellowbrick) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FDistrictDataLabs\u002Fyellowbrick.svg?cacheSeconds=86400) - yellowbrick 是一个基于 matplotlib 的 scikit-learn 和其他机器学习库的模型评估图表。\n\n## 数据标注与合成\n* [Argilla](https:\u002F\u002Fgithub.com\u002Fargilla-io\u002Fargilla) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fargilla-io\u002Fargilla.svg?cacheSeconds=86400) - Argilla 帮助领域专家和数据团队在更短的时间内构建更好的 NLP（自然语言处理）数据集。\n* [cleanlab](https:\u002F\u002Fgithub.com\u002Fcleanlab\u002Fcleanlab) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcleanlab\u002Fcleanlab.svg?cacheSeconds=86400) - 面向数据驱动的 AI（人工智能）的 Python 库。可自动：查找错误标记的数据、检测异常值、评估多标注者数据集的一致性和标注者质量，并建议下一步最适合（重新）标记的数据。\n* [COCO Annotator](https:\u002F\u002Fgithub.com\u002Fjsbroks\u002Fcoco-annotator) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjsbroks\u002Fcoco-annotator.svg?cacheSeconds=86400) - 基于 Web 的图像分割工具，用于目标检测、定位和关键点识别\n* [CVAT](https:\u002F\u002Fgithub.com\u002Fcvat-ai\u002Fcvat) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcvat-ai\u002Fcvat.svg?cacheSeconds=86400) - CVAT（计算机视觉标注工具）是 OpenCV 的基于 Web 的标注工具，适用于计算机算法的视频和图像。\n* [Doccano](https:\u002F\u002Fgithub.com\u002Fdoccano\u002Fdoccano) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdoccano\u002Fdoccano.svg?cacheSeconds=86400) - 供人类使用的开源文本标注工具，提供情感分析、命名实体识别和机器翻译功能。\n* [Gretel Synthetics](https:\u002F\u002Fgithub.com\u002Fgretelai\u002Fgretel-synthetics) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgretelai\u002Fgretel-synthetics.svg?cacheSeconds=86400) - Gretel Synthetics 是一个结构化和非结构化文本的合成数据生成器，具有差分隐私学习特性。\n* [Label Studio](https:\u002F\u002Fgithub.com\u002FHumanSignal\u002Flabel-studio) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FHumanSignal\u002Flabel-studio.svg?cacheSeconds=86400) - 支持多领域的标准化输出格式数据标注工具。\n* [NeMo Curator](https:\u002F\u002Fgithub.com\u002FNVIDIA\u002FNeMo-Curator) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FNVIDIA\u002FNeMo-Curator.svg?cacheSeconds=86400) - NeMo Curator 是一个 GPU（图形处理器）加速框架，用于高效的大语言模型（LLM）数据策展。\n* [refinery](https:\u002F\u002Fgithub.com\u002Fcode-kern-ai\u002Frefinery) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcode-kern-ai\u002Frefinery.svg?cacheSeconds=86400) - 数据科学家扩展、评估和维护自然语言数据的开源选择。\n* [SDV](https:\u002F\u002Fgithub.com\u002Fsdv-dev\u002FSDV) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fsdv-dev\u002FSDV.svg?cacheSeconds=86400) - 合成数据仓库（Synthetic Data Vault，SDV）是一个合成数据生成库生态系统，允许用户轻松学习单表、多表和时间序列数据集，以便随后生成具有与原始数据集相同格式和统计特性的新合成数据。\n* [Semantic Segmentation Editor](https:\u002F\u002Fgithub.com\u002FHitachi-Automotive-And-Industry-Lab\u002Fsemantic-segmentation-editor) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FHitachi-Automotive-And-Industry-Lab\u002Fsemantic-segmentation-editor.svg?cacheSeconds=86400) - 日立公司用于标注相机和 LIDAR（激光雷达）数据的开源工具。\n* [synthcity](https:\u002F\u002Fgithub.com\u002Fvanderschaarlab\u002Fsynthcity) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvanderschaarlab\u002Fsynthcity.svg?cacheSeconds=86400) - synthcity 是一个用于生成和评估合成表格数据的库。\n* [TabGAN](https:\u002F\u002Fgithub.com\u002FDiyago\u002FTabular-data-generation) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FDiyago\u002FTabular-data-generation.svg?cacheSeconds=86400) - 使用 GANs（生成对抗网络）(CTGAN)、扩散模型和 LLMs（大语言模型）进行合成表格数据生成，具备对抗过滤、隐私指标和 sklearn 集成。\n* [ViPE](https:\u002F\u002Fgithub.com\u002Fnv-tlabs\u002Fvipe) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fnv-tlabs\u002Fvipe.svg?cacheSeconds=86400) - ViPE 是一种空间 AI 工具，用于从原始视频中标注相机姿态和密集深度图。\n* [YData 
Synthetic](https:\u002F\u002Fgithub.com\u002Fydataai\u002Fydata-synthetic) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fydataai\u002Fydata-synthetic.svg?cacheSeconds=86400) - YData Synthetic 是一个利用最先进生成模型来生成合成表格和时间序列数据的包。\n\n## 数据流水线\n* [Apache Airflow](https:\u002F\u002Fgithub.com\u002Fapache\u002Fairflow) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fairflow.svg?cacheSeconds=86400) - 基于 Python 构建的数据流水线框架，包括调度器、DAG（有向无环图）定义和用于可视化的 UI。\n* [Apache Nifi](https:\u002F\u002Fgithub.com\u002Fapache\u002Fnifi) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fnifi.svg?cacheSeconds=86400) - Apache NiFi 专为数据流而设计。它支持高度可配置的数据路由、转换和系统中介逻辑的有向图。\n* [Apache Oozie](https:\u002F\u002Fgithub.com\u002Fapache\u002Foozie) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Foozie.svg?cacheSeconds=86400) - Hadoop 作业的工作流调度器。\n* [Argo Workflows](https:\u002F\u002Fgithub.com\u002Fargoproj\u002Fargo-workflows) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fargoproj\u002Fargo-workflows.svg?cacheSeconds=86400) - 用于在 Kubernetes 上编排并行作业的开源容器原生工作流引擎。Argo Workflows 实现为 Kubernetes CRD（自定义资源定义）。\n* [Couler](https:\u002F\u002Fgithub.com\u002Fcouler-proj\u002Fcouler) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcouler-proj\u002Fcouler.svg?cacheSeconds=86400) - 在不同工作流引擎（如 Argo Workflows、Tekton Pipelines 和 Apache Airflow）上构建和管理机器学习工作流的统一接口。\n* [DataTrove](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fdatatrove) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Fdatatrove.svg?cacheSeconds=86400) - DataTrove 是一个用于在超大规模下处理、过滤和去重文本数据的库。\n* [Dagster](https:\u002F\u002Fgithub.com\u002Fdagster-io\u002Fdagster) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdagster-io\u002Fdagster.svg?cacheSeconds=86400) - 面向机器学习、分析和 ETL（提取、转换、加载）的数据编排工具。\n* [DBT](https:\u002F\u002Fgithub.com\u002Fdbt-labs\u002Fdbt-core) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdbt-labs\u002Fdbt-core.svg?cacheSeconds=86400) - 用于在数据仓库内运行转换的 ETL 工具。\n* [Flyte](https:\u002F\u002Fgithub.com\u002Fflyteorg\u002Fflyte) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fflyteorg\u002Fflyte.svg?cacheSeconds=86400) - Lyft 的云原生机器学习和数据处理平台 - [(演示)](https:\u002F\u002Fyoutu.be\u002FKdUJGSP1h9U?t=1451)。\n* [Genie](https:\u002F\u002Fgithub.com\u002FNetflix\u002Fgenie) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FNetflix\u002Fgenie.svg?cacheSeconds=86400) - 作业编排引擎，用于接口并触发基于 Hadoop 系统的作业执行。\n* [Hamilton](https:\u002F\u002Fgithub.com\u002Fdagworks-inc\u002Fhamilton) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdagworks-inc\u002Fhamilton.svg?cacheSeconds=86400) - 用于定义数据流的微编排框架。在任何 Python 运行的地方运行（例如 jupyter, fastAPI, spark, ray, dask）。在不被察觉的情况下引入软件工程最佳实践。用它来定义特征工程转换、端到端模型流水线以及 LLM（大型语言模型）工作流。它补充了宏观编排系统（例如 kedro, luigi, airflow, dbt 等），因为它替换了这些宏任务内的代码。自带可自托管的 UI，捕获血缘与溯源、执行遥测和数据摘要，并构建自填充目录；既可用于开发也可用于生产。\n* [Instill VDP](https:\u002F\u002Fgithub.com\u002Finstill-ai\u002Finstill-core) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Finstill-ai\u002Finstill-core.svg?cacheSeconds=86400) - Instill VDP（多功能数据流水线）旨在简化从开始到完成的数据处理流水线。\n* [Instructor](https:\u002F\u002Fgithub.com\u002Finstructor-ai\u002Finstructor) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Finstructor-ai\u002Finstructor.svg?cacheSeconds=86400) - Instructor 让从 GPT-3.5、GPT-4、GPT-4-Vision 和开源模型等 LLM 获取结构化数据（如 JSON）变得简单。\n* [Kedro](https:\u002F\u002Fgithub.com\u002Fkedro-org\u002Fkedro) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fkedro-org\u002Fkedro.svg?cacheSeconds=86400) - Kedro 是一种工作流开发工具，帮助您构建稳健、可扩展、可部署、可复现且版本化的数据流水线。\n* [Luigi](https:\u002F\u002Fgithub.com\u002Fspotify\u002Fluigi) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fspotify\u002Fluigi.svg?cacheSeconds=86400) - Luigi 是一个 Python 
模块，帮助您构建复杂的批处理作业流水线，处理依赖解析、工作流管理、可视化等。\n* [Metaflow](https:\u002F\u002Fgithub.com\u002FNetflix\u002Fmetaflow) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FNetflix\u002Fmetaflow.svg?cacheSeconds=86400) - 一个供数据科学家轻松构建和管理现实世界数据科学项目的框架。\n* [Pachyderm](https:\u002F\u002Fgithub.com\u002Fpachyderm\u002Fpachyderm) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpachyderm\u002Fpachyderm.svg?cacheSeconds=86400) - 基于 Kubernetes 构建的开源分布式处理框架，主要专注于动态构建生产级机器学习流水线 - [(视频)](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=LamKVhe2RSM)。\n* [Ploomber](https:\u002F\u002Fgithub.com\u002Fploomber\u002Fploomber) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fploomber\u002Fploomber.svg?cacheSeconds=86400) - 构建数据流水线的最快方式。迭代开发，随处部署。\n* [Pixeltable](https:\u002F\u002Fgithub.com\u002Fpixeltable\u002Fpixeltable) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpixeltable\u002Fpixeltable.svg?cacheSeconds=86400) – 提供声明式、增量数据基础设施的开源 Python 库，用于构建和管理多模态 AI 工作负载。\n* [Prefect Core](https:\u002F\u002Fgithub.com\u002FPrefectHQ\u002Fprefect) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FPrefectHQ\u002Fprefect.svg?cacheSeconds=86400) - 工作流管理系统，使您能够轻松地将重试、日志记录、动态映射、缓存、失败通知等功能添加到数据流水线中。\n* [SeqIO](https:\u002F\u002Fgithub.com\u002Fgoogle\u002Fseqio) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle\u002Fseqio.svg?cacheSeconds=86400) - SeqIO 是一个用于处理顺序数据以供下游序列模型使用的库。\n* [Snakemake](https:\u002F\u002Fgithub.com\u002Fsnakemake\u002Fsnakemake) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fsnakemake\u002Fsnakemake.svg?cacheSeconds=86400) - 用于可复现和可扩展数据分析的工作流管理系统。\n* [Towhee](https:\u002F\u002Fgithub.com\u002Ftowhee-io\u002Ftowhee) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftowhee-io\u002Ftowhee.svg?cacheSeconds=86400) - 使用一个或多个 ML 模型生成嵌入向量的通用机器学习流水线。\n* [unstructured](https:\u002F\u002Fgithub.com\u002FUnstructured-IO\u002Funstructured) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FUnstructured-IO\u002Funstructured.svg?cacheSeconds=86400) - unstructured 简化和优化 LLM 的数据处理工作流，摄入和预处理图像和文本文档，如 PDF、HTML、Word 文档等。 \n* [ZenML](https:\u002F\u002Fgithub.com\u002Fzenml-io\u002Fzenml) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fzenml-io\u002Fzenml.svg?cacheSeconds=86400) - ZenML 是一个可扩展的开源 MLOps（机器学习运维）框架，用于创建可复现的 ML 流水线，重点关注自动化元数据跟踪、缓存以及对其他工具的许多集成。\n\n## 数据科学笔记本\n* [Apache Zeppelin](https:\u002F\u002Fgithub.com\u002Fapache\u002Fzeppelin) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fzeppelin.svg?cacheSeconds=86400) - 基于 Web 的笔记本，支持使用 SQL、Scala 等进行数据驱动、交互式数据分析及协作文档。\n* [Deepnote](https:\u002F\u002Fgithub.com\u002Fdeepnote\u002Fdeepnote) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdeepnote\u002Fdeepnote.svg?cacheSeconds=86400) - Deepnote 是 Jupyter 的直接替代品，采用以 AI 为首要的设计，拥有流畅的用户界面 (UI)、新的代码块以及原生数据集成。在您喜欢的集成开发环境 (IDE) 中本地使用 Python、R 和 SQL，然后扩展到 Deepnote 云端进行实时协作、使用 Deepnote 智能体 (Agent) 以及部署可运行的数据应用。\n* [Jupyter Notebooks](https:\u002F\u002Fgithub.com\u002Fjupyter\u002Fnotebook) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjupyter\u002Fnotebook.svg?cacheSeconds=86400) - 用于可重复开发的 Web 界面 Python 沙箱 (Sandbox) 环境\n* [Marimo](https:\u002F\u002Fgithub.com\u002Fmarimo-team\u002Fmarimo) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmarimo-team\u002Fmarimo.svg?cacheSeconds=86400) - 响应式 Python 笔记本——运行可复现的实验，作为脚本执行，作为应用部署，并使用 Git 进行版本控制。\n* [Papermill](https:\u002F\u002Fgithub.com\u002Fnteract\u002Fpapermill) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fnteract\u002Fpapermill.svg?cacheSeconds=86400) - Papermill 是一个用于对笔记本进行参数化并像 Python 脚本一样执行它们的库。\n* [Polynote](https:\u002F\u002Fgithub.com\u002Fpolynote\u002Fpolynote) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpolynote\u002Fpolynote.svg?cacheSeconds=86400) - Polynote 是一个实验性的多语言 (Polyglot) 笔记本环境。目前，它支持 Scala 和 
Python（带或不带 Spark）、SQL 和 Vega。\n* [RMarkdown](https:\u002F\u002Fgithub.com\u002Frstudio\u002Frmarkdown) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Frstudio\u002Frmarkdown.svg?cacheSeconds=86400) - rmarkdown 包是基于 Pandoc 的下一代 R Markdown 实现。\n* [Stencila](https:\u002F\u002Fgithub.com\u002Fstencila\u002Fstencila) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fstencila\u002Fstencila.svg?cacheSeconds=86400) - Stencila 是一个用于创建、协作和共享数据驱动内容的平台。内容透明且可复现。\n* [Voilà](https:\u002F\u002Fgithub.com\u002Fvoila-dashboards\u002Fvoila) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvoila-dashboards\u002Fvoila.svg?cacheSeconds=86400) - Voilà 将 Jupyter 笔记本转换为独立的 Web 应用程序，例如可用于仪表板 (Dashboard)。\n* [.NET Interactive](https:\u002F\u002Fgithub.com\u002Fdotnet\u002Finteractive) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdotnet\u002Finteractive.svg?cacheSeconds=86400) - .NET Interactive 利用 .NET 的强大功能，并将其嵌入到您的交互体验中。\n\n## 数据存储优化\n* [AIStore](https:\u002F\u002Fgithub.com\u002FNVIDIA\u002Faistore) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FNVIDIA\u002Faistore.svg?cacheSeconds=86400) - AIStore 是一个轻量级的对象存储系统，具备随每个新增存储节点线性扩展的能力，并特别专注于 PB 级深度学习。\n* [Alluxio](https:\u002F\u002Fgithub.com\u002FAlluxio\u002Falluxio) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FAlluxio\u002Falluxio.svg?cacheSeconds=86400) - 一个虚拟分布式存储系统，连接计算框架与存储系统之间的桥梁。\n* [Apache Arrow](https:\u002F\u002Fgithub.com\u002Fapache\u002Farrow) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Farrow.svg?cacheSeconds=86400) - 一种内存中的列式数据表示格式，兼容 Pandas、基于 Hadoop 的系统等。\n* [Apache Druid](https:\u002F\u002Fgithub.com\u002Fapache\u002Fdruid) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fdruid.svg?cacheSeconds=86400) - 一款高性能实时分析数据库。查看此 [文章](https:\u002F\u002Ftowardsdatascience.com\u002Fintroduction-to-druid-4bf285b92b5a) 了解介绍。\n* [Apache 
Hudi](https:\u002F\u002Fgithub.com\u002Fapache\u002Fhudi) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fhudi.svg?cacheSeconds=86400) - Hudi 是一个事务性 (ACID) 数据湖平台，将核心仓库和数据库功能直接带入数据湖。Hudi 非常适合流式工作负载，也允许创建高效的增量批处理管道。支持流行的查询引擎，包括 Spark、Flink、Presto、Trino、Hive 等。更多信息请[点击这里](https:\u002F\u002Fhudi.apache.org\u002F)。\n* [Apache Iceberg](https:\u002F\u002Fgithub.com\u002Fapache\u002Ficeberg) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Ficeberg.svg?cacheSeconds=86400) - Iceberg 是一种符合 ACID 规范的高性能格式，专为超大规模分析表（包含数十 PB 数据）构建，它将 SQL 表的可靠性和简洁性带入大数据领域，同时使得 Spark、Trino、Flink、Presto、Hive 和 Impala 等引擎能够安全地同时操作相同的表。更多信息请[点击这里](https:\u002F\u002Ficeberg.apache.org\u002F)。\n* [Apache Ignite](https:\u002F\u002Fgithub.com\u002Fapache\u002Fignite) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fignite.svg?cacheSeconds=86400) - 一个以内存为中心的分布式数据库、缓存和处理平台，适用于事务性、分析和流式工作负载，在 PB 级规模下提供内存速度 - [演示](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=Xt4PWQ__YPw)。\n* [Apache Parquet](https:\u002F\u002Fgithub.com\u002Fapache\u002Fparquet-java) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fparquet-java.svg?cacheSeconds=86400) - 一种磁盘上的列式数据表示格式，兼容 Pandas、基于 Hadoop 的系统等。\n* [Apache Pinot](https:\u002F\u002Fgithub.com\u002Fapache\u002Fpinot) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fpinot.svg?cacheSeconds=86400) - 一个实时分布式 OLAP (联机分析处理) 数据存储。关于大数据开源 OLAP 系统的比较：ClickHouse、Druid 和 Pinot 可在 [此处](https:\u002F\u002Fmedium.com\u002F@leventov\u002Fcomparison-of-the-open-source-olap-systems-for-big-data-clickhouse-druid-and-pinot-8e042a5ed1c7) 找到。\n* [Casibase](https:\u002F\u002Fgithub.com\u002Fcasibase\u002Fcasibase) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcasibase\u002Fcasibase.svg?cacheSeconds=86400) - Casibase 是一个类似 LangChain 的 RAG (检索增强生成) 知识库，带有 Web UI 和企业级单点登录 (SSO)。\n* 
[Chroma](https:\u002F\u002Fgithub.com\u002Fchroma-core\u002Fchroma) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fchroma-core\u002Fchroma.svg?cacheSeconds=86400) - Chroma 是一个开源的嵌入 (Embedding) 数据库。\n* [ClickHouse](https:\u002F\u002Fgithub.com\u002FClickHouse\u002FClickHouse) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FClickHouse\u002FClickHouse.svg?cacheSeconds=86400) - ClickHouse 是一个开源的列式数据库管理系统。\n* [Delta Lake](https:\u002F\u002Fgithub.com\u002Fdelta-io\u002Fdelta) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdelta-io\u002Fdelta.svg?cacheSeconds=86400) - Delta Lake 是一个存储层，为 Apache Spark 和其他大数据引擎带来可扩展的 ACID 事务。\n* [EdgeDB](https:\u002F\u002Fgithub.com\u002Fgeldata\u002Fgel) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgeldata\u002Fgel.svg?cacheSeconds=86400) - Gel 通过现代数据模型、图查询、身份验证与 AI 解决方案等，增强了 Postgres 的功能。\n* [GPTCache](https:\u002F\u002Fgithub.com\u002Fzilliztech\u002FGPTCache) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fzilliztech\u002FGPTCache.svg?cacheSeconds=86400) - GPTCache 是一个用于为大语言模型查询创建语义缓存的库。\n* [InfluxDB](https:\u002F\u002Fgithub.com\u002Finfluxdata\u002Finfluxdb) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Finfluxdata\u002Finfluxdb.svg?cacheSeconds=86400) - 用于指标、事件和实时分析的可扩展数据存储。\n* [Milvus](https:\u002F\u002Fgithub.com\u002Fmilvus-io\u002Fmilvus) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmilvus-io\u002Fmilvus.svg?cacheSeconds=86400) - Milvus 是一个云原生、开源的向量数据库，旨在管理由机器学习和神经网络生成的嵌入向量。\n* [Marqo](https:\u002F\u002Fgithub.com\u002Fmarqo-ai\u002Fmarqo) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmarqo-ai\u002Fmarqo.svg?cacheSeconds=86400) - Marqo 是一个端到端的向量搜索引擎。\n* [pgvector](https:\u002F\u002Fgithub.com\u002Fpgvector\u002Fpgvector) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpgvector\u002Fpgvector.svg?cacheSeconds=86400) - pgvector 帮助 Postgres 进行向量相似度搜索。\n* 
[PostgresML](https:\u002F\u002Fgithub.com\u002Fpostgresml\u002Fpostgresml) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpostgresml\u002Fpostgresml.svg?cacheSeconds=86400) - PostgresML 是 PostgreSQL 的一个机器学习扩展，允许您使用 SQL 查询对文本和表格数据进行训练和推理。\n* [Redis](https:\u002F\u002Fgithub.com\u002Fredis\u002Fredis) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fredis\u002Fredis.svg?cacheSeconds=86400) - Redis 是一个开源的内存数据存储，支持向量相似度搜索，使其适用于 AI\u002FML 应用，如语义搜索和推荐系统。\n* [Safetensors](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fsafetensors) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Fsafetensors.svg?cacheSeconds=86400) - 一种简单、安全的方式来存储和分发张量 (Tensor)。\n* [TimescaleDB](https:\u002F\u002Fgithub.com\u002Ftimescale\u002Ftimescaledb) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftimescale\u002Ftimescaledb.svg?cacheSeconds=86400) - 一个开源的时间序列 (Time-series) SQL 数据库，针对快速摄入和复杂查询进行了优化，作为 PostgreSQL 扩展包发布 - [(视频)](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=zbjub8BQPyE)。\n* [Weaviate](https:\u002F\u002Fgithub.com\u002Fweaviate\u002Fweaviate) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fweaviate\u002Fweaviate.svg?cacheSeconds=86400) - 一个低延迟的向量搜索引擎（支持 GraphQL、RESTful），开箱即用支持不同的媒体类型。模块包括语义搜索、问答、分类、可定制模型（PyTorch\u002FTensorFlow\u002FKeras）等。\n* [Zarr](https:\u002F\u002Fgithub.com\u002Fzarr-developers\u002Fzarr-python) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fzarr-developers\u002Fzarr-python.svg?cacheSeconds=86400) - 专为并行计算设计的分块、压缩、N 维数组的 Python 实现。\n\n## 数据流处理\n* [Apache Beam](https:\u002F\u002Fgithub.com\u002Fapache\u002Fbeam) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fbeam.svg?cacheSeconds=86400) - Apache Beam 是一个用于批处理和流处理的统一编程模型。\n* [Apache Flink](https:\u002F\u002Fgithub.com\u002Fapache\u002Fflink) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fflink.svg?cacheSeconds=86400) - 开源流处理框架，具有强大的流处理和批处理能力。\n* 
[Apache Kafka](https:\u002F\u002Fgithub.com\u002Fapache\u002Fkafka) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fkafka.svg?cacheSeconds=86400) - Kafka 客户端库，用于构建输入和输出存储在 Kafka 集群中的应用和微服务。\n* [Apache Samza](https:\u002F\u002Fgithub.com\u002Fapache\u002Fsamza) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fsamza.svg?cacheSeconds=86400) - 分布式流处理框架。它使用 Apache Kafka 进行消息传递，并使用 Apache Hadoop YARN 提供容错、处理器隔离、安全和资源管理。\n* [Apache Spark](https:\u002F\u002Fgithub.com\u002Fapache\u002Fspark) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fspark.svg?cacheSeconds=86400) - 使用 Apache Spark 框架作为后端支持有状态精确一次语义的流式微批处理。\n* [Bytewax](https:\u002F\u002Fgithub.com\u002Fbytewax\u002Fbytewax) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fbytewax\u002Fbytewax.svg?cacheSeconds=86400) - 基于 Rust 引擎构建的灵活以 Python 为中心的有状态流处理框架。\n* [FastStream](https:\u002F\u002Fgithub.com\u002Fairtai\u002Ffaststream) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fairtai\u002Ffaststream.svg?cacheSeconds=86400) - 一个现代的中间件无关流式 Python 框架，支持 Apache Kafka、RabbitMQ 和 NATS 协议，受 FastAPI 启发，易于与其他 Web 框架集成。\n* [MOA](https:\u002F\u002Fgithub.com\u002FWaikato\u002Fmoa) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FWaikato\u002Fmoa.svg?cacheSeconds=86400) - MOA（大规模在线分析）是一个用于大数据流挖掘的开源框架。\n* [MosaicML Streaming](https:\u002F\u002Fgithub.com\u002Fmosaicml\u002Fstreaming) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmosaicml\u002Fstreaming.svg?cacheSeconds=86400) - 从云存储快速、确定性地流式传输大型数据集，用于分布式模型训练。\n* [RisingWave](https:\u002F\u002Fgithub.com\u002Frisingwavelabs\u002Frisingwave) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Frisingwavelabs\u002Frisingwave.svg?cacheSeconds=86400) - 一个统一的流处理和低延迟服务的分布式 SQL 流数据库，非常适合构建和提供在线机器学习功能。\n* [TensorStore](https:\u002F\u002Fgithub.com\u002Fgoogle\u002Ftensorstore) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle\u002Ftensorstore.svg?cacheSeconds=86400) - 用于读写大型多维数组的库。\n\n## 部署与服务\n* [Agenta](https:\u002F\u002Fgithub.com\u002FAgenta-AI\u002Fagenta) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FAgenta-AI\u002Fagenta.svg?cacheSeconds=86400) - Agenta 提供用于整个 LLMOps (大语言模型运维) 工作流的端到端工具：构建（LLM (大语言模型) 游乐场、评估）、部署（提示词和配置管理），以及监控（LLM 可观测性和追踪）。\n* [AirLLM](https:\u002F\u002Fgithub.com\u002Flyogavin\u002Fairllm) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flyogavin\u002Fairllm.svg?cacheSeconds=86400) - AirLLM 优化推理内存使用，允许 70B 大语言模型在单张 4GB GPU (图形处理器) 卡上运行推理，无需量化 (Quantization)、蒸馏 (Distillation) 和剪枝 (Pruning)。\n* [AITemplate](https:\u002F\u002Fgithub.com\u002Ffacebookincubator\u002FAITemplate) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffacebookincubator\u002FAITemplate.svg?cacheSeconds=86400) - AITemplate (AIT) 是一个 Python 框架，可将深度神经网络转换为 CUDA (NVIDIA GPU) \u002F HIP (AMD GPU) C++ 代码，以实现闪电般的快速推理服务。\n* [BentoML](https:\u002F\u002Fgithub.com\u002Fbentoml\u002FBentoML) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fbentoml\u002FBentoML.svg?cacheSeconds=86400) - BentoML 是一个用于高性能机器学习模型服务的开源框架。\n* [BISHENG](https:\u002F\u002Fgithub.com\u002Fdataelement\u002Fbisheng) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdataelement\u002Fbisheng.svg?cacheSeconds=86400) - BISHENG 是一个面向企业场景的开源 LLM 应用 DevOps (开发运维) 平台。\n* [DeepDetect](https:\u002F\u002Fgithub.com\u002Fjolibrain\u002Fdeepdetect) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjolibrain\u002Fdeepdetect.svg?cacheSeconds=86400) - 由 Jolibrain 维护的用于 TensorFlow、XGBoost 和 Caffe 模型的 C++ 机器学习生产服务器。\n* [Dynamo](https:\u002F\u002Fgithub.com\u002Fai-dynamo\u002Fdynamo) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fai-dynamo\u002Fdynamo.svg?cacheSeconds=86400) - NVIDIA Dynamo 是一个高吞吐、低延迟的推理框架，专为在多节点分布式环境中服务生成式 AI 和推理模型而设计。\n* 
[exo](https:\u002F\u002Fgithub.com\u002Fexo-explore\u002Fexo) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fexo-explore\u002Fexo.svg?cacheSeconds=86400) - exo 帮助你使用日常设备在家中运行 AI 集群。\n* [Genkit](https:\u002F\u002Fgithub.com\u002Ffirebase\u002Fgenkit) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffirebase\u002Fgenkit.svg?cacheSeconds=86400) - Genkit 是一个用于使用熟悉的以代码为中心的模式构建 AI 驱动应用的开源框架。Genkit 使得利用可观测性 (Observability) 和评估轻松开发、集成和测试 AI 功能变得简单。\n* [Inference](https:\u002F\u002Fgithub.com\u002Froboflow\u002Finference) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Froboflow\u002Finference.svg?cacheSeconds=86400) - 一个快速、生产就绪的计算机视觉推理服务器，支持部署许多流行的模型架构和微调模型。使用 Inference，你可以使用 Docker 在自己的硬件上部署 YOLOv5、YOLOv8、CLIP、SAM 和 CogVLM 等模型。\n* [Infinity](https:\u002F\u002Fgithub.com\u002Fmichaelfeil\u002Finfinity) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmichaelfeil\u002Finfinity.svg?cacheSeconds=86400) - Infinity 是一个用于服务文本嵌入 (Text-embeddings)、重排序模型 (Reranking models) 和 clip 的高吞吐、低延迟 REST API。\n* [IPEX-LLM](https:\u002F\u002Fgithub.com\u002Fintel\u002Fipex-llm) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fintel\u002Fipex-llm.svg?cacheSeconds=86400) - IPEX-LLM 是一个 PyTorch 库，用于在 Intel CPU (中央处理器) 和 GPU (例如带有集成显卡的本地 PC、Arc、Flex 和 Max 等独立显卡) 上运行 LLM，具有极低的延迟。\n* [LiteLLM](https:\u002F\u002Fgithub.com\u002FBerriAI\u002Flitellm) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FBerriAI\u002Flitellm.svg?cacheSeconds=86400) - LiteLLM 是一个 Python SDK (软件开发工具包)、代理服务器 (LLM 网关)，用于以 OpenAI 格式调用 100+ LLM API (应用程序接口) - Bedrock, Azure, OpenAI, VertexAI, Cohere, Anthropic, Sagemaker, HuggingFace, Replicate, Groq。\n* [LitServe](https:\u002F\u002Fgithub.com\u002FLightning-AI\u002FLitServe) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FLightning-AI\u002FLitServe.svg?cacheSeconds=86400) - LitServe 是一个基于 FastAPI 构建的 AI 模型灵活服务引擎。它支持为模型、智能体、多模态系统、RAG (检索增强生成) 和复杂机器学习流水线定制推理引擎。\n* 
[Jina-serve](https:\u002F\u002Fgithub.com\u002Fjina-ai\u002Fserve) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjina-ai\u002Fserve.svg?cacheSeconds=86400) - Jina-serve 是一个用于构建和部署通过 gRPC、HTTP 和 WebSockets 通信的 AI 服务的框架。\n* [Kiln](https:\u002F\u002Fgithub.com\u002Fkiln-ai\u002Fkiln) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fkiln-ai\u002Fkiln.svg?cacheSeconds=86400) - Kiln 是一个用于微调 LLM 模型、合成数据生成 (Synthetic Data Generation) 和协作处理数据集的 OSS (开源软件) 工具。\n* [KServe](https:\u002F\u002Fgithub.com\u002Fkserve\u002Fkserve) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fkserve\u002Fkserve.svg?cacheSeconds=86400) - KServe 为服务预测性和生成式机器学习提供了一个 Kubernetes (K8s) 自定义资源定义 (Custom Resource Definition)。\n* [KTransformers](https:\u002F\u002Fgithub.com\u002Fkvcache-ai\u002Fktransformers) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fkvcache-ai\u002Fktransformers.svg?cacheSeconds=86400) - KTransformers 是一个体验前沿 LLM 推理优化的灵活框架。\n* [Langtrace](https:\u002F\u002Fgithub.com\u002FScale3-Labs\u002Flangtrace) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FScale3-Labs\u002Flangtrace.svg?cacheSeconds=86400) - Langtrace 是一个基于 OpenTelemetry 的开源、端到端 LLM 应用可观测性工具，为流行的 LLM、LLM 框架、VectorDB (向量数据库) 等提供实时追踪、评估和指标 (Metrics)。\n* [Lepton AI](https:\u002F\u002Fgithub.com\u002Fleptonai\u002Fleptonai) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fleptonai\u002Fleptonai.svg?cacheSeconds=86400) - LeptonAI Python 库让你能够轻松地从 Python 代码构建 AI 服务。\n* [LightLLM](https:\u002F\u002Fgithub.com\u002FModelTC\u002Flightllm) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FModelTC\u002Flightllm.svg?cacheSeconds=86400) - LightLLM 是一个基于 Python 的 LLM (大语言模型) 推理和服务框架，以其轻量级设计、易于扩展和高速性能而闻名。\n* [llama.cpp](https:\u002F\u002Fgithub.com\u002Fggml-org\u002Fllama.cpp) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fggml-org\u002Fllama.cpp.svg?cacheSeconds=86400) - llama.cpp 是一个开源软件库，可在各种大语言模型（如 
Llama）上执行推理。\n* [llmfit](https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FAlexsJones\u002Fllmfit.svg?cacheSeconds=86400) - 一个终端工具，可为你的系统的 RAM、CPU 和 GPU 匹配合适大小的 LLM 模型。检测你的硬件，根据质量、速度、适配度和上下文维度对每个模型进行评分，并告诉你哪些实际上能在你的机器上运行良好。\n* [LMDeploy](https:\u002F\u002Fgithub.com\u002FInternLM\u002Flmdeploy) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FInternLM\u002Flmdeploy.svg?cacheSeconds=86400) - LMDeploy 是一个用于压缩、部署和服务 LLM 的工具包。\n* [LM Studio](https:\u002F\u002Fgithub.com\u002Flmstudio-ai\u002Flms) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flmstudio-ai\u002Flms.svg?cacheSeconds=86400) - LM Studio 是一个用于在计算机上本地部署 LLM 模型的工具，即使是在相对普通的机器上，只要满足最低要求即可。\n* [LocalAI](https:\u002F\u002Fgithub.com\u002Fmudler\u002FLocalAI) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmudler\u002FLocalAI.svg?cacheSeconds=86400) - LocalAI 是一个即用型替代 REST API，兼容 OpenAI API 规范，用于本地推理。\n* [MindsDB](https:\u002F\u002Fgithub.com\u002Fmindsdb\u002Fmindsdb) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmindsdb\u002Fmindsdb.svg?cacheSeconds=86400) - MindsDB 是一个从你的数据库、向量存储和应用数据中实时创建、服务和微调模型的平台。\n* [mini-sglang](https:\u002F\u002Fgithub.com\u002Fsgl-project\u002Fmini-sglang) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fsgl-project\u002Fmini-sglang.svg?cacheSeconds=86400) - mini-sglang 是一个轻量级且高效的大语言模型服务框架。\n* [MLRun](https:\u002F\u002Fgithub.com\u002Fmlrun\u002Fmlrun)![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmlrun\u002Fmlrun.svg?cacheSeconds=86400)- MLRun 是一个开源 MLOps (机器学习运维) 框架，用于在其整个生命周期内快速构建和管理持续的机器学习和生成式 AI 应用。\n* [MLServer](https:\u002F\u002Fgithub.com\u002FSeldonIO\u002Fmlserver) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FSeldonIO\u002Fmlserver.svg?cacheSeconds=86400) - 用于你的机器学习模型的推理服务器，包括支持多种框架、多模型服务等更多功能。\n* [Model Runner](https:\u002F\u002Fgithub.com\u002Fdocker\u002Fmodel-runner) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdocker\u002Fmodel-runner.svg?cacheSeconds=86400) - Docker Model Runner 使得使用 Docker 管理、运行和服务 AI 模型变得容易，支持直接从 Docker Hub 或任何符合 OCI (开放容器倡议) 标准的注册表拉取的 LLM 和其他 AI 模型。\n* [Mosec](https:\u002F\u002Fgithub.com\u002Fmosecorg\u002Fmosec) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmosecorg\u002Fmosec.svg?cacheSeconds=86400) - 一个由 Rust 驱动的多阶段流水线模型服务器，提供动态批处理等功能。作为微服务 (Micro-services) 实施和部署非常容易。\n* [nano-vllm](https:\u002F\u002Fgithub.com\u002FGeeeekExplorer\u002Fnano-vllm) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FGeeeekExplorer\u002Fnano-vllm.svg?cacheSeconds=86400) - nano-vllm 是一个从头构建的轻量级 vLLM 实现，提供具有前缀缓存 (Prefix Caching)、张量并行 (Tensor Parallelism) 和 CUDA 图 (CUDA Graph) 等优化技术的快速离线推理。\n* [nndeploy](https:\u002F\u002Fgithub.com\u002Fnndeploy\u002Fnndeploy) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fnndeploy\u002Fnndeploy.svg?cacheSeconds=86400) - 一个易用且高性能的 AI 部署框架。\n* [Nuclio](https:\u002F\u002Fgithub.com\u002Fnuclio\u002Fnuclio) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fnuclio\u002Fnuclio.svg?cacheSeconds=86400) - 一个专注于数据、I\u002FO 和计算密集型工作负载的高性能“无服务器” (Serverless) 框架。它与流行的数据科学工具（如 Jupyter 和 Kubeflow）集成良好；支持多种数据和流式数据源 (Streaming Sources)；并支持在 CPU 和 GPU 上执行。\n* [OpenLLM](https:\u002F\u002Fgithub.com\u002Fbentoml\u002FOpenLLM) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fbentoml\u002FOpenLLM.svg?cacheSeconds=86400) - OpenLLM 允许开发者使用单个命令运行任何开源 LLM（Llama 3.1, Qwen2, Phi3 等）或自定义模型，作为兼容 OpenAI 的 API。\n* [OpenVINO](https:\u002F\u002Fgithub.com\u002Fopenvinotoolkit\u002Fopenvino) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fopenvinotoolkit\u002Fopenvino.svg?cacheSeconds=86400) - OpenVINO 是一个用于优化和部署 AI 推理的开源工具包。\n* [Open WebUI](https:\u002F\u002Fgithub.com\u002Fopen-webui\u002Fopen-webui) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fopen-webui\u002Fopen-webui.svg?cacheSeconds=86400) - Open WebUI 是一个可扩展、功能丰富且用户友好的自托管 AI 平台，旨在完全离线运行。它支持各种 LLM 运行器（如 Ollama）和兼容 OpenAI 的 API，内置 RAG 推理引擎，使其成为强大的 AI 部署解决方案。\n* [OptiLLM](https:\u002F\u002Fgithub.com\u002Falgorithmicsuperintelligence\u002Foptillm) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Falgorithmicsuperintelligence\u002Foptillm.svg?cacheSeconds=86400) - OptiLLM 是一个兼容 OpenAI API 的优化推理代理，实现了 20 多种最先进技术，在不要求任何模型训练或微调的情况下，显著提高 LLM 在推理任务上的准确性和性能。\n* [PowerInfer](https:\u002F\u002Fgithub.com\u002FSJTU-IPADS\u002FPowerInfer) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FSJTU-IPADS\u002FPowerInfer.svg?cacheSeconds=86400) - PowerInfer 是一个利用激活局部性 (Activation Locality) 为你的设备提供的 CPU\u002FGPU LLM 推理引擎。\n* [Prompt2Model](https:\u002F\u002Fgithub.com\u002Fneulab\u002Fprompt2model) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fneulab\u002Fprompt2model.svg?cacheSeconds=86400) - Prompt2Model 是一个系统，它接受自然语言任务描述（如用于 ChatGPT 等 LLM 的提示词），以训练一个小型专用模型，便于部署。\n* [RamaLama](https:\u002F\u002Fgithub.com\u002Fcontainers\u002Framalama) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcontainers\u002Framalama.svg?cacheSeconds=86400) - RamaLama 是一个开源工具，通过 OCI 容器简化 AI 模型的本地使用和推理服务，消除了配置主机系统的需求。\n* [RunAnywhere](https:\u002F\u002Fgithub.com\u002FRunanywhereAI\u002Frunanywhere-sdks) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FRunanywhereAI\u002Frunanywhere-sdks.svg?cacheSeconds=86400) - RunAnywhere 是一个生产就绪的 SDK，用于在 iOS、Android、React Native 和 Flutter 的设备上运行 AI 模型（LLM、语音转文本、文本转语音），从而实现私有、离线且快速的移动 AI 应用。\n* [Seldon Core](https:\u002F\u002Fgithub.com\u002FSeldonIO\u002Fseldon-core) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FSeldonIO\u002Fseldon-core.svg?cacheSeconds=86400) - 用于在 Kubernetes 中部署和管理机器学习模型的开源平台 - [(视频)](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=pDlapGtecbY)。\n* 
[SGLang](https:\u002F\u002Fgithub.com\u002Fsgl-project\u002Fsglang) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fsgl-project\u002Fsglang.svg?cacheSeconds=86400) - SGLang 是一个用于大语言模型和视觉语言模型的快速服务框架。\n* [SkyPilot](https:\u002F\u002Fgithub.com\u002Fskypilot-org\u002Fskypilot) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fskypilot-org\u002Fskypilot.svg?cacheSeconds=86400) - SkyPilot 是一个在任何云上运行 LLM、AI 和批处理作业 (Batch Jobs) 的框架，提供最大成本节省、最高 GPU 可用性和托管执行。\n* [Tensorflow Serving](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Fserving) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftensorflow\u002Fserving.svg?cacheSeconds=86400) - 通过 grpc 协议服务 TensorFlow 模型的高性能框架，每核心每秒能处理 10 万次请求。\n* [text-generation-inference](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Ftext-generation-inference) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Ftext-generation-inference.svg?cacheSeconds=86400) - 大语言模型文本生成推理。\n* [TorchServe](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Fserve) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpytorch\u002Fserve.svg?cacheSeconds=86400) - TorchServe 是一个灵活且易于使用的用于服务 PyTorch 模型的工具。\n* [torchtune](https:\u002F\u002Fgithub.com\u002Fmeta-pytorch\u002Ftorchtune) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmeta-pytorch\u002Ftorchtune.svg?cacheSeconds=86400) - torchtune 是一个 PyTorch 库，用于轻松编写、后训练 (Post-training) 和实验 LLM。\n* [Transformer Lab](https:\u002F\u002Fgithub.com\u002Ftransformerlab\u002Ftransformerlab-app) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftransformerlab\u002Ftransformerlab-app.svg?cacheSeconds=86400) - Transformer Lab 是一个开源 LLM 工作区，用于在推理引擎和平台上本地微调、评估、导出和测试模型。\n* [Triton Inference Server](https:\u002F\u002Fgithub.com\u002Ftriton-inference-server\u002Fserver) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftriton-inference-server\u002Fserver.svg?cacheSeconds=86400) - Triton 是一个高性能开源服务软件，用于在 GPU 和 CPU 上部署来自任何框架的 AI 模型，同时最大化利用率。\n* [Vercel 
AI](https:\u002F\u002Fgithub.com\u002Fvercel\u002Fai) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvercel\u002Fai.svg?cacheSeconds=86400) - Vercel AI 是一个 TypeScript 工具包，旨在帮助你使用 Next.js、React、Svelte、Vue 等流行框架和 Node.js 等运行时构建 AI 驱动的应用。\n* [Vespa](https:\u002F\u002Fgithub.com\u002Fvespa-engine\u002Fvespa) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvespa-engine\u002Fvespa.svg?cacheSeconds=86400) - 在服务时间和任何规模下搜索、推理并整理向量 (Vectors)、张量 (Tensors)、文本和结构化数据。\n* [vLLM](https:\u002F\u002Fgithub.com\u002Fvllm-project\u002Fvllm) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvllm-project\u002Fvllm.svg?cacheSeconds=86400) - vLLM 是一个用于 LLM 的高吞吐和内存高效的推理和服务引擎。\n\n## 评估与监控\n* [AlpacaEval](https:\u002F\u002Fgithub.com\u002Ftatsu-lab\u002Falpaca_eval) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftatsu-lab\u002Falpaca_eval.svg?cacheSeconds=86400) - AlpacaEval 是一个用于评估遵循指令的语言模型的自动评估工具。\n* [ANN-Benchmarks](https:\u002F\u002Fgithub.com\u002Ferikbern\u002Fann-benchmarks) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ferikbern\u002Fann-benchmarks.svg?cacheSeconds=86400) - ANN-Benchmarks 是近似最近邻（Approximate Nearest Neighbor, ANN）算法搜索的基准测试环境。\n* [ARES](https:\u002F\u002Fgithub.com\u002Fstanford-futuredata\u002FARES) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fstanford-futuredata\u002FARES.svg?cacheSeconds=86400) - ARES 是一个用于自动评估检索增强生成（Retrieval-Augmented Generation, RAG）模型的框架。\n* [BEIR](https:\u002F\u002Fgithub.com\u002Fbeir-cellar\u002Fbeir) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fbeir-cellar\u002Fbeir.svg?cacheSeconds=86400) - BEIR 是一个包含多样化信息检索（Information Retrieval, IR）任务的异构基准测试集。它还提供了一个通用且易于使用的框架，用于在基准测试中评估基于自然语言处理（Natural Language Processing, NLP）的检索模型。\n* [Code Generation LM Evaluation Harness](https:\u002F\u002Fgithub.com\u002Fbigcode-project\u002Fbigcode-evaluation-harness) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fbigcode-project\u002Fbigcode-evaluation-harness.svg?cacheSeconds=86400) - Code Generation LM Evaluation Harness 是一个用于评估代码生成模型的框架。\n* [COMET](https:\u002F\u002Fgithub.com\u002FUnbabel\u002FCOMET) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FUnbabel\u002FCOMET.svg?cacheSeconds=86400) - COMET 是一个开源的机器学习评估框架。\n* [C-Eval](https:\u002F\u002Fgithub.com\u002Fhkust-nlp\u002Fceval) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhkust-nlp\u002Fceval.svg?cacheSeconds=86400) - C-Eval 是一个面向基础模型的综合性中文评估套件。\n* [Deepchecks](https:\u002F\u002Fgithub.com\u002Fdeepchecks\u002Fdeepchecks) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdeepchecks\u002Fdeepchecks.svg?cacheSeconds=86400) - Deepchecks 是一个全面的开源解决方案，满足您所有的 AI 和机器学习（ML）验证需求，使您能够彻底测试从研究到生产的数据和模型。\n* [DeepEval](https:\u002F\u002Fgithub.com\u002Fconfident-ai\u002Fdeepeval) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fconfident-ai\u002Fdeepeval.svg?cacheSeconds=86400) - DeepEval 是一个简单易用的开源框架，用于大语言模型（Large Language Model, LLM）应用的评估。\n* [DomainBed](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002FDomainBed) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffacebookresearch\u002FDomainBed.svg?cacheSeconds=86400) - DomainBed 是一个测试套件，包含用于领域泛化的基准数据集和算法。\n* [EvalAI](https:\u002F\u002Fgithub.com\u002FCloud-CV\u002FEvalAI) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FCloud-CV\u002FEvalAI.svg?cacheSeconds=86400) - EvalAI 是一个开源平台，用于大规模评估和比较 AI 算法。\n* [Evalchemy](https:\u002F\u002Fgithub.com\u002Fmlfoundations\u002Fevalchemy) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmlfoundations\u002Fevalchemy.svg?cacheSeconds=86400) - Evalchemy 是一个统一且易于使用的工具包，用于评估后训练语言模型。\n* [EvalPlus](https:\u002F\u002Fgithub.com\u002Fevalplus\u002Fevalplus) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fevalplus\u002Fevalplus.svg?cacheSeconds=86400) 
- EvalPlus 是一个针对 LLM4Code 的稳健评估框架，具有扩展的 HumanEval+ 和 MBPP+ 基准、效率评估（EvalPerf）以及安全、可扩展的评估工具包。\n* [Evals](https:\u002F\u002Fgithub.com\u002Fopenai\u002Fevals) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fopenai\u002Fevals.svg?cacheSeconds=86400) - Evals 是一个用于评估 OpenAI 模型的框架，也是一个开源的基准注册表。\n* [EvalScope](https:\u002F\u002Fgithub.com\u002Fmodelscope\u002Fevalscope) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmodelscope\u002Fevalscope.svg?cacheSeconds=86400) - EvalScope 是一个精简且可定制的框架，用于高效的大模型评估和性能基准测试。\n* [Evaluate](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fevaluate) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Fevaluate.svg?cacheSeconds=86400) - Evaluate 是一个库，使评估和比较模型及其性能报告变得更加容易和标准化。\n* [Evidently](https:\u002F\u002Fgithub.com\u002Fevidentlyai\u002Fevidently) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fevidentlyai\u002Fevidently.svg?cacheSeconds=86400) - Evidently 是一个开源框架，用于评估、测试和监控由机器学习和大语言模型驱动的系统。\n* [GAOKAO-Bench](https:\u002F\u002Fgithub.com\u002FOpenLMLab\u002FGAOKAO-Bench) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FOpenLMLab\u002FGAOKAO-Bench.svg?cacheSeconds=86400) - GAOKAO-Bench 是一个评估框架，使用中国高考（GAOKAO）题目作为数据集来评估大模型的语言理解和逻辑推理能力。\n* [Giskard](https:\u002F\u002Fgithub.com\u002FGiskard-AI\u002Fgiskard)![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FGiskard-AI\u002Fgiskard.svg?cacheSeconds=86400) - Giskard 是一个开源 Python 库，可自动检测 AI 应用中的性能、偏差和安全问题。\n* [guidellm](https:\u002F\u002Fgithub.com\u002Fvllm-project\u002Fguidellm) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvllm-project\u002Fguidellm.svg?cacheSeconds=86400) - guidellm 是大语言模型推理系统的基准测试和性能评估工具。\n* [HumanEval](https:\u002F\u002Fgithub.com\u002Fopenai\u002Fhuman-eval)![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fopenai\u002Fhuman-eval.svg?cacheSeconds=86400) - HumanEval 是一个基准测试，使用带有单元测试的 Python 编程问题来评估代码生成模型的功能正确性。\n* 
[Helicone](https:\u002F\u002Fgithub.com\u002FHelicone\u002Fhelicone) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FHelicone\u002Fhelicone.svg?cacheSeconds=86400) - Helicone 是一个全功能的开源大语言模型开发者平台。\n* [HELM](https:\u002F\u002Fgithub.com\u002Fstanford-crfm\u002Fhelm) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fstanford-crfm\u002Fhelm.svg?cacheSeconds=86400) - HELM（语言模型综合评估）提供了用于语言模型综合评估的工具，包括标准化数据集、各种模型的统一 API、多样化的指标、鲁棒性（Robustness）和公平性扰动、提示构建框架以及用于统一模型访问的代理服务器。\n* [Inspect](https:\u002F\u002Fgithub.com\u002FUKGovernmentBEIS\u002Finspect_ai) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FUKGovernmentBEIS\u002Finspect_ai.svg?cacheSeconds=86400) - Inspect 是一个用于大语言模型评估的框架。\n* [JiWER](https:\u002F\u002Fgithub.com\u002Fjitsi\u002Fjiwer) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjitsi\u002Fjiwer.svg?cacheSeconds=86400) - JiWER 是一个简单快速的 Python 包，用于评估自动语音识别系统。\n* [Laminar](https:\u002F\u002Fgithub.com\u002Flmnr-ai\u002Flmnr) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flmnr-ai\u002Flmnr.svg?cacheSeconds=86400) - Laminar 是一个开源平台，用于追踪、评估、标记和分析 AI 产品的大语言模型数据。\n* [Langfuse](https:\u002F\u002Fgithub.com\u002Flangfuse\u002Flangfuse) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flangfuse\u002Flangfuse.svg?cacheSeconds=86400) - Langfuse 是为基于大语言模型的应用提供的可观测性（Observability）与分析解决方案。\n* [LangTest](https:\u002F\u002Fgithub.com\u002FJohnSnowLabs\u002Flangtest) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FJohnSnowLabs\u002Flangtest.svg?cacheSeconds=86400) - LangTest 是一个用于自然语言处理模型的全面评估工具包。\n* [Language Model Evaluation Harness](https:\u002F\u002Fgithub.com\u002FEleutherAI\u002Flm-evaluation-harness) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FEleutherAI\u002Flm-evaluation-harness.svg?cacheSeconds=86400) - Language Model Evaluation Harness 是一个框架，用于在大量不同的评估任务上测试生成式语言模型。\n* 
[LangWatch](https:\u002F\u002Fgithub.com\u002Flangwatch\u002Flangwatch) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flangwatch\u002Flangwatch.svg?cacheSeconds=86400) - LangWatch 是 DSPy 的可视化界面，也是一个完整的大语言模型运维（LLM Ops）平台，用于监控、实验、测量和改进大语言模型管道，采用公平代码分发模式。\n* [LightEval](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Flighteval) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Flighteval.svg?cacheSeconds=86400) - LightEval 是一个轻量级的大语言模型评估套件。\n* [LLMPerf](https:\u002F\u002Fgithub.com\u002Fray-project\u002Fllmperf) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fray-project\u002Fllmperf.svg?cacheSeconds=86400) - LLMPerf 是一个用于评估大语言模型 API 性能的工具。\n* [lmms-eval](https:\u002F\u002Fgithub.com\u002FEvolvingLMMs-Lab\u002Flmms-eval) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FEvolvingLMMs-Lab\u002Flmms-eval.svg?cacheSeconds=86400) - lmms-eval 是一个精心打造的评估框架，用于一致且高效地评估多模态大语言模型（Large Multimodal Models, LMM）。\n* [Melting Pot](https:\u002F\u002Fgithub.com\u002Fgoogle-deepmind\u002Fmeltingpot) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle-deepmind\u002Fmeltingpot.svg?cacheSeconds=86400) - Melting Pot 是一套用于多智能体强化学习的测试场景套件。\n* [Meta-World](https:\u002F\u002Fgithub.com\u002FFarama-Foundation\u002FMetaworld) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FFarama-Foundation\u002FMetaworld.svg?cacheSeconds=86400) - Meta-World 是一个开源模拟基准，用于元强化学习和多任务学习，包含 50 种不同的机器人操作任务。\n* [mir_eval](https:\u002F\u002Fgithub.com\u002Fmir-evaluation\u002Fmir_eval) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmir-evaluation\u002Fmir_eval.svg?cacheSeconds=86400) - mir_eval 是一个 Python 库，提供了一种透明、标准化且直接的方式来评估音乐信息检索系统。\n* [MLPerf Inference](https:\u002F\u002Fgithub.com\u002Fmlcommons\u002Finference) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmlcommons\u002Finference.svg?cacheSeconds=86400) - MLPerf Inference 是一套基准测试套件，用于衡量系统在各种部署场景中运行模型的速度。\n* 
[Massive Text Embedding Benchmark](https:\u002F\u002Fgithub.com\u002Fembeddings-benchmark\u002Fmteb) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fembeddings-benchmark\u002Fmteb.svg?cacheSeconds=86400) - 大规模文本嵌入基准（Massive Text Embedding Benchmark, MTEB）是一个全面的评估框架，评估文本嵌入模型在不同任务和语言上的性能，涵盖 8 个嵌入任务、58 个数据集和 112 种语言。\n* [NannyML](https:\u002F\u002Fgithub.com\u002FNannyML\u002Fnannyml) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FNannyML\u002Fnannyml.svg?cacheSeconds=86400) - NannyML 是一个库，允许您估算部署后的模型性能（无需访问目标值），检测数据漂移（Data Drift），并智能地将数据漂移警报链接回模型性能的变化。\n* [OGB](https:\u002F\u002Fgithub.com\u002Fsnap-stanford\u002Fogb) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fsnap-stanford\u002Fogb.svg?cacheSeconds=86400) - 开放图基准（Open Graph Benchmark, OGB）是用于图机器学习的基准数据集、数据加载器和评估器的集合。\n* [Ollama Grid Search](https:\u002F\u002Fgithub.com\u002Fdezoito\u002Follama-grid-search) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdezoito\u002Follama-grid-search.svg?cacheSeconds=86400) - Ollama Grid Search 自动化了为给定用例选择最佳模型、提示或推理参数的过程，允许您迭代它们的组合并直观地检查结果。\n* [OpenCompass](https:\u002F\u002Fgithub.com\u002Fopen-compass\u002FOpenCompass) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fopen-compass\u002FOpenCompass.svg?cacheSeconds=86400) - OpenCompass 是一个大语言模型评估平台，支持广泛的模型（LLaMA, LLaMa2, ChatGLM2, ChatGPT, Claude 等）在 50 多个数据集上的评估。\n* [OpenLIT](https:\u002F\u002Fgithub.com\u002Fopenlit\u002Fopenlit) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fopenlit\u002Fopenlit.svg?cacheSeconds=86400) - OpenLIT 是一个开源 AI 工程平台，通过可观测性、监控、护栏机制（Guardrails）、评估和无缝集成简化大语言模型工作流程。\n* [OpenLLMetry](https:\u002F\u002Fgithub.com\u002Ftraceloop\u002Fopenllmetry) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftraceloop\u002Fopenllmetry.svg?cacheSeconds=86400) - OpenLLMetry 通过性能监控、执行跟踪和调试功能，为开发人员提供对大语言模型应用的深入可见性。\n* [Opik](https:\u002F\u002Fgithub.com\u002Fcomet-ml\u002Fopik) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcomet-ml\u002Fopik.svg?cacheSeconds=86400) - Opik 是一个用于评估、测试和监控大语言模型应用的开源平台。\n* [Overcooked-AI](https:\u002F\u002Fgithub.com\u002FHumanCompatibleAI\u002Fovercooked_ai) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FHumanCompatibleAI\u002Fovercooked_ai.svg?cacheSeconds=86400) - Overcooked-AI 是一个完全协作的人机任务绩效基准环境，基于广受欢迎的电子游戏《煮糊了》（Overcooked）。\n* [Phoenix](https:\u002F\u002Fgithub.com\u002FArize-ai\u002Fphoenix) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FArize-ai\u002Fphoenix.svg?cacheSeconds=86400) - Phoenix 是一个开源 AI 可观测性平台，专为实验、评估和故障排除而设计。\n* [PromptBench](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Fpromptbench) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmicrosoft\u002Fpromptbench.svg?cacheSeconds=86400) - PromptBench 是一个用于大语言模型的统一评估框架。\n* [Promptfoo](https:\u002F\u002Fgithub.com\u002Fpromptfoo\u002Fpromptfoo) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpromptfoo\u002Fpromptfoo.svg?cacheSeconds=86400) - 用于测试越狱（Jailbreaks）、提示词注入（Prompt Injection）和其他漏洞的大语言模型红队测试（Red Teaming）和评估框架，支持持续集成\u002F持续部署（CI\u002FCD）集成。\n* [Prometheus-Eval](https:\u002F\u002Fgithub.com\u002Fprometheus-eval\u002Fprometheus-eval) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fprometheus-eval\u002Fprometheus-eval.svg?cacheSeconds=86400) - Prometheus-Eval 是一套工具集合，用于训练、评估和使用专门用于评估其他语言模型的语言模型。\n* [RagaAI Catalyst](https:\u002F\u002Fgithub.com\u002Fraga-ai-hub\u002FRagaAI-Catalyst) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fraga-ai-hub\u002FRagaAI-Catalyst.svg?cacheSeconds=86400) - RagaAI Catalyst 是一个综合平台，旨在增强大语言模型项目的管理和优化。\n* [Ragas](https:\u002F\u002Fgithub.com\u002Fexplodinggradients\u002Fragas) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fexplodinggradients\u002Fragas.svg?cacheSeconds=86400) - Ragas 是一个用于评估检索增强生成管道的框架。\n* 
[RewardBench](https:\u002F\u002Fgithub.com\u002Fallenai\u002Freward-bench) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fallenai\u002Freward-bench.svg?cacheSeconds=86400) - RewardBench 是一个旨在评估奖励模型能力和安全的基准测试。\n* [RLBench](https:\u002F\u002Fgithub.com\u002Fstepjam\u002FRLBench) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fstepjam\u002FRLBench.svg?cacheSeconds=86400) - RLBench 是一个雄心勃勃的大规模基准和学习环境，旨在促进多个视觉引导操作研究领域的研究，包括：强化学习、模仿学习、多任务学习、几何计算机视觉，特别是少样本学习（Few-shot Learning）。\n* [SimplerEnv](https:\u002F\u002Fgithub.com\u002Fsimpler-env\u002FSimplerEnv) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fsimpler-env\u002FSimplerEnv.svg?cacheSeconds=86400) - SimplerEnv 是用于真实机器人设置的模拟操作策略评估环境。\n* [SwanLab](https:\u002F\u002Fgithub.com\u002FSwanHubX\u002FSwanLab) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FSwanHubX\u002FSwanLab.svg?cacheSeconds=86400) - SwanLab 是一个 AI 训练跟踪和可视化工具。\n* [Speech-to-Text Benchmark](https:\u002F\u002Fgithub.com\u002FPicovoice\u002Fspeech-to-text-benchmark) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FPicovoice\u002Fspeech-to-text-benchmark.svg?cacheSeconds=86400) - Speech-to-Text Benchmark 是一个极简且可扩展的框架，用于对不同语音转文本引擎进行基准测试。\n* [TensorFlow Model Analysis](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Fmodel-analysis) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftensorflow\u002Fmodel-analysis.svg?cacheSeconds=86400) - TensorFlow Model Analysis (TFMA) 是一个库，用于以分布式方式在大量数据上评估 TensorFlow 模型，使用训练器中定义的相同指标。\n* [TorchBench](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Fbenchmark) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpytorch\u002Fbenchmark.svg?cacheSeconds=86400) - TorchBench 是一组用于评估 PyTorch 性能的开源基准测试集合。\n* [TruLens](https:\u002F\u002Fgithub.com\u002Ftruera\u002Ftrulens) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftruera\u002Ftrulens.svg?cacheSeconds=86400) - TruLens 提供了一套用于评估和跟踪大语言模型实验的工具。\n* 
[TrustLLM](https:\u002F\u002Fgithub.com\u002FHowieHwong\u002FTrustLLM) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FHowieHwong\u002FTrustLLM.svg?cacheSeconds=86400) - TrustLLM 是一个全面的框架，用于评估大语言模型的可信度，包括原则、调查和基准测试。\n* [VBench](https:\u002F\u002Fgithub.com\u002FVchitect\u002FVBench) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FVchitect\u002FVBench.svg?cacheSeconds=86400) - VBench 是一个用于视频生成模型的综合性基准测试套件。\n* [VLMEvalKit](https:\u002F\u002Fgithub.com\u002Fopen-compass\u002FVLMEvalKit) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fopen-compass\u002FVLMEvalKit.svg?cacheSeconds=86400) - VLMEvalKit 是一个用于大型视觉语言模型（Large Vision-Language Models, LVLMs）的开源评估工具包。\n\n## 可解释性与公平性\n* [Aequitas](https:\u002F\u002Fgithub.com\u002Fdssg\u002Faequitas) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdssg\u002Faequitas.svg?cacheSeconds=86400) - 一个开源的偏差审计工具包，供数据科学家、机器学习研究人员和政策制定者用于审计机器学习模型是否存在歧视和偏差，并围绕开发和部署预测风险评估工具做出知情且公平的决策。\n* [AI Explainability 360](https:\u002F\u002Fgithub.com\u002FTrusted-AI\u002FAIX360) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FTrusted-AI\u002FAIX360.svg?cacheSeconds=86400) - 数据和机器学习模型的解释性（Interpretability）与可解释性（Explainability），包含一套全面的算法，覆盖不同维度的解释以及代理可解释性指标。\n* [AI Fairness 360](https:\u002F\u002Fgithub.com\u002FTrusted-AI\u002FAIF360) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FTrusted-AI\u002FAIF360.svg?cacheSeconds=86400) - 针对数据集和机器学习模型的一套全面的公平性指标，这些指标的说明，以及减轻数据集和模型中偏差的算法。\n* [Alibi](https:\u002F\u002Fgithub.com\u002FSeldonIO\u002Falibi) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FSeldonIO\u002Falibi.svg?cacheSeconds=86400) - Alibi 是一个面向机器学习模型检查和解释的开源 Python 库。该库最初的焦点在于基于实例的黑盒模型解释。\n* [captum](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Fcaptum) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpytorch\u002Fcaptum.svg?cacheSeconds=86400) - 由 Facebook 开发的 PyTorch 模型解释和理解库。它包含集成梯度（integrated 
gradients）、显著性图（saliency maps）、smoothgrad、vargrad 等针对 PyTorch 模型的通用实现。\n* [Fairlearn](https:\u002F\u002Fgithub.com\u002Ffairlearn\u002Ffairlearn) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffairlearn\u002Ffairlearn.svg?cacheSeconds=86400) - Fairlearn 是一个 Python 工具包，用于评估和减轻机器学习模型中的不公平性。\n* [InterpretML](https:\u002F\u002Fgithub.com\u002Finterpretml\u002Finterpret) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Finterpretml\u002Finterpret.svg?cacheSeconds=86400) - InterpretML 是一个开源软件包，用于训练可解释模型并解释黑盒系统。\n* [Lightly](https:\u002F\u002Fgithub.com\u002Flightly-ai\u002Flightly) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flightly-ai\u002Flightly.svg?cacheSeconds=86400) - 一个用于图像自监督学习（self-supervised learning）的 Python 框架。学习到的表示可用于分析未标记数据中的分布并重新平衡数据集。\n* [LOFO Importance](https:\u002F\u002Fgithub.com\u002Faerdem4\u002Flofo-importance) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Faerdem4\u002Flofo-importance.svg?cacheSeconds=86400) - LOFO（Leave One Feature Out，留一特征法）重要性计算一组特征的重要性，基于所选指标，针对所选模型，通过迭代地从集合中移除每个特征，并使用所选验证方案基于所选指标评估模型性能。\n* [mljar-supervised](https:\u002F\u002Fgithub.com\u002Fmljar\u002Fmljar-supervised) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmljar\u002Fmljar-supervised.svg?cacheSeconds=86400) - 一个用于表格数据的 AutoML 的 Python 包，具有特征工程、超参数调优、解释和自动文档功能。\n* [Quantus](https:\u002F\u002Fgithub.com\u002Funderstandable-machine-intelligence-lab\u002FQuantus) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Funderstandable-machine-intelligence-lab\u002FQuantus.svg?cacheSeconds=86400) - Quantus 是一个可解释人工智能（eXplainable AI）工具包，用于负责任地评估神经网络解释。\n* [SHAP](https:\u002F\u002Fgithub.com\u002Fshap\u002Fshap) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fshap\u002Fshap.svg?cacheSeconds=86400) - SHapley Additive exPlanations（SHAP）是一种统一的解释任何机器学习模型输出的方法。\n* [SHAPash](https:\u002F\u002Fgithub.com\u002FMAIF\u002Fshapash) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FMAIF\u002Fshapash.svg?cacheSeconds=86400) - Shapash 是一个 Python 库，提供多种可视化类型，显示每个人都能理解的显式标签。\n* [WhatIf](https:\u002F\u002Fgithub.com\u002Fpair-code\u002Fwhat-if-tool) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpair-code\u002Fwhat-if-tool.svg?cacheSeconds=86400) - 一个易于使用的界面，用于扩展对黑盒分类或回归 ML 模型的理解。\n\n## 特征存储\n* [FEAST](https:\u002F\u002Fgithub.com\u002Ffeast-dev\u002Ffeast)  ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffeast-dev\u002Ffeast.svg?cacheSeconds=86400) - Feast（特征存储）是机器学习的开源特征存储。Feast 是利用现有基础设施、将分析数据投入生产以用于模型训练和在线推理的最快途径。\n* [Featureform](https:\u002F\u002Fgithub.com\u002Ffeatureform\u002Ffeatureform) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffeatureform\u002Ffeatureform.svg?cacheSeconds=86400) - 一个虚拟特征存储。与您现有的基础设施即插即用。数据科学家批准。发现、治理、血缘和协作只需 pip install 即可。支持 pandas、Python、spark、SQL + 与主要云供应商的集成。 \n* [Hopsworks Feature Store](https:\u002F\u002Fgithub.com\u002Flogicalclocks\u002Fhopsworks) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flogicalclocks\u002Fhopsworks.svg?cacheSeconds=86400) - 机器学习离线\u002F在线特征存储 [(视频)](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=N1BjPk1smdg)。\n\n## 工业级异常检测\n* [Alibi Detect](https:\u002F\u002Fgithub.com\u002FSeldonIO\u002Falibi-detect) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FSeldonIO\u002Falibi-detect.svg?cacheSeconds=86400) - alibi-detect 是一个专注于离群点、对抗样本和概念漂移检测的 Python 包。\n* [Darts](https:\u002F\u002Fgithub.com\u002Funit8co\u002Fdarts) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Funit8co\u002Fdarts.svg?cacheSeconds=86400) - Darts 是一个易于使用的 Python 库，用于对时间序列进行预测和异常检测。\n* [Deequ](https:\u002F\u002Fgithub.com\u002Fawslabs\u002Fdeequ) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fawslabs\u002Fdeequ.svg?cacheSeconds=86400) - 一个构建在 Apache Spark 之上的库，用于定义“数据的单元测试”，以衡量大型数据集中的数据质量。\n* 
[PyOD](https:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fyzhao062\u002Fpyod.svg?cacheSeconds=86400) - 一个用于可扩展离群点检测（异常检测）的 Python 工具箱。\n* [TFDV](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Fdata-validation) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftensorflow\u002Fdata-validation.svg?cacheSeconds=86400) - TFDV（Tensorflow Data Validation）是一个用于探索和验证机器学习数据的库。\n\n## 工业级计算机视觉\n* [CameraTraps](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FCameraTraps) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmicrosoft\u002FCameraTraps.svg?cacheSeconds=86400) - CameraTraps (PyTorch Wildlife) 是一个用于野生动物图像分析的协作式深度学习框架，提供在大规模相机陷阱数据集上训练的检测和分类模型。\n* [Deep Lake](https:\u002F\u002Fgithub.com\u002Factiveloopai\u002Fdeeplake) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Factiveloopai\u002Fdeeplake.svg?cacheSeconds=86400) - Deep Lake 是专为计算机视觉优化的数据基础设施。\n* [DeepForest](https:\u002F\u002Fgithub.com\u002Fweecology\u002FDeepForest) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fweecology\u002FDeepForest.svg?cacheSeconds=86400) - DeepForest 是一个 Python 包，用于使用深度学习从航空 RGB 图像中训练和预测单个树冠及物种。\n* [Detectron2](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fdetectron2) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffacebookresearch\u002Fdetectron2.svg?cacheSeconds=86400) - Detectron2 是 Facebook AI Research 的下一代库，提供最先进的检测和分割算法。\n* [KerasCV](https:\u002F\u002Fgithub.com\u002Fkeras-team\u002Fkeras-cv) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fkeras-team\u002Fkeras-cv.svg?cacheSeconds=86400) - KerasCV 是一套面向计算机视觉的模块化 Keras 组件库。\n* [Kornia](https:\u002F\u002Fgithub.com\u002Fkornia\u002Fkornia) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fkornia\u002Fkornia.svg?cacheSeconds=86400) - Kornia 是一个基于 PyTorch 构建的可微分计算机视觉库，提供丰富的可微分图像处理与几何视觉算法。\n* 
[LAVIS](https:\u002F\u002Fgithub.com\u002Fsalesforce\u002FLAVIS) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fsalesforce\u002FLAVIS.svg?cacheSeconds=86400) - LAVIS 是一个用于语言与视觉智能研究和应用的深度学习库。\n* [libcom](https:\u002F\u002Fgithub.com\u002Fbcmi\u002Flibcom) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fbcmi\u002Flibcom.svg?cacheSeconds=86400) - libcom 是一个图像合成工具箱。\n* [LightlyTrain](https:\u002F\u002Fgithub.com\u002Flightly-ai\u002Flightly-train) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flightly-ai\u002Flightly-train.svg?cacheSeconds=86400) - 在未标记数据上预训练计算机视觉模型，适用于工业应用。\n* [MMCV](https:\u002F\u002Fgithub.com\u002Fopen-mmlab\u002Fmmcv) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fopen-mmlab\u002Fmmcv.svg?cacheSeconds=86400) - MMCV 是 OpenMMLab 的基础计算机视觉库，提供图像和视频处理、数据转换与增强、CNN 架构以及优化的 CUDA 操作等核心功能。\n* [SuperGradients](https:\u002F\u002Fgithub.com\u002FDeci-AI\u002Fsuper-gradients) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FDeci-AI\u002Fsuper-gradients.svg?cacheSeconds=86400) - SuperGradients 是一个用于训练基于 PyTorch 的计算机视觉模型的开源库。\n* [supervision](https:\u002F\u002Fgithub.com\u002Froboflow\u002Fsupervision) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Froboflow\u002Fsupervision.svg?cacheSeconds=86400) - Supervision 是一个旨在高效管理计算机视觉流程的 Python 库，提供用于模型标注、可视化和监控的工具。\n* [VideoSys](https:\u002F\u002Fgithub.com\u002FNUS-HPC-AI-Lab\u002FVideoSys) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FNUS-HPC-AI-Lab\u002FVideoSys.svg?cacheSeconds=86400) - VideoSys 支持多种扩散模型，通过我们的各种加速技术，使这些模型运行更快且消耗更少的内存。\n\n## 工业级信息检索\n* [AutoRAG](https:\u002F\u002Fgithub.com\u002FMarker-Inc-Korea\u002FAutoRAG) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FMarker-Inc-Korea\u002FAutoRAG.svg?cacheSeconds=86400) - AutoRAG 是一个 RAG（检索增强生成）AutoML（自动机器学习）工具，用于为您的数据自动寻找最佳的 RAG 流水线。\n* [BGE](https:\u002F\u002Fgithub.com\u002FFlagOpen\u002FFlagEmbedding) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FFlagOpen\u002FFlagEmbedding.svg?cacheSeconds=86400) - BGE 为搜索和 RAG（检索增强生成）构建了一站式检索工具包。\n* [Cognita](https:\u002F\u002Fgithub.com\u002Ftruefoundry\u002Fcognita) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftruefoundry\u002Fcognita.svg?cacheSeconds=86400) - Cognita 是一个 RAG（检索增强生成）框架，用于构建模块化和生产就绪的应用程序。\n* [DocArray](https:\u002F\u002Fgithub.com\u002Fdocarray\u002Fdocarray) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdocarray\u002Fdocarray.svg?cacheSeconds=86400) - DocArray 是一个用于处理嵌套、非结构化、多模态传输数据的库，包括文本、图像、音频、视频、3D 网格等。它允许深度学习工程师通过 Pythonic（符合 Python 风格）API 高效地处理、嵌入、搜索、推荐、存储和传输多模态数据。\n* [EmbedAnything](https:\u002F\u002Fgithub.com\u002FStarlightSearch\u002FEmbedAnything) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FStarlightSearch\u002FEmbedAnything.svg?cacheSeconds=86400) - EmbedAnything 是一个用 Rust 构建的极简、轻量且高性能的嵌入流水线，用于从文本、图像、音频、PDF 和其他媒体生成嵌入，支持稠密、稀疏、ONNX 和后期交互嵌入。\n* [Faiss](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Ffaiss) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffacebookresearch\u002Ffaiss.svg?cacheSeconds=86400) - Faiss 是一个用于稠密向量高效相似度搜索和聚类的库。\n* [fastRAG](https:\u002F\u002Fgithub.com\u002FIntelLabs\u002FfastRAG) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FIntelLabs\u002FfastRAG.svg?cacheSeconds=86400) - fastRAG 是一个研究框架，用于构建高效优化的检索增强生成流水线，结合了最先进的 LLM（大语言模型）和信息检索技术。\n* [GraphRAG](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Fgraphrag) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmicrosoft\u002Fgraphrag.svg?cacheSeconds=86400) - GraphRAG 是一套数据流水线和转换套件，旨在利用 LLM（大语言模型）的能力从非结构化文本中提取有意义的结构化数据。\n* [HippoRAG](https:\u002F\u002Fgithub.com\u002FOSU-NLP-Group\u002FHippoRAG) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FOSU-NLP-Group\u002FHippoRAG.svg?cacheSeconds=86400) - HippoRAG 是一种新颖的检索增强生成（RAG）框架，受人类长期记忆神经生物学启发，使 LLM（大语言模型）能够跨外部文档持续整合知识。\n* [JamAI 
Base](https:\u002F\u002Fgithub.com\u002FEmbeddedLLM\u002FJamAIBase) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FEmbeddedLLM\u002FJamAIBase.svg?cacheSeconds=86400) - JamAI Base 是一个开源 RAG（检索增强生成）后端平台，集成了嵌入式数据库（SQLite）和嵌入式向量数据库（LanceDB），具备托管内存和 RAG 功能。它具有内置的 LLM、向量嵌入和重排序编排与管理功能，所有功能均可通过便捷直观的类电子表格 UI 和简单的 REST API 访问。\n* [LangExtract](https:\u002F\u002Fgithub.com\u002Fgoogle\u002Flangextract) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle\u002Flangextract.svg?cacheSeconds=86400) - LangExtract 是一个 Python 库，使用 LLM（大语言模型）根据用户定义的指令从非结构化文本文档中提取结构化信息。它处理临床笔记或报告等材料，识别并组织关键细节，同时确保提取的数据与源文本对应。\n* [LightRAG](https:\u002F\u002Fgithub.com\u002FHKUDS\u002FLightRAG) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FHKUDS\u002FLightRAG.svg?cacheSeconds=86400) - 一个简单快速的检索增强生成框架。\n* [llmware](https:\u002F\u002Fgithub.com\u002Fllmware-ai\u002Fllmware) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fllmware-ai\u002Fllmware.svg?cacheSeconds=86400) - llmware 提供了一个统一框架，用于构建基于 LLM（大语言模型）的应用程序（例如，RAG、Agents（智能体）），使用小型专用模型，可以私有部署，安全地与企业知识库集成，并经济高效地针对任何业务流程进行微调和适配。\n* [Mem0](https:\u002F\u002Fgithub.com\u002Fmem0ai\u002Fmem0) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmem0ai\u002Fmem0.svg?cacheSeconds=86400) - Mem0 通过智能记忆层增强 AI 助手和智能体，实现个性化的 AI 交互。\n* [NGT](https:\u002F\u002Fgithub.com\u002Fyahoojapan\u002FNGT) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fyahoojapan\u002FNGT.svg?cacheSeconds=86400) - NGT 提供命令和库，用于在高维向量数据空间中对大量数据进行高速近似最近邻搜索。\n* [NMSLIB](https:\u002F\u002Fgithub.com\u002Fnmslib\u002Fnmslib) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fnmslib\u002Fnmslib.svg?cacheSeconds=86400) - 非度量空间库（NMSLIB）：一个高效的相似度搜索库，以及用于评估通用非度量空间中 k-NN（k-近邻）方法的工具包。\n* [Qdrant](https:\u002F\u002Fgithub.com\u002Fqdrant\u002Fqdrant) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fqdrant\u002Fqdrant.svg?cacheSeconds=86400) - 一个具有扩展过滤支持的开源向量相似度搜索引擎。\n* 
[R2R](https:\u002F\u002Fgithub.com\u002FSciPhi-AI\u002FR2R) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FSciPhi-AI\u002FR2R.svg?cacheSeconds=86400) - R2R（RAG to Riches）是一个综合平台，用于构建、部署和扩展 RAG（检索增强生成）应用程序，支持混合搜索、多模态支持和高级可观测性。\n* [RAGFlow](https:\u002F\u002Fgithub.com\u002Finfiniflow\u002Fragflow) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Finfiniflow\u002Fragflow.svg?cacheSeconds=86400) - RAGFlow 是一个基于深度文档理解的 RAG（检索增强生成）引擎。\n* [RAGxplorer](https:\u002F\u002Fgithub.com\u002Fgabrielchua\u002FRAGxplorer) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgabrielchua\u002FRAGxplorer.svg?cacheSeconds=86400) - RAGxplorer 是构建 RAG（检索增强生成）可视化的工具。\n* [RAG-FiT](https:\u002F\u002Fgithub.com\u002FIntelLabs\u002FRAG-FiT) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FIntelLabs\u002FRAG-FiT.svg?cacheSeconds=86400) - RAG-FiT 是一个库，旨在通过在专门创建的 RAG 增强数据集上微调模型来提高 LLM（大语言模型）使用外部信息的能力。\n* [TextWorld](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FTextWorld) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmicrosoft\u002FTextWorld.svg?cacheSeconds=86400) - TextWorld 是一个基于文本的游戏生成器和可扩展的沙箱学习环境，用于训练和测试强化学习（RL）智能体。\n* [Vanna](https:\u002F\u002Fgithub.com\u002Fvanna-ai\u002Fvanna) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvanna-ai\u002Fvanna.svg?cacheSeconds=86400) - Vanna 是一个用于 SQL 生成及相关功能的 RAG（检索增强生成）框架。\n\n## 工业级自然语言处理 (NLP)\n* [aisuite](https:\u002F\u002Fgithub.com\u002Fandrewyng\u002Faisuite) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fandrewyng\u002Faisuite.svg?cacheSeconds=86400) - aisuite 是一个简单的、统一的接口，用于连接多个**生成式 AI (Generative AI)** 提供商。\n* [Align-Anything](https:\u002F\u002Fgithub.com\u002FPKU-Alignment\u002Falign-anything) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FPKU-Alignment\u002Falign-anything.svg?cacheSeconds=86400) - Align-Anything 旨在将任何模态的**大模型 (Large Models)**（任意到任意模型），包括**大语言模型 (LLMs)**、**视觉语言模型 (VLMs)** 等与人类的意图和价值观对齐。\n* 
[BERTopic](https:\u002F\u002Fgithub.com\u002FMaartenGr\u002FBERTopic) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FMaartenGr\u002FBERTopic.svg?cacheSeconds=86400) - BERTopic 是一种主题建模技术，利用 **transformers** 和 c-TF-IDF 创建密集聚类，从而生成易于解释的主题，同时保留主题描述中的重要词汇。\n* [Burr](https:\u002F\u002Fgithub.com\u002Fdagworks-inc\u002Fburr) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdagworks-inc\u002Fburr.svg?cacheSeconds=86400) - Burr 帮助您开发做出决策的应用程序（聊天机器人、**智能体 (Agent)**、模拟）。它具备生产就绪的功能（遥测、持久化、部署等）以及开源、免费且本地优先的 Burr UI。\n* [CodeTF](https:\u002F\u002Fgithub.com\u002Fsalesforce\u002FCodeTF) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fsalesforce\u002FCodeTF.svg?cacheSeconds=86400) - CodeTF 是一个一站式基于 Python 的 **transformer** 库，用于代码**大语言模型 (Code LLMs)** 和代码智能，为代码摘要、翻译、代码生成等代码智能任务的训练和推理提供无缝接口。 \n* [Dify](https:\u002F\u002Fgithub.com\u002Flanggenius\u002Fdify) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flanggenius\u002Fdify.svg?cacheSeconds=86400) - Dify 是一个开源的**大语言模型 (LLM)** 应用开发平台，其直观的界面结合了**代理 AI (Agentic AI)** 工作流、**RAG (检索增强生成)** 管道、**智能体 (Agent)** 能力、模型管理、可观测性功能等，让您能迅速从原型走向生产环境。\n* [dspy](https:\u002F\u002Fgithub.com\u002Fstanfordnlp\u002Fdspy) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fstanfordnlp\u002Fdspy.svg?cacheSeconds=86400) - 一个用于使用**基础模型 (Foundation Models)** 进行编程的框架。\n* [Dust](https:\u002F\u002Fgithub.com\u002Fdust-tt\u002Fdust) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdust-tt\u002Fdust.svg?cacheSeconds=86400) - Dust 协助设计和部署**大语言模型**应用程序。\n* [ESPnet](https:\u002F\u002Fgithub.com\u002Fespnet\u002Fespnet) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fespnet\u002Fespnet.svg?cacheSeconds=86400) - ESPnet 是一个端到端语音处理工具包。\n* [FastChat](https:\u002F\u002Fgithub.com\u002Flm-sys\u002FFastChat) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flm-sys\u002FFastChat.svg?cacheSeconds=86400) - FastChat 
是一个用于训练、服务和评估基于**大语言模型**的聊天机器人的开放平台。\n* [Flair](https:\u002F\u002Fgithub.com\u002FflairNLP\u002Fflair) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FflairNLP\u002Fflair.svg?cacheSeconds=86400) - 由 Zalando 开发的简单框架，用于最先进的**自然语言处理 (NLP)**，直接基于 **PyTorch** 构建。\n* [Gensim](https:\u002F\u002Fgithub.com\u002Fpiskvorky\u002Fgensim) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpiskvorky\u002Fgensim.svg?cacheSeconds=86400) - Gensim 是一个用于大型语料库的主题建模、文档索引和相似度检索的 Python 库。\n* [gpt-fast](https:\u002F\u002Fgithub.com\u002Fmeta-pytorch\u002Fgpt-fast) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmeta-pytorch\u002Fgpt-fast.svg?cacheSeconds=86400) - 简单高效的**原生 PyTorch transformer** 文本生成。\n* [h2oGPT](https:\u002F\u002Fgithub.com\u002Fh2oai\u002Fh2ogpt) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fh2oai\u002Fh2ogpt.svg?cacheSeconds=86400) - h2oGPT 是一个开源**生成式 AI**，赋予像您这样的组织拥有**大语言模型**的能力，同时保留您的数据所有权。\n* [Haystack](https:\u002F\u002Fgithub.com\u002Fdeepset-ai\u002Fhaystack) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdeepset-ai\u002Fhaystack.svg?cacheSeconds=86400) - Haystack 是一个开源**自然语言处理 (NLP)** 框架，用于使用 **Transformer** 模型和**LLMs (GPT-3 等)** 与您的数据进行交互。Haystack 提供生产就绪的工具，可快速构建类似 ChatGPT 的问答、语义搜索、文本生成等功能。\n* [Interactive Composition Explorer](https:\u002F\u002Fgithub.com\u002Foughtinc\u002Fice) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Foughtinc\u002Fice.svg?cacheSeconds=86400) - ICE 是用于**语言模型**程序的 Python 库和追踪可视化工具。\n* [Jan](https:\u002F\u002Fgithub.com\u002Fjanhq\u002Fjan) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjanhq\u002Fjan.svg?cacheSeconds=86400) - Jan 是一个开源的 ChatGPT 替代品，可在您的计算机上 100% 离线运行，允许您下载并在本地运行 **LLMs**，享有完全的控制权和隐私保护。\n* [Lamini](https:\u002F\u002Fgithub.com\u002Flamini-ai\u002Flamini) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flamini-ai\u002Flamini.svg?cacheSeconds=86400) - Lamini 是一个**LLM** 引擎，用于快速定制模型。\n* 
[LangChain](https:\u002F\u002Fgithub.com\u002Flangchain-ai\u002Flangchain) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flangchain-ai\u002Flangchain.svg?cacheSeconds=86400) - LangChain 通过组合性协助构建基于**LLMs** 的应用程序。\n* [LlamaIndex](https:\u002F\u002Fgithub.com\u002Frun-llama\u002Fllama_index) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Frun-llama\u002Fllama_index.svg?cacheSeconds=86400) - LlamaIndex (GPT Index) 是您的 **LLM** 应用程序的数据框架。\n* [LLaMA](https:\u002F\u002Fgithub.com\u002Fmeta-llama\u002Fllama) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmeta-llama\u002Fllama.svg?cacheSeconds=86400) - LLaMA 旨在作为一个最小化、可修改且易读的示例，用于加载 LLaMA (arXiv) 模型并运行**推理**。\n* [LLaMA-Factory](https:\u002F\u002Fgithub.com\u002Fhiyouga\u002FLLaMA-Factory) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhiyouga\u002FLLaMA-Factory.svg?cacheSeconds=86400) - LLaMA-Factory 通过零代码的 CLI 和 Web UI，让用户轻松**微调** 100 多种**大语言模型**。\n* [LLMBox](https:\u002F\u002Fgithub.com\u002FRUCAIBox\u002FLLMBox) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FRUCAIBox\u002FLLMBox.svg?cacheSeconds=86400) - LLMBox 是一个实现 **LLMs** 的综合库，包括统一的训练管道和全面的模型评估。\n* [LLaMA2-Accessory](https:\u002F\u002Fgithub.com\u002FAlpha-VLLM\u002FLLaMA2-Accessory) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FAlpha-VLLM\u002FLLaMA2-Accessory.svg?cacheSeconds=86400) - LLaMA2-Accessory 是一个用于**大语言模型 (LLMs)** 和多模态 **LLM** 预训练、微调和部署的开源工具包。\n* [LMFlow](https:\u002F\u002Fgithub.com\u002FOptimalScale\u002FLMFlow) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FOptimalScale\u002FLMFlow.svg?cacheSeconds=86400) - LMFlow 是一个可扩展、便捷且高效的工具箱，用于**微调**大型**机器学习模型**。\n* [Megatron-LM](https:\u002F\u002Fgithub.com\u002FNVIDIA\u002FMegatron-LM) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FNVIDIA\u002FMegatron-LM.svg?cacheSeconds=86400) - Megatron-LM 是一个高度优化且高效的用于训练**大语言模型**的库。\n* 
[MindNLP](https:\u002F\u002Fgithub.com\u002Fmindspore-lab\u002Fmindnlp) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmindspore-lab\u002Fmindnlp.svg?cacheSeconds=86400) - MindNLP 是一个基于 MindSpore 的易用且高性能的**自然语言处理 (NLP)** 和 **LLM** 框架，兼容 Huggingface 的模型和数据集。\n* [MLC LLM](https:\u002F\u002Fgithub.com\u002Fmlc-ai\u002Fmlc-llm) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmlc-ai\u002Fmlc-llm.svg?cacheSeconds=86400) - MLC LLM 是一个通用解决方案，允许任何**语言模型**在多样化的硬件后端和本地应用程序上原生部署，并为每个人提供一个高效框架，以便针对各自用例进一步优化模型性能。\n* [mlx-lm](https:\u002F\u002Fgithub.com\u002Fml-explore\u002Fmlx-lm) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fml-explore\u002Fmlx-lm.svg?cacheSeconds=86400) - MLX LM 是一个 Python 包，用于在 Apple Silicon 上使用 MLX 生成文本和**微调大语言模型**，具有与 Hugging Face Hub 的集成以及对量化和分布式推理的支持。\n* [Ollama](https:\u002F\u002Fgithub.com\u002Follama\u002Follama) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Follama\u002Follama.svg?cacheSeconds=86400) - 在本地快速上手**大语言模型**。\n* [olmOCR](https:\u002F\u002Fgithub.com\u002Fallenai\u002Folmocr) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fallenai\u002Folmocr.svg?cacheSeconds=86400) - olmOCR 是一个用于训练**语言模型**以处理真实环境中 PDF 文档的工具包。\n* [PaddleNLP](https:\u002F\u002Fgithub.com\u002FPaddlePaddle\u002FPaddleNLP) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FPaddlePaddle\u002FPaddleNLP.svg?cacheSeconds=86400) - PaddleNLP 是基于 PaddlePaddle 深度学习框架的**大型语言模型 (LLM)** 开发套件，支持高效的大模型训练、无损压缩以及各种硬件设备上的高性能推理。\n* [PyLLMs](https:\u002F\u002Fgithub.com\u002Fkagisearch\u002Fpyllms) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fkagisearch\u002Fpyllms.svg?cacheSeconds=86400) - PyLLMs 是一个极简 Python 库，用于连接各种**语言模型 (LLMs)**，并内置了模型性能基准测试。\n* [Semantic Kernel](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Fsemantic-kernel) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmicrosoft\u002Fsemantic-kernel.svg?cacheSeconds=86400) - Semantic Kernel 是一个 
SDK，它将 OpenAI、Azure OpenAI 和 Hugging Face 等**大型语言模型 (LLMs)** 与 C#、Python 和 Java 等传统编程语言集成。Semantic Kernel 通过允许您定义插件来实现这一点，这些插件只需几行代码即可链接在一起。\n* [Sentence Transformers](https:\u002F\u002Fgithub.com\u002FUKPLab\u002Fsentence-transformers) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FUKPLab\u002Fsentence-transformers.svg?cacheSeconds=86400) - Sentence Transformers 提供了一种简单的方法来计算句子、段落和图像的密集向量表示。\n* [SpaCy](https:\u002F\u002Fgithub.com\u002Fexplosion\u002FspaCy) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fexplosion\u002FspaCy.svg?cacheSeconds=86400) - spaCy 是一个用于 Python 和 Cython 的高级**自然语言处理**库。\n* [SWIFT](https:\u002F\u002Fgithub.com\u002Fmodelscope\u002Fms-swift) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmodelscope\u002Fms-swift.svg?cacheSeconds=86400) - SWIFT 是一个可扩展的轻量级**深度学习**模型微调基础设施。\n* [Tensorflow Lingvo](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Flingvo) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftensorflow\u002Flingvo.svg?cacheSeconds=86400) - 一个用于在 **TensorFlow** 中构建**神经网络**的框架，特别是序列模型。\n* [Tensorflow Text](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Ftext) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftensorflow\u002Ftext.svg?cacheSeconds=86400) - **TensorFlow** Text 提供了一系列与文本相关的类和操作，可直接用于 **TensorFlow 2.0**。\n* [ToolBench](https:\u002F\u002Fgithub.com\u002FOpenBMB\u002FToolBench) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FOpenBMB\u002FToolBench.svg?cacheSeconds=86400) - ToolBench 是一个用于训练、服务和评估用于工具学习的**大语言模型**的开放平台。\n* [Transformers](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Ftransformers) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Ftransformers.svg?cacheSeconds=86400) - Huggingface 的**自然语言处理 (NLP)** 最先进预训练模型库。\n\n## 工业级推荐系统\n* [EasyRec](https:\u002F\u002Fgithub.com\u002Falibaba\u002FEasyRec) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Falibaba\u002FEasyRec.svg?cacheSeconds=86400) - EasyRec 是一个用于大规模推荐算法的框架。\n* [Gorse](https:\u002F\u002Fgithub.com\u002Fgorse-io\u002Fgorse) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgorse-io\u002Fgorse.svg?cacheSeconds=86400) - Gorse 旨在成为一款通用的开源推荐系统，能够快速集成到多种在线服务中。\n* [Merlin](https:\u002F\u002Fgithub.com\u002FNVIDIA-Merlin\u002FMerlin) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FNVIDIA-Merlin\u002FMerlin.svg?cacheSeconds=86400) - NVIDIA Merlin 是一个开源库，提供端到端的 GPU 加速推荐系统，涵盖从特征工程、预处理到训练深度学习模型以及在生产环境中进行推理的全过程。\n* [Recommenders](https:\u002F\u002Fgithub.com\u002Frecommenders-team\u002Frecommenders) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Frecommenders-team\u002Frecommenders.svg?cacheSeconds=86400) - Recommenders 包含构建推荐系统的基准测试和最佳实践，以 Jupyter 笔记本的形式提供。\n* [TorchRec](https:\u002F\u002Fgithub.com\u002Fmeta-pytorch\u002Ftorchrec) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmeta-pytorch\u002Ftorchrec.svg?cacheSeconds=86400) - TorchRec 是一个面向 PyTorch 的领域库，旨在为大规模推荐系统（RecSys）提供所需的常见稀疏性和并行性原语。\n\n## 工业级强化学习 (Reinforcement Learning)\n* [Acme](https:\u002F\u002Fgithub.com\u002Fgoogle-deepmind\u002Facme) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle-deepmind\u002Facme.svg?cacheSeconds=86400) - Acme 是一个强化学习 (RL) 构建块库，致力于提供简单、高效且可读性强的智能体。\n* [AReaL](https:\u002F\u002Fgithub.com\u002FinclusionAI\u002FAReaL) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FinclusionAI\u002FAReaL.svg?cacheSeconds=86400) - AReaL 是一个强化学习库。\n* [ChatLearn](https:\u002F\u002Fgithub.com\u002Falibaba\u002FChatLearn) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Falibaba\u002FChatLearn.svg?cacheSeconds=86400) - ChatLearn 是一个面向大语言模型 (LLM) 的灵活高效的强化学习 (RL) 训练框架，支持分布式训练引擎 (FSDP2, Megatron) 和推理引擎 (vLLM, SGLang)，并采用 GRPO 和 GSPO 等现代强化学习算法。\n* [CleanRL](https:\u002F\u002Fgithub.com\u002Fvwxyzjn\u002Fcleanrl) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvwxyzjn\u002Fcleanrl.svg?cacheSeconds=86400) - CleanRL 是一个深度强化学习库，提供高质量的文件级实现，具备对研究友好的特性。该实现简洁明了，但我们可以利用 AWS Batch 扩展以运行数千个实验。\n* [CompilerGym](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002FCompilerGym) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffacebookresearch\u002FCompilerGym.svg?cacheSeconds=86400) - CompilerGym 是一个用于编译器任务的易于使用且高性能的强化学习 (RL) 环境库。\n* [d3rlpy](https:\u002F\u002Fgithub.com\u002Ftakuseno\u002Fd3rlpy) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftakuseno\u002Fd3rlpy.svg?cacheSeconds=86400) - d3rlpy 是供从业者和研究人员使用的离线深度强化学习库。\n* [D4RL](https:\u002F\u002Fgithub.com\u002FFarama-Foundation\u002FD4RL) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FFarama-Foundation\u002FD4RL.svg?cacheSeconds=86400) - D4RL 是一个用于离线强化学习 (RL) 的开源基准。\n* [Dopamine](https:\u002F\u002Fgithub.com\u002Fgoogle\u002Fdopamine) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle\u002Fdopamine.svg?cacheSeconds=86400) - Dopamine 是一个用于快速原型开发强化学习 (RL) 算法的研究框架。它旨在满足对小型、易于理解的代码库的需求，用户可以在其中自由地尝试各种大胆的想法（推测性研究）。\n* [EvoTorch](https:\u002F\u002Fgithub.com\u002Fnnaisense\u002Fevotorch) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fnnaisense\u002Fevotorch.svg?cacheSeconds=86400) - EvoTorch 是在 NNAISENSE 开发的开源进化计算库，基于 PyTorch 构建。\n* [FinRL](https:\u002F\u002Fgithub.com\u002FAI4Finance-Foundation\u002FFinRL) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FAI4Finance-Foundation\u002FFinRL.svg?cacheSeconds=86400) - FinRL 是首个展示金融强化学习 (RL) 巨大潜力的开源框架。\n* [Gymnasium](https:\u002F\u002Fgithub.com\u002FFarama-Foundation\u002FGymnasium) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FFarama-Foundation\u002FGymnasium.svg?cacheSeconds=86400) - Gymnasium 是一个开源 Python 库，通过提供标准 API 来连接学习算法与环境，以及一套符合该 API 的标准环境集，从而用于开发和比较强化学习 (RL) 算法。\n* 
[Gymnasium-Robotics](https:\u002F\u002Fgithub.com\u002FFarama-Foundation\u002FGymnasium-Robotics) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FFarama-Foundation\u002FGymnasium-Robotics.svg?cacheSeconds=86400) - Gymnasium-Robotics 包含一系列使用 Gymnasium API 的强化学习 (RL) 机器人环境。这些环境使用 MuJoCo 物理引擎和维护的 mujoco python 绑定运行。\n* [Jumanji](https:\u002F\u002Fgithub.com\u002Finstadeepai\u002Fjumanji) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Finstadeepai\u002Fjumanji.svg?cacheSeconds=86400) - Jumanji 是一套用 JAX 编写的强化学习 (RL) 环境套件，为行业驱动的研究提供干净、硬件加速的环境。\n* [MARLlib](https:\u002F\u002Fgithub.com\u002FReplicable-MARL\u002FMARLlib) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FReplicable-MARL\u002FMARLlib.svg?cacheSeconds=86400) - MARLlib 是一个基于 RLlib 的综合多智能体强化学习 (MARL) 算法库。它为 MARL 研究社区提供了一个统一的平台，用于构建、训练和评估多智能体强化学习算法。\n* [Mava](https:\u002F\u002Fgithub.com\u002Finstadeepai\u002FMava) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Finstadeepai\u002FMava.svg?cacheSeconds=86400) - Mava 是一个用于 JAX 中分布式多智能体强化学习 (MARL) 的框架。\n* [Melting Pot](https:\u002F\u002Fgithub.com\u002Fgoogle-deepmind\u002Fmeltingpot) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle-deepmind\u002Fmeltingpot.svg?cacheSeconds=86400) - Melting Pot 是一套用于多智能体强化学习 (MARL) 的测试场景套件。\n* [MetaDrive](https:\u002F\u002Fgithub.com\u002Fmetadriverse\u002Fmetadrive) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmetadriverse\u002Fmetadrive.svg?cacheSeconds=86400) - MetaDrive 是一个驾驶模拟器，由多种可泛化的强化学习 (RL) 驾驶场景组成。\n* [Minigrid](https:\u002F\u002Fgithub.com\u002FFarama-Foundation\u002FMinigrid) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FFarama-Foundation\u002FMinigrid.svg?cacheSeconds=86400) - Minigrid 库包含一系列离散网格世界环境，用于进行强化学习 (RL) 研究。这些环境遵循 Gymnasium 标准 API，设计为轻量级、快速且易于定制。\n* [MiniWorld](https:\u002F\u002Fgithub.com\u002FFarama-Foundation\u002FMiniworld) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FFarama-Foundation\u002FMiniworld.svg?cacheSeconds=86400) - MiniWorld 是一个用于强化学习 (RL) 和机器人研究的极简主义 3D 室内环境模拟器。\n* [ML-Agents](https:\u002F\u002Fgithub.com\u002FUnity-Technologies\u002Fml-agents) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FUnity-Technologies\u002Fml-agents.svg?cacheSeconds=86400) - ML-Agents 是一个开源项目，使游戏和模拟能够作为训练强化学习 (RL) 智能体的环境。\n* [MLGym](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002FMLGym) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffacebookresearch\u002FMLGym.svg?cacheSeconds=86400) - MLGym 是一个 Gym 环境，支持针对机器学习任务训练此类智能体的强化学习 (RL) 算法研究。\n* [MushroomRL](https:\u002F\u002Fgithub.com\u002FMushroomRL\u002Fmushroom-rl) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FMushroomRL\u002Fmushroom-rl.svg?cacheSeconds=86400) - MushroomRL 是一个 Python 强化学习 (RL) 库，其模块化允许轻松使用知名的 Python 张量计算库（例如 PyTorch, Tensorflow）和强化学习基准（例如 OpenAI Gym, PyBullet, Deepmind Control Suite）。\n* [OmniSafe](https:\u002F\u002Fgithub.com\u002FPKU-Alignment\u002Fomnisafe) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FPKU-Alignment\u002Fomnisafe.svg?cacheSeconds=86400) - OmniSafe 是一个旨在加速安全强化学习 (RL) 研究的架构框架。\n* [OpenRLHF](https:\u002F\u002Fgithub.com\u002FOpenRLHF\u002FOpenRLHF) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FOpenRLHF\u002FOpenRLHF.svg?cacheSeconds=86400) - OpenRLHF 是一个用于人类反馈强化学习 (RLHF) 的开源框架。\n* [PARL](https:\u002F\u002Fgithub.com\u002FPaddlePaddle\u002FPARL) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FPaddlePaddle\u002FPARL.svg?cacheSeconds=86400) - PARL 是一个灵活且高效的强化学习 (RL) 框架。\n* [PettingZoo](https:\u002F\u002Fgithub.com\u002FFarama-Foundation\u002FPettingZoo) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FFarama-Foundation\u002FPettingZoo.svg?cacheSeconds=86400) - PettingZoo 是一个用于多智能体强化学习 (MARL) 研究的 Python 库，类似于多智能体版本的 Gymnasium。\n* 
[ranx](https:\u002F\u002Fgithub.com\u002FAmenRa\u002Franx) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FAmenRa\u002Franx.svg?cacheSeconds=86400) - ranx 是一个用 Python 实现的快速排名评估指标库，利用 Numba 进行高速向量运算和自动并行化。\n* [RL4CO](https:\u002F\u002Fgithub.com\u002Fai4co\u002Frl4co) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fai4co\u002Frl4co.svg?cacheSeconds=86400) - RL4CO 是一个 PyTorch 库，涵盖组合优化 (CO) 相关的所有强化学习 (RL) 内容。\n* [RL2](https:\u002F\u002Fgithub.com\u002FChenmienTan\u002FRL2) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FChenmienTan\u002FRL2.svg?cacheSeconds=86400) - RL2 是一个强化学习 (RL) 库。\n* [RLinf](https:\u002F\u002Fgithub.com\u002FRLinf\u002FRLinf) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FRLinf\u002FRLinf.svg?cacheSeconds=86400) - RLinf 是一个强化学习 (RL) 库。\n* [ROLL](https:\u002F\u002Fgithub.com\u002Falibaba\u002FROLL) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Falibaba\u002FROLL.svg?cacheSeconds=86400) - ROLL 是一个强化学习 (RL) 库。\n* [skrl](https:\u002F\u002Fgithub.com\u002FToni-SM\u002Fskrl) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FToni-SM\u002Fskrl.svg?cacheSeconds=86400) - skrl 是一个用 Python（使用 PyTorch）编写的开源模块化强化学习 (RL) 库，设计重点在于算法实现的易读性、简单性和透明度。\n* [slime](https:\u002F\u002Fgithub.com\u002FTHUDM\u002Fslime) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FTHUDM\u002Fslime.svg?cacheSeconds=86400) - slime 是一个用于 RL 扩展的大语言模型 (LLM) 后训练框架。\n* [Stable Baselines](https:\u002F\u002Fgithub.com\u002FDLR-RM\u002Fstable-baselines3) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FDLR-RM\u002Fstable-baselines3.svg?cacheSeconds=86400) - OpenAI Baselines 的一个分支，实现了强化学习 (RL) 算法。\n* [TF-Agents](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Fagents) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftensorflow\u002Fagents.svg?cacheSeconds=86400) - 一个可靠、可扩展且易于使用的 TensorFlow 库，用于上下文赌博机和强化学习 (RL)。\n* 
[TorchRL](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Frl) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpytorch\u002Frl.svg?cacheSeconds=86400) - TorchRL 是一个用于 PyTorch 的开源强化学习 (RL) 库。\n* [TRL](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Ftrl) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Ftrl.svg?cacheSeconds=86400) - 使用强化学习 (RL) 训练 Transformer 语言模型。\n* [veRL](https:\u002F\u002Fgithub.com\u002Fvolcengine\u002FveRL) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvolcengine\u002FveRL.svg?cacheSeconds=86400) - veRL (HybridFlow) 是一个专为大语言模型 (LLM) 设计的灵活、高效且工业级的强化学习 (RL) 及人类反馈强化学习 (RLHF) 训练框架。\n\n## 工业级机器人\n* [AI2-THOR](https:\u002F\u002Fgithub.com\u002Fallenai\u002Fai2thor) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fallenai\u002Fai2thor.svg?cacheSeconds=86400) - AI2-THOR 是一个面向 AI 智能体（AI agents）的近照片级真实可交互框架。\n* [Habitat-Sim](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fhabitat-sim) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffacebookresearch\u002Fhabitat-sim.svg?cacheSeconds=86400) - Habitat-Sim 是一个用于具身人工智能（Embodied AI）研究的灵活、高性能 3D 模拟器。\n* [IsaacLab](https:\u002F\u002Fgithub.com\u002Fisaac-sim\u002FIsaacLab) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fisaac-sim\u002FIsaacLab.svg?cacheSeconds=86400) - IsaacLab 是一个统一且模块化的机器人学习框架，依托 NVIDIA Isaac Sim 构建。\n* [robosuite](https:\u002F\u002Fgithub.com\u002FARISE-Initiative\u002Frobosuite) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FARISE-Initiative\u002Frobosuite.svg?cacheSeconds=86400) - robosuite 是一个由 MuJoCo 物理引擎驱动的仿真框架，用于机器人学习。\n* [RoboVerse](https:\u002F\u002Fgithub.com\u002FRoboVerseOrg\u002FRoboVerse) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FRoboVerseOrg\u002FRoboVerse.svg?cacheSeconds=86400) - RoboVerse 是一个拥有多样化环境的综合性机器人仿真平台。\n\n## 工业级可视化\n* [Apache ECharts](https:\u002F\u002Fgithub.com\u002Fapache\u002Fecharts) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fecharts.svg?cacheSeconds=86400) - Apache ECharts 是一个功能强大、交互式的浏览器图表和数据可视化库。\n* [Apache Superset](https:\u002F\u002Fgithub.com\u002Fapache\u002Fsuperset) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fsuperset.svg?cacheSeconds=86400) - 一款现代化的、面向企业的商业智能 (Business Intelligence) Web 应用程序。\n* [Bokeh](https:\u002F\u002Fgithub.com\u002Fbokeh\u002Fbokeh) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fbokeh\u002Fbokeh.svg?cacheSeconds=86400) - Bokeh 是一个 Python 交互式可视化库，能够在现代 Web 浏览器中实现美观且有意义的可视化展示。\n* [Bread Dataset Viewer](https:\u002F\u002Fgithub.com\u002FBread-Technologies\u002Fmle_vscode_extension) - 一个 VS Code 扩展，用于在编辑器内直接查看和探索大型机器学习 (Machine Learning) 数据集（CSV、JSON、Parquet 等），而不会导致集成开发环境 (IDE) 崩溃。\n* [Bread WandB Viewer](https:\u002F\u002Fgithub.com\u002FBread-Technologies\u002Fbread_wandb_viewer_extension) - 一个用于在 IDE 内查看 Weights & Biases 实验、日志和工件的 VS Code 扩展，消除了切换到 Web 用户界面 (Web UI) 的需求，并通过 100% 离线运行来保护数据隐私。\n* [Data Formulator](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Fdata-formulator) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmicrosoft\u002Fdata-formulator.svg?cacheSeconds=86400) - 利用人工智能 (AI) 迭代地转换数据并创建丰富的可视化效果。\n* [ggplot2](https:\u002F\u002Fgithub.com\u002Ftidyverse\u002Fggplot2) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftidyverse\u002Fggplot2.svg?cacheSeconds=86400) - R 语言的图形语法实现。\n* [gradio](https:\u002F\u002Fgithub.com\u002Fgradio-app\u002Fgradio) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgradio-app\u002Fgradio.svg?cacheSeconds=86400) - 仅通过编写 Python 即可快速创建和分享模型演示。在浏览器中交互式调试模型，获取协作人员反馈，并生成公开链接，无需部署任何内容。\n* [Kangas](https:\u002F\u002Fgithub.com\u002Fcomet-ml\u002Fkangas) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcomet-ml\u002Fkangas.svg?cacheSeconds=86400) - Kangas 是用于探索、分析和可视化大规模多媒体数据的工具。它提供直观的 Python API 用于记录大型数据表，以及直观可视化界面以针对数据集执行复杂查询。\n* 
[matplotlib](https:\u002F\u002Fgithub.com\u002Fmatplotlib\u002Fmatplotlib) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmatplotlib\u002Fmatplotlib.svg?cacheSeconds=86400) - 一个 Python 2D 绘图库，可在跨平台的多种硬拷贝格式和交互式环境中生成出版级图表。\n* [Netron](https:\u002F\u002Fgithub.com\u002Flutzroeder\u002Fnetron) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flutzroeder\u002Fnetron.svg?cacheSeconds=86400) - Netron 是神经网络 (Neural Network)、深度学习 (Deep Learning) 和机器学习的模型查看器。\n* [Perspective](https:\u002F\u002Fgithub.com\u002Ffinos\u002Fperspective) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffinos\u002Fperspective.svg?cacheSeconds=86400) - 通过 WebAssembly 进行流式透视可视化。\n* [Plotly](https:\u002F\u002Fgithub.com\u002Fplotly\u002Fplotly.py) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fplotly\u002Fplotly.py.svg?cacheSeconds=86400) - Python 交互式、开源且基于浏览器的绘图库。\n* [Redash](https:\u002F\u002Fgithub.com\u002Fgetredash\u002Fredash) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgetredash\u002Fredash.svg?cacheSeconds=86400) - Redash 是一个开源可视化框架，旨在允许轻松访问利用多个后端的大数据集。\n* [Rerun](https:\u002F\u002Fgithub.com\u002Frerun-io\u002Frerun) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Frerun-io\u002Frerun.svg?cacheSeconds=86400) - Rerun 是用于记录、存储、查询和可视化多模态数据的开源软件开发工具包 (SDK)，专为机器人学 (Robotics)、计算机视觉 (Computer Vision) 和空间人工智能 (Spatial AI) 设计。\n* [seaborn](https:\u002F\u002Fgithub.com\u002Fmwaskom\u002Fseaborn) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmwaskom\u002Fseaborn.svg?cacheSeconds=86400) - Seaborn 是基于 matplotlib 的 Python 可视化库。它提供了绘制精美统计图形的高级接口。\n* [Spotlight](https:\u002F\u002Fgithub.com\u002FRenumics\u002Fspotlight) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FRenumics\u002Fspotlight.svg?cacheSeconds=86400) - Spotlight 帮助您识别关键数据段和模型故障模式。它使您能够通过策划高质量数据集来构建和维护可靠的机器学习模型。\n* [Streamlit](https:\u002F\u002Fgithub.com\u002Fstreamlit\u002Fstreamlit) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fstreamlit\u002Fstreamlit.svg?cacheSeconds=86400) - 让您使用看似简单的 Python 脚本为机器学习项目创建应用程序。它支持热重载 (hot-reloading)，因此您在编辑和保存文件时，应用程序会实时更新。\n* [tensorboardX](https:\u002F\u002Fgithub.com\u002Flanpa\u002FtensorboardX) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flanpa\u002FtensorboardX.svg?cacheSeconds=86400) - 通过简单的函数调用编写 TensorBoard 事件。\n* [TensorBoard](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Ftensorboard) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftensorflow\u002Ftensorboard.svg?cacheSeconds=86400) - TensorBoard 是机器学习实验的可视化工具包，使得托管、跟踪和共享机器学习实验变得简单。\n* [Transformer Explainer](https:\u002F\u002Fgithub.com\u002Fpoloclub\u002Ftransformer-explainer) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpoloclub\u002Ftransformer-explainer.svg?cacheSeconds=86400) - Transformer Explainer 是一款交互式可视化工具，旨在帮助任何人了解基于 Transformer 的模型（如 GPT）的工作原理。\n* [Vega-Altair](https:\u002F\u002Fgithub.com\u002Fvega\u002Faltair) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvega\u002Faltair.svg?cacheSeconds=86400) - Vega-Altair 是 Python 的声明式统计可视化库。\n* [ydata-profiling](https:\u002F\u002Fgithub.com\u002Fydataai\u002Fydata-profiling) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fydataai\u002Fydata-profiling.svg?cacheSeconds=86400) - ydata-profiling 提供了一行代码完成探索性数据分析 (EDA) 的一致且快速的解决方案。\n\n## 元数据管理\n* [Amundsen](https:\u002F\u002Fgithub.com\u002Famundsen-io\u002Famundsen) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Famundsen-io\u002Famundsen.svg?cacheSeconds=86400) - Amundsen 是一个元数据驱动 (Metadata-driven) 的应用程序，旨在提高数据分析师、数据科学家和工程师在与数据交互时的生产力。\n* [Apache Atlas](https:\u002F\u002Fgithub.com\u002Fapache\u002Fatlas) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapache\u002Fatlas.svg?cacheSeconds=86400) - Apache Atlas 框架是一套可扩展的核心基础治理服务 (Governance Services)，使企业能够在 Hadoop 内有效且高效地满足其合规要求，并允许与整个企业数据生态系统集成。\n* 
[DataHub](https:\u002F\u002Fgithub.com\u002Fdatahub-project\u002Fdatahub) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdatahub-project\u002Fdatahub.svg?cacheSeconds=86400) - DataHub 是 LinkedIn 开发的通用元数据搜索与发现工具 (Metadata Search & Discovery Tool)。\n* [Marquez](https:\u002F\u002Fgithub.com\u002FMarquezProject\u002Fmarquez) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FMarquezProject\u002Fmarquez.svg?cacheSeconds=86400) - Marquez 是一个开源元数据服务，用于收集、聚合和可视化数据生态系统的元数据。\n* [Metacat](https:\u002F\u002Fgithub.com\u002FNetflix\u002Fmetacat) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FNetflix\u002Fmetacat.svg?cacheSeconds=86400) - Metacat 是一个统一的元数据探索 API 服务。Metacat 专注于解决以下问题：1) 元数据系统的联邦视图 (Federated Views)；2) 关于数据集的任意元数据存储；3) 元数据发现。\n* [ML Metadata](https:\u002F\u002Fgithub.com\u002Fgoogle\u002Fml-metadata) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle\u002Fml-metadata.svg?cacheSeconds=86400) - 一个用于记录和检索与机器学习 (ML) 开发者和数据科学家工作流相关联的元数据的库。\n\n## 模型、数据与实验管理\n* [Aim](https:\u002F\u002Fgithub.com\u002Faimhubio\u002Faim) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Faimhubio\u002Faim.svg?cacheSeconds=86400) - 一种超级简单的方式来记录、搜索和比较人工智能 (AI) 实验。\n* [ClearML](https:\u002F\u002Fgithub.com\u002Fclearml\u002Fclearml) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fclearml\u002Fclearml.svg?cacheSeconds=86400) - 人工智能的自动化实验管理与版本控制 (Version Control)（前身为 Trains）。\n* [DataHub](https:\u002F\u002Fgithub.com\u002Fdatahub-project\u002Fdatahub) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdatahub-project\u002Fdatahub.svg?cacheSeconds=86400) - DataHub 是现代数据栈 (Modern Data Stack) 的开源数据目录。\n* [Dolt](https:\u002F\u002Fgithub.com\u002Fdolthub\u002Fdolt) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdolthub\u002Fdolt.svg?cacheSeconds=86400) - Dolt 是一个 SQL 数据库，你可以像 Git 仓库一样对其进行分叉 (Fork)、克隆 (Clone)、分支 (Branch)、合并 (Merge)、推送 (Push) 和拉取 (Pull)。\n* 
[DVC](https:\u002F\u002Fgithub.com\u002Fiterative\u002Fdvc) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fiterative\u002Fdvc.svg?cacheSeconds=86400) - DVC (Data Version Control，数据版本控制) 是一个 Git 分叉 (Fork)，允许对模型进行版本管理。\n* [HuggingFace Model Downloader](https:\u002F\u002Fgithub.com\u002Fbodaay\u002FHuggingFaceModelDownloader) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fbodaay\u002FHuggingFaceModelDownloader.svg?cacheSeconds=86400) - HuggingFace Model Downloader 是一个用于从 HuggingFace 网站下载模型和数据集的实用工具。它提供 LFS 文件的多线程下载，并通过 SHA256 校验和验证确保下载模型的完整性。\n* [Keepsake](https:\u002F\u002Fgithub.com\u002Freplicate\u002Fkeepsake) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Freplicate\u002Fkeepsake.svg?cacheSeconds=86400) - 机器学习的版本控制。\n* [KitOps](https:\u002F\u002Fgithub.com\u002Fjozu-ai\u002Fkitops) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjozu-ai\u002Fkitops.svg?cacheSeconds=86400) - KitOps 是一个基于开放标准的 AI\u002FML 项目打包和版本控制系统，可与所有你正在使用的 AI\u002FML、开发和 DevOps 工具配合使用。\n* [lakeFS](https:\u002F\u002Fgithub.com\u002Ftreeverse\u002FlakeFS) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftreeverse\u002FlakeFS.svg?cacheSeconds=86400) - 建立在对象存储之上的可重复、原子化且带版本的数据湖。\n* [MLflow](https:\u002F\u002Fgithub.com\u002Fmlflow\u002Fmlflow) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmlflow\u002Fmlflow.svg?cacheSeconds=86400) - 用于管理机器学习 (ML) 生命周期的开源平台，包括实验、可复现性和部署。\n* [Neptune](https:\u002F\u002Fgithub.com\u002Fneptune-ai\u002Fneptune-client) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fneptune-ai\u002Fneptune-client.svg?cacheSeconds=86400) - Neptune 是为训练基础模型 (Foundation Models) 的团队提供的可扩展实验追踪器。\n* [Polyaxon](https:\u002F\u002Fgithub.com\u002Fpolyaxon\u002Fpolyaxon) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpolyaxon\u002Fpolyaxon.svg?cacheSeconds=86400) - 一个用于在 Kubernetes 上进行可复现和可扩展的机器学习及深度学习的平台 - 
[(视频)](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=Iexwrka_hys)。\n* [Quilt](https:\u002F\u002Fgithub.com\u002Fquiltdata\u002Fquilt) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fquiltdata\u002Fquilt.svg?cacheSeconds=86400) - 数据和模型的版本控制、可复现性和部署。\n* [Sacred](https:\u002F\u002Fgithub.com\u002FIDSIA\u002Fsacred) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FIDSIA\u002Fsacred.svg?cacheSeconds=86400) - 帮助你配置、组织、记录并复现机器学习实验的工具。\n* [TerminusDB](https:\u002F\u002Fgithub.com\u002Fterminusdb\u002Fterminusdb) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fterminusdb\u002Fterminusdb.svg?cacheSeconds=86400) - 一个像 Git 一样存储数据的图数据库管理系统。\n* [Weights & Biases](https:\u002F\u002Fgithub.com\u002Fwandb\u002Fwandb) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fwandb\u002Fwandb.svg?cacheSeconds=86400) - Weights & Biases 是一个机器学习实验追踪、数据集版本控制、超参数搜索、可视化和协作平台。\n\n## 模型训练与编排\n\n* [AutoTrain Advanced](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fautotrain-advanced) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Fautotrain-advanced.svg?cacheSeconds=86400) - AutoTrain Advanced 是一个无代码解决方案，允许您只需几次点击即可训练机器学习 (Machine Learning) 模型。\n* [Avalanche](https:\u002F\u002Fgithub.com\u002FContinualAI\u002Favalanche) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FContinualAI\u002Favalanche.svg?cacheSeconds=86400) - Avalanche 是一个端到端 (End-to-End) 持续学习 (Continual Learning) 库，旨在提供一个共享且协作的开源（MIT 许可）代码库，用于快速原型设计、训练和可复现地评估持续学习算法。\n* [Axolotl](https:\u002F\u002Fgithub.com\u002Faxolotl-ai-cloud\u002Faxolotl) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Faxolotl-ai-cloud\u002Faxolotl.svg?cacheSeconds=86400) - Axolotl 是一款旨在简化各种人工智能 (AI) 模型微调 (Fine-tuning) 的工具，支持多种配置和架构。\n* [BindsNET](https:\u002F\u002Fgithub.com\u002FBindsNET\u002Fbindsnet) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FBindsNET\u002Fbindsnet.svg?cacheSeconds=86400) - BindsNET 
是一个脉冲神经网络 (Spiking Neural Network) 仿真库，专注于开发用于机器学习的生物启发式算法。\n* [CML](https:\u002F\u002Fgithub.com\u002Fiterative\u002Fcml) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fiterative\u002Fcml.svg?cacheSeconds=86400) - 持续机器学习 (Continuous Machine Learning, CML) 是一个开源库，用于在机器学习项目中实施持续集成与交付 (CI\u002FCD)。\n* [CoreNet](https:\u002F\u002Fgithub.com\u002Fapple\u002Fcorenet) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fapple\u002Fcorenet.svg?cacheSeconds=86400) - CoreNet 是一个深度神经网络 (Deep Neural Network) 工具包，允许研究人员和工程师训练标准和新型的小规模及大规模模型，适用于各种任务，包括基础模型（例如 CLIP 和 大语言模型 (LLM)）、物体分类、物体检测和语义分割。\n* [Determined](https:\u002F\u002Fgithub.com\u002Fdetermined-ai\u002Fdetermined) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdetermined-ai\u002Fdetermined.svg?cacheSeconds=86400) - 深度学习 (Deep Learning) 训练平台，集成了分布式训练、超参数调优和模型管理的支持（支持 TensorFlow 和 PyTorch）。\n* [dstack](https:\u002F\u002Fgithub.com\u002Fdstackai\u002Fdstack) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdstackai\u002Fdstack.svg?cacheSeconds=86400) - dstack 是一个开源容器编排器，简化了工作负载编排，并提高了机器学习团队的 GPU (图形处理器) 利用率。\n* [envd](https:\u002F\u002Fgithub.com\u002Ftensorchord\u002Fenvd) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftensorchord\u002Fenvd.svg?cacheSeconds=86400) - 面向数据科学和人工智能 (AI)\u002F机器学习工程团队的机器学习开发环境。\n* [Fairseq](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Ffairseq) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ffacebookresearch\u002Ffairseq.svg?cacheSeconds=86400) - Fairseq(-py) 是一个序列建模工具包，允许研究人员和开发者为翻译、摘要、语言建模和其他文本生成任务训练自定义模型。\n* [Fire-Flyer File System](https:\u002F\u002Fgithub.com\u002Fdeepseek-ai\u002F3FS) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdeepseek-ai\u002F3FS.svg?cacheSeconds=86400) - Fire-Flyer 文件系统（3FS）是一种高性能分布式文件系统，旨在解决人工智能 (AI) 训练和推理工作负载的挑战。它利用现代 SSD (固态硬盘) 和 RDMA (远程直接内存访问) 网络提供共享存储层，简化分布式应用程序的开发。\n* [H2O-3](https:\u002F\u002Fgithub.com\u002Fh2oai\u002Fh2o-3) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fh2oai\u002Fh2o-3.svg?cacheSeconds=86400) - 用于更智能应用的快速可扩展机器学习平台：深度学习、梯度提升与 XGBoost、随机森林、广义线性建模（逻辑回归、弹性网络）、K-Means、PCA、堆叠集成、自动机器学习 (AutoML) 等。\n* [Hopsworks](https:\u002F\u002Fgithub.com\u002Flogicalclocks\u002Fhopsworks) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flogicalclocks\u002Fhopsworks.svg?cacheSeconds=86400) - Hopsworks 是一个数据密集型平台，用于设计和运行机器学习流水线。\n* [Ignite](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Fignite) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpytorch\u002Fignite.svg?cacheSeconds=86400) - Ignite 是一个高级库，旨在灵活且透明地帮助在 PyTorch 中训练和评估神经网络。\n* [Kubeflow](https:\u002F\u002Fgithub.com\u002Fkubeflow\u002Fkubeflow) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fkubeflow\u002Fkubeflow.svg?cacheSeconds=86400) - 基于 Google 内部机器学习流水线的云原生 (Cloud-Native) 机器学习平台。\n* [Ludwig](https:\u002F\u002Fgithub.com\u002Fludwig-ai\u002Fludwig) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fludwig-ai\u002Fludwig.svg?cacheSeconds=86400) - Ludwig 是一个低代码框架，用于构建自定义人工智能 (AI) 模型，如 大语言模型 (LLM) 和其他深度神经网络。\n* [MFTCoder](https:\u002F\u002Fgithub.com\u002Fcodefuse-ai\u002FMFTCoder) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcodefuse-ai\u002FMFTCoder.svg?cacheSeconds=86400) - MFTCoder 是 CodeFuse 的一个开源项目，用于在大语言模型 (LLM) 上准确高效地进行多任务微调 (MFT)，特别是在代码大语言模型（Code-LLM，用于代码任务的大语言模型）上。\n* [MLeap](https:\u002F\u002Fgithub.com\u002Fcombust\u002Fmleap) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcombust\u002Fmleap.svg?cacheSeconds=86400) - 针对 Spark、TensorFlow 和 sklearn 的流水线 (Pipeline) 和模型序列化标准化。\n* [Nanotron](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fnanotron) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Fnanotron.svg?cacheSeconds=86400) - Nanotron 提供分布式原语，使用 3D 并行性高效训练各种模型。\n* [NeMo](https:\u002F\u002Fgithub.com\u002FNVIDIA\u002FNeMo) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FNVIDIA\u002FNeMo.svg?cacheSeconds=86400) - NVIDIA NeMo 是一个可扩展的云原生生成式人工智能 (AI) 框架，专为从事大语言模型 (LLM)、多模态模型 (MM)、自动语音识别 (ASR)、文本转语音 (TTS) 和计算机视觉 (CV) 领域的研究人员和 PyTorch 开发者打造。它旨在通过利用现有代码和预训练模型检查点，帮助您高效地创建、定制和部署新的生成式 AI 模型。\n* [Prime](https:\u002F\u002Fgithub.com\u002FPrimeIntellect-ai\u002Fprime) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FPrimeIntellect-ai\u002Fprime.svg?cacheSeconds=86400) - Prime 是一个框架，用于在互联网上高效地全球分布式训练人工智能 (AI) 模型。\n* [PyCaret](https:\u002F\u002Fgithub.com\u002Fpycaret\u002Fpycaret) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpycaret\u002Fpycaret.svg?cacheSeconds=86400) - 用于训练和部署模型的低代码 (Low-code) 库（scikit-learn, XGBoost, LightGBM, spaCy）。\n* [Sematic](https:\u002F\u002Fgithub.com\u002Fsematic-ai\u002Fsematic) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fsematic-ai\u002Fsematic.svg?cacheSeconds=86400) - 使用简单的 Python 构建资源密集型流水线的平台。\n* [Skaffold](https:\u002F\u002Fgithub.com\u002FGoogleContainerTools\u002Fskaffold) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FGoogleContainerTools\u002Fskaffold.svg?cacheSeconds=86400) - Skaffold 是一个命令行工具，促进 Kubernetes 应用的持续开发。您可以在本地迭代应用程序源代码，然后部署到本地或远程 Kubernetes 集群。\n* [TFX](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Ftfx) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftensorflow\u002Ftfx.svg?cacheSeconds=86400) - Tensorflow Extended (TFX) 是一个基于 TensorFlow 的生产导向型机器学习配置框架，包括监控和模型版本管理。\n* [unsloth](https:\u002F\u002Fgithub.com\u002Funslothai\u002Funsloth) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Funslothai\u002Funsloth.svg?cacheSeconds=86400) - 针对大语言模型 (LLM) 的微调与强化学习。以 2 倍速度和 70% 更少的显存 (VRAM) 训练 OpenAI gpt-oss, DeepSeek-R1, Qwen3, Gemma 3, TTS。\n\n## 模型存储优化\n* [AutoAWQ](https:\u002F\u002Fgithub.com\u002Fcasper-hansen\u002FAutoAWQ) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcasper-hansen\u002FAutoAWQ.svg?cacheSeconds=86400) - AutoAWQ 是一个易于使用的工具包，用于 4 位量化模型。\n* [AutoGPTQ](https:\u002F\u002Fgithub.com\u002FAutoGPTQ\u002FAutoGPTQ) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FAutoGPTQ\u002FAutoGPTQ.svg?cacheSeconds=86400) - 一个易于使用的大语言模型（LLMs）量化工具包，拥有用户友好的 API（应用程序接口），基于 GPTQ 算法。\n* [AWQ](https:\u002F\u002Fgithub.com\u002Fmit-han-lab\u002Fllm-awq) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmit-han-lab\u002Fllm-awq.svg?cacheSeconds=86400) - 面向大语言模型（LLM）压缩与加速的感知激活权重量化（Activation-aware Weight Quantization）。\n* [GGML](https:\u002F\u002Fgithub.com\u002Fggml-org\u002Fggml) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fggml-org\u002Fggml.svg?cacheSeconds=86400) - GGML 是一个高性能的机器学习张量库，支持在 CPU（中央处理器）上进行高效推理，特别针对大语言模型进行了优化。\n* [neural-compressor](https:\u002F\u002Fgithub.com\u002Fintel\u002Fneural-compressor) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fintel\u002Fneural-compressor.svg?cacheSeconds=86400) - Intel® Neural Compressor 旨在主流框架上提供流行的模型压缩技术，如量化、剪枝（稀疏性）、蒸馏和神经架构搜索。\n* [NNEF](https:\u002F\u002Fwww.khronos.org\u002Fnnef) - 神经网络交换格式（Neural Network Exchange Format, NNEF）是一种开放标准，用于表示神经网络模型，以实现不同机器学习框架和平台之间的互操作性和可移植性。\n* [ONNX](https:\u002F\u002Fgithub.com\u002Fonnx\u002Fonnx) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fonnx\u002Fonnx.svg?cacheSeconds=86400) - ONNX（开放神经网络交换，Open Neural Network Exchange）是一种开源格式，旨在促进不同框架和平台之间机器学习模型的互操作性和可移植性。\n* [PFA](https:\u002F\u002Fdmg.org\u002Fpfa) - PFA（分析便携格式，Portable Format for Analytics）格式是一种标准，用于以便携式、基于 JSON 的格式表示和交换预测模型和分析工作流。\n* [PMML](https:\u002F\u002Fdmg.org\u002Fpmml) - PMML（预测模型标记语言，Predictive Model Markup Language）是一种基于 XML 的标准，用于在不同应用程序之间表示和共享预测模型。\n* [Quanto](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Foptimum-quanto) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhuggingface\u002Foptimum-quanto.svg?cacheSeconds=86400) - Quanto 旨在简化深度学习模型的量化过程。\n\n## 隐私与安全\n* [AI Gateway](https:\u002F\u002Fgithub.com\u002Fportkey-ai\u002Fgateway) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fportkey-ai\u002Fgateway.svg?cacheSeconds=86400) - AI Gateway 是一个集成安全护栏（Guardrails）的极速 AI 网关。\n* [AI Job Displacement Tracker](https:\u002F\u002Fgithub.com\u002Fnoahaust2\u002Fai-displacement-tracker) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fnoahaust2\u002Fai-displacement-tracker.svg?cacheSeconds=86400) - 结构化、有来源支持的数据集，追踪了 96 起归因于 AI 的人员缩减（影响 45.7 万名员工，涉及 13 个国家、13 个行业）。每个条目均包含来源 URL、归因层级和职能。\n* [ART](https:\u002F\u002Fgithub.com\u002FTrusted-AI\u002Fadversarial-robustness-toolbox) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FTrusted-AI\u002Fadversarial-robustness-toolbox.svg?cacheSeconds=86400) - ART（对抗鲁棒性工具箱，Adversarial Robustness Toolbox）提供工具，使开发者和研究人员能够防御和评估机器学习（Machine Learning）模型及应用免受逃避（Evasion）、投毒（Poisoning）、提取（Extraction）和推理（Inference）等对抗威胁。\n* [CipherChat](https:\u002F\u002Fgithub.com\u002FRobustNLP\u002FCipherChat) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FRobustNLP\u002FCipherChat.svg?cacheSeconds=86400) - CipherChat 是一个用于评估大型语言模型（LLMs）安全对齐泛化能力的框架。\n* [DeepTeam](https:\u002F\u002Fgithub.com\u002Fconfident-ai\u002Fdeepteam) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fconfident-ai\u002Fdeepteam.svg?cacheSeconds=86400) - DeepTeam 是一个简单易用的开源大型语言模型（LLM）红队测试（red teaming）框架，用于渗透测试和保护大型语言模型系统。\n* [FATE](https:\u002F\u002Fgithub.com\u002FFederatedAI\u002FFATE) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FFederatedAI\u002FFATE.svg?cacheSeconds=86400) - FATE（联邦人工智能技术赋能器，Federated AI Technology Enabler）是全球首个工业级联邦学习（Federated Learning）开源框架，旨在使企业和机构能够在保护数据安全与隐私的同时进行数据协作。\n* [FedML](https:\u002F\u002Fgithub.com\u002FFedML-AI\u002FFedML) 
![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FFedML-AI\u002FFedML.svg?cacheSeconds=86400) - FedML 为任何规模、任何地点的联邦\u002F分布式机器学习提供了研究与生产一体化的边缘云平台。\n* [Flower](https:\u002F\u002Fgithub.com\u002Fadap\u002Fflower) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fadap\u002Fflower.svg?cacheSeconds=86400) - Flower 是一个具有统一方法的联邦学习框架。它支持任何机器学习工作负载与任何机器学习框架及编程语言的联合。\n* [Google's Differential Privacy](https:\u002F\u002Fgithub.com\u002Fgoogle\u002Fdifferential-privacy) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgoogle\u002Fdifferential-privacy.svg?cacheSeconds=86400) - 这是一个包含 ε-差分隐私（ε-differentially private）算法的 C++ 库，可用于生成包含私有或敏感信息的数值数据集的聚合统计信息。\n* [Guardrails](https:\u002F\u002Fgithub.com\u002Fguardrails-ai\u002Fguardrails) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fguardrails-ai\u002Fguardrails.svg?cacheSeconds=86400) - Guardrails 是一个包，允许用户为大语言模型的输出添加结构、类型和质量保证。\n* [NeMo Guardrails](https:\u002F\u002Fgithub.com\u002FNVIDIA\u002FNeMo-Guardrails) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FNVIDIA\u002FNeMo-Guardrails.svg?cacheSeconds=86400) - NeMo Guardrails 是一个开源工具包，用于轻松地为基于大型语言模型的对话系统添加可编程的安全护栏。\n* [Opacus](https:\u002F\u002Fgithub.com\u002Fmeta-pytorch\u002Fopacus)  ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmeta-pytorch\u002Fopacus.svg?cacheSeconds=86400) - Opacus 是一个库，支持使用差分隐私（Differential Privacy）训练 PyTorch 模型。它支持客户端仅需最小代码更改即可进行训练，对训练性能影响很小，并允许客户端在线跟踪任意时刻消耗的隐私预算。\n* [OpenFL](https:\u002F\u002Fgithub.com\u002Fsecurefederatedai\u002Fopenfl)  ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fsecurefederatedai\u002Fopenfl.svg?cacheSeconds=86400) - OpenFL 是一个用于联邦学习的 Python 框架。OpenFL 旨在成为数据科学家灵活、可扩展且易于学习的工具。OpenFL 由英特尔物联网组（IOTG）和英特尔实验室开发。\n* [PySyft](https:\u002F\u002Fgithub.com\u002FOpenMined\u002FPySyft) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FOpenMined\u002FPySyft.svg?cacheSeconds=86400) - 一个用于安全、私密深度学习（Deep Learning）的 
Python 库。PySyft 在 PyTorch 内使用多方计算（Multi-Party Computation, MPC），将私有数据与模型训练解耦。\n* [Tensorflow Privacy](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Fprivacy) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftensorflow\u002Fprivacy.svg?cacheSeconds=86400) - 一个 Python 库，包含用于使用差分隐私训练机器学习模型的 TensorFlow 优化器实现。\n* [TF Encrypted](https:\u002F\u002Fgithub.com\u002Ftf-encrypted\u002Ftf-encrypted) ![](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftf-encrypted\u002Ftf-encrypted.svg?cacheSeconds=86400) - 一个在 TensorFlow 中对加密数据进行机密机器学习的框架。\n\n# 其他精选列表\n\n* [Awesome AI Regulation](https:\u002F\u002Fgithub.com\u002FEthicalML\u002Fawesome-artificial-intelligence-regulation) - 涵盖治理、合规性及监管框架，这些对于不同司法管辖区负责任地部署机器学习系统至关重要。\n* [Awesome Production GenAI](https:\u002F\u002Fgithub.com\u002FEthicalML\u002Fawesome-production-genai) - 专注于生成式 AI（Generative AI）的部署，包括大型语言模型运营、提示工程（prompt engineering）和针对生成式 AI 的监控及安全工具。\n* [Awesome RAG Production](https:\u002F\u002Fgithub.com\u002FYigtwxx\u002FAwesome-RAG-Production) - 精心策划的生产级工具和最佳实践列表，用于构建可扩展的检索增强生成（RAG）系统。","# Awesome Production Machine Learning 快速上手指南\n\n本项目是一个精心策划的开源库列表，旨在帮助开发者部署、监控、版本控制、扩展和保障生产环境中的机器学习系统（MLOps）。它不是一个单一的软件包，而是一个资源导航集合。\n\n## 环境准备\n\n由于本项目主要作为资源索引使用，对本地环境要求较低：\n\n- **网络环境**：能够访问 GitHub 及 Hugging Face 空间（国内用户可能需要配置网络代理）。\n- **浏览器**：Chrome、Firefox 或 Edge 等现代浏览器用于在线浏览。\n- **Git（可选）**：如需离线阅读或贡献代码，需安装 Git。\n\n## 安装步骤\n\n你可以通过在线访问或克隆仓库两种方式获取内容。\n\n### 方式一：在线浏览（推荐）\n\n直接访问项目主页查看最新列表，无需安装任何软件。\n\n```bash\nhttps:\u002F\u002Fgithub.com\u002FEthicalML\u002Fawesome-production-machine-learning\n```\n\n### 方式二：本地克隆\n\n如果你希望将列表保存到本地进行离线查阅或提交 PR，请使用以下命令：\n\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002FEthicalML\u002Fawesome-production-machine-learning.git\ncd awesome-production-machine-learning\n```\n\n## 基本使用\n\n### 1. 
浏览工具分类\n\n项目按功能模块整理了大量工具，常见分类包括：\n\n- **AutoML**：自动化机器学习（如 AutoGluon, Optuna）。\n- **Deployment & Serving**：模型部署与服务（如 KServe, Triton）。\n- **Evaluation & Monitoring**：评估与监控（如 Evidently, WhyLabs）。\n- **Model Management**：模型管理与实验跟踪（如 MLflow, Weights & Biases）。\n\n直接在页面顶部的目录表中点击对应链接跳转至具体工具介绍。\n\n### 2. 使用搜索工具\n\n为了快速在庞大的工具链中查找所需库，项目提供了专门的搜索工具：\n\n```bash\nhttps:\u002F\u002Fhuggingface.co\u002Fspaces\u002Fzhiminy\u002FAwesome-Production-Machine-Learning-Search\n```\n\n在该页面输入关键词，可快速定位到相关的开源库。\n\n### 3. 获取更新通知\n\n- **月度更新**：关注 GitHub Releases 以获取每月新增的生产级 ML 库摘要。\n  ```bash\n  https:\u002F\u002Fgithub.com\u002FEthicalML\u002Fawesome-production-machine-learning\u002Freleases\n  ```\n- **视频教程**：观看 10 分钟概览视频了解 MLOps 动机及工具介绍。\n  ```bash\n  https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=Ynb6X0KZKxY\n  ```\n\n### 4. 贡献与维护\n\n如果你发现优秀的工具想加入列表，请遵循 `CONTRIBUTING.md` 的要求提交 Pull Request。","某金融科技公司的算法团队正在将核心欺诈检测模型从实验环境迁移至生产环境，急需构建稳定可靠的 MLOps 全流程。\n\n### 没有 awesome-production-machine-learning 时\n- 工程师需在海量的 GitHub 项目中盲目筛选，耗费数周评估各库的维护状态与兼容性。\n- 缺乏系统化的监控方案，模型上线后出现数据漂移却迟迟未能发现，导致业务损失。\n- 实验记录分散在本地 Notebook 中，模型版本与代码版本脱节，难以追溯和复现历史结果。\n- 安全与隐私合规工具缺失，面临潜在的数据泄露风险且不知如何加固。\n\n### 使用 awesome-production-machine-learning 后\n- 直接查阅 **awesome-production-machine-learning** 分类目录，快速锁定经过社区验证的部署、监控及版本管理工具，选型效率提升 80%。\n- 采用推荐的监控解决方案，实现对模型预测分布的实时追踪，异常波动秒级告警。\n- 引入列表中的实验管理平台，统一存储模型元数据与代码快照，确保每次迭代可审计。\n- 依据隐私与安全板块建议，集成加密与脱敏库，满足金融行业的合规要求。\n\n这份精选清单通过聚合工业级最佳实践，帮助团队规避技术陷阱，大幅缩短模型从开发到上线的周期。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FEthicalML_awesome-production-machine-learning_6cb3f367.jpg","EthicalML","The Institute for Ethical Machine Learning","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002FEthicalML_2beff565.jpg","The Institute for Ethical Machine Learning is a think-tank that brings together with technology leaders, policymakers & academics to develop standards for 
ML.",null,"a@ethical.institute","http:\u002F\u002Fethical.institute","https:\u002F\u002Fgithub.com\u002FEthicalML",20339,2540,"2026-04-05T10:42:47","MIT",1,"未说明",{"notes":90,"python":88,"dependencies":91},"本项目为生产机器学习工具的精选资源列表（Awesome List），本身不包含可执行代码，因此无统一的运行环境要求。实际硬件及软件依赖取决于用户从列表中选择的特定工具（如 AutoML、模型部署框架等），需参考各子项目的官方文档。",[],[13,51],[94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112],"machine-learning","mlops","interpretability","explainability","responsible-ai","deep-learning","machine-learning-operations","ml-ops","ml-operations","privacy-preserving","privacy-preserving-ml","privacy-preserving-machine-learning","data-mining","large-scale-ml","production-ml","large-scale-machine-learning","production-machine-learning","awesome","awesome-list",12,"2026-03-27T02:49:30.150509","2026-04-06T07:13:05.147588",[117,122,127,132,137,142],{"id":118,"question_zh":119,"answer_zh":120,"source_url":121},3546,"生成式 AI 工具是否适合加入本列表？","是的，特别是涉及代理链架构的基础设施工具（如 LangChain）。单纯的提示词模板管理工具可能因过于高层级而不被收录，但能增强 LLM 的基础设施类框架是受欢迎的。","https:\u002F\u002Fgithub.com\u002FEthicalML\u002Fawesome-production-machine-learning\u002Fissues\u002F339",{"id":123,"question_zh":124,"answer_zh":125,"source_url":126},3547,"推荐哪些 ML 服务化（Serving）框架？","包括 Seldon Core, KFServing, BentoML, Cortex, TFX, TFServing, ForestFlow 等主流框架。","https:\u002F\u002Fgithub.com\u002FEthicalML\u002Fawesome-production-machine-learning\u002Fissues\u002F98",{"id":128,"question_zh":129,"answer_zh":130,"source_url":131},3548,"有哪些推荐的异常检测（Outlier Detection）库？","推荐 alibi-detect, pyod, SUOD, outlier-exposure 等库，通常要求项目拥有超过 100 个星标以保证一定关注度。","https:\u002F\u002Fgithub.com\u002FEthicalML\u002Fawesome-production-machine-learning\u002Fissues\u002F99",{"id":133,"question_zh":134,"answer_zh":135,"source_url":136},3549,"如何定义什么是“生产环境机器学习”？","这是一个动态演进的领域，没有绝对定义。社区采用“有异议但需执行”（disagree and 
commit）的原则来处理分歧，避免无休止的争论。","https:\u002F\u002Fgithub.com\u002FEthicalML\u002Fawesome-production-machine-learning\u002Fissues\u002F392",{"id":138,"question_zh":139,"answer_zh":140,"source_url":141},3550,"工具有什么标准才能被列入此列表？","建议将最低星标数门槛提高至 500 星，以过滤掉那些已停止维护或不再流行的工具，确保列表质量。","https:\u002F\u002Fgithub.com\u002FEthicalML\u002Fawesome-production-machine-learning\u002Fissues\u002F383",{"id":143,"question_zh":144,"answer_zh":145,"source_url":146},3551,"列表是否包含模型监控（Monitoring）相关内容？","是的，计划或正在探索添加关于模型和数据分布监控以及概念漂移检测的独立章节。","https:\u002F\u002Fgithub.com\u002FEthicalML\u002Fawesome-production-machine-learning\u002Fissues\u002F94",[148,152,156,160,164,168,172,176,180,184,188,192,196,200,204,208,212,216,220,224],{"id":149,"version":150,"summary_zh":79,"released_at":151},103146,"release-2026-03-01","2026-03-01T02:46:18",{"id":153,"version":154,"summary_zh":79,"released_at":155},103147,"release-2026-02-01","2026-02-01T02:49:17",{"id":157,"version":158,"summary_zh":79,"released_at":159},103148,"release-2026-01-01","2026-01-01T02:13:13",{"id":161,"version":162,"summary_zh":79,"released_at":163},103149,"release-2025-12-01","2025-12-01T02:15:32",{"id":165,"version":166,"summary_zh":79,"released_at":167},103150,"release-2025-11-01","2025-11-01T01:56:43",{"id":169,"version":170,"summary_zh":79,"released_at":171},103151,"release-2025-10-01","2025-10-01T01:56:32",{"id":173,"version":174,"summary_zh":79,"released_at":175},103152,"release-2025-09-01","2025-09-01T02:04:37",{"id":177,"version":178,"summary_zh":79,"released_at":179},103153,"release-2025-08-01","2025-08-01T02:23:50",{"id":181,"version":182,"summary_zh":79,"released_at":183},103154,"release-2025-07-01","2025-07-01T02:08:59",{"id":185,"version":186,"summary_zh":79,"released_at":187},103155,"release-2025-06-01","2025-06-01T02:12:10",{"id":189,"version":190,"summary_zh":79,"released_at":191},103156,"release-2025-05-01","2025-05-01T02:00:14",{"id":193,"version":194,"summary_zh":79,"released_at":195},103157,"release-2025-04-01","2025-04-
01T02:01:31",{"id":197,"version":198,"summary_zh":79,"released_at":199},103158,"release-2025-03-01","2025-03-01T01:48:51",{"id":201,"version":202,"summary_zh":79,"released_at":203},103159,"release-2025-01-01","2025-01-01T01:47:46",{"id":205,"version":206,"summary_zh":79,"released_at":207},103160,"release-2024-12-01","2024-12-01T02:01:15",{"id":209,"version":210,"summary_zh":79,"released_at":211},103161,"release-2024-11-01","2024-11-01T01:49:08",{"id":213,"version":214,"summary_zh":79,"released_at":215},103162,"release-2024-10-01","2024-10-01T01:47:41",{"id":217,"version":218,"summary_zh":79,"released_at":219},103163,"release-2024-09-01","2024-09-01T01:45:14",{"id":221,"version":222,"summary_zh":79,"released_at":223},103164,"release-2024-08-01","2024-08-01T01:29:27",{"id":225,"version":226,"summary_zh":79,"released_at":227},103165,"release-2024-07-01","2024-07-01T01:27:17"]