[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-alvinunreal--awesome-autoresearch":3,"tool-alvinunreal--awesome-autoresearch":62},[4,18,26,36,46,54],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 
代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",158594,2,"2026-04-16T23:34:05",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":42,"last_commit_at":43,"category_tags":44,"status":17},8272,"opencode","anomalyco\u002Fopencode","OpenCode 是一款开源的 AI 编程助手（Coding Agent），旨在像一位智能搭档一样融入您的开发流程。它不仅仅是一个代码补全插件，而是一个能够理解项目上下文、自主规划任务并执行复杂编码操作的智能体。无论是生成全新功能、重构现有代码，还是排查难以定位的 Bug，OpenCode 都能通过自然语言交互高效完成，显著减少开发者在重复性劳动和上下文切换上的时间消耗。\n\n这款工具专为软件开发者、工程师及技术研究人员设计，特别适合希望利用大模型能力来提升编码效率、加速原型开发或处理遗留代码维护的专业人群。其核心亮点在于完全开源的架构，这意味着用户可以审查代码逻辑、自定义行为策略，甚至私有化部署以保障数据安全，彻底打破了传统闭源 AI 助手的“黑盒”限制。\n\n在技术体验上，OpenCode 提供了灵活的终端界面（Terminal UI）和正在测试中的桌面应用程序，支持 macOS、Windows 及 Linux 全平台。它兼容多种包管理工具，安装便捷，并能无缝集成到现有的开发环境中。无论您是追求极致控制权的资深极客，还是渴望提升产出的独立开发者，OpenCode 都提供了一个透明、可信",144296,1,"2026-04-16T14:50:03",[13,45],"插件",{"id":47,"name":48,"github_repo":49,"description_zh":50,"stars":51,"difficulty_score":32,"last_commit_at":52,"category_tags":53,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 
都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",108322,"2026-04-10T11:39:34",[14,15,13],{"id":55,"name":56,"github_repo":57,"description_zh":58,"stars":59,"difficulty_score":32,"last_commit_at":60,"category_tags":61,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[45,13,15,14],{"id":63,"github_repo":64,"name":65,"description_en":66,"description_zh":67,"ai_summary_zh":68,"readme_en":69,"readme_zh":70,"quickstart_zh":71,"use_case_zh":72,"hero_image_url":73,"owner_login":74,"owner_name":74,"owner_avatar_url":75,"owner_bio":76,"owner_company":76,"owner_location":76,"owner_email":76,"owner_twitter":76,"owner_website":76,"owner_url":77,"languages":76,"stars":78,"forks":79,"last_commit_at":80,"license":81,"difficulty_score":32,"env_os":82,"env_gpu":83,"env_ram":84,"env_deps":85,"category_tags":88,"github_topics":90,"view_count":32,"oss_zip_url":76,"oss_zip_packed_at":76,"status":17,"created_at":106,"updated_at":107,"faqs":108,"releases":109},8323,"alvinunreal\u002Fawesome-autoresearch","awesome-autoresearch","A curated list of autonomous improvement loops, research agents, and autoresearch-style systems inspired by Karpathy's autoresearch.","awesome-autoresearch 是一个精心整理的开源项目清单，专注于收录受 Karpathy 提出的“自动研究”（autoresearch）理念启发的各类工具。它汇集了能够自主执行改进循环、独立开展研究的智能体系统及其衍生项目，旨在帮助开发者快速找到实现 AI 自我迭代与自动化科研的优质资源。\n\n在 AI 技术飞速发展的今天，如何构建能自动发现问题、分析失败模式并应用修复方案的系统是一大挑战。awesome-autoresearch 
通过分类整理通用型框架、特定领域适配方案、硬件移植版本以及评估基准，解决了研究者难以从零开始搭建或筛选合适自主研究工具的痛点。无论是希望让代码自动调试优化的工程师，还是试图构建全自动实验流程的科研人员，都能在此找到灵感与现成方案。\n\n该项目特别适合 AI 开发者、研究人员以及对自主智能体感兴趣的技术爱好者使用。其独特亮点在于不仅收录了基于 Claude Code、Gemini CLI 等不同大模型原生的技能插件，还涵盖了支持断点续跑、并行实验及实时指标追踪的高级框架。部分项目更创新地引入搜索引擎作为实时验证源，实现了真正的“无人值守”过夜运行模式","awesome-autoresearch 是一个精心整理的开源项目清单，专注于收录受 Karpathy 提出的“自动研究”（autoresearch）理念启发的各类工具。它汇集了能够自主执行改进循环、独立开展研究的智能体系统及其衍生项目，旨在帮助开发者快速找到实现 AI 自我迭代与自动化科研的优质资源。\n\n在 AI 技术飞速发展的今天，如何构建能自动发现问题、分析失败模式并应用修复方案的系统是一大挑战。awesome-autoresearch 通过分类整理通用型框架、特定领域适配方案、硬件移植版本以及评估基准，解决了研究者难以从零开始搭建或筛选合适自主研究工具的痛点。无论是希望让代码自动调试优化的工程师，还是试图构建全自动实验流程的科研人员，都能在此找到灵感与现成方案。\n\n该项目特别适合 AI 开发者、研究人员以及对自主智能体感兴趣的技术爱好者使用。其独特亮点在于不仅收录了基于 Claude Code、Gemini CLI 等不同大模型原生的技能插件，还涵盖了支持断点续跑、并行实验及实时指标追踪的高级框架。部分项目更创新地引入搜索引擎作为实时验证源，实现了真正的“无人值守”过夜运行模式。作为一个社区驱动的索引库，awesome-autoresearch 为探索 AI 自主进化路径提供了高价值的导航图。","\u003Cdiv align=\"center\">\n\n# 🔬 Awesome Autoresearch\n\n**A curated, high-signal index of autonomous improvement loops, research agents, and descendants inspired by** [**karpathy\u002Fautoresearch**](https:\u002F\u002Fgithub.com\u002Fkarpathy\u002Fautoresearch).\n\n[![Awesome](https:\u002F\u002Fawesome.re\u002Fbadge.svg)](https:\u002F\u002Fawesome.re)\n[![PRs Welcome](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FPRs-welcome-brightgreen.svg?style=flat-square)](.\u002FCONTRIBUTING.md)\n[![License: CC0-1.0](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002Flicense-CC0--1.0-blue.svg?style=flat-square)](.\u002FLICENSE)\n\n\u003Csub>by **Boring Dystopia Development**\u003C\u002Fsub>\n\n\u003Cp align=\"center\">\n  \u003Ca href=\"https:\u002F\u002Fboringdystopia.ai\u002F\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002Fboringdystopia.ai-111111?style=for-the-badge&logo=vercel&logoColor=white\" alt=\"boringdystopia.ai\" \u002F>\n  \u003C\u002Fa>&nbsp;\n  \u003Ca href=\"https:\u002F\u002Fx.com\u002Falvinunreal\">\n    \u003Cimg 
src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FX-@alvinunreal-000000?style=for-the-badge&logo=x&logoColor=white\" alt=\"X @alvinunreal\" \u002F>\n  \u003C\u002Fa>&nbsp;\n  \u003Ca href=\"https:\u002F\u002Ft.me\u002Fboringdystopiadevelopment\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FTelegram-Join%20channel-2CA5E0?style=for-the-badge&logo=telegram&logoColor=white\" alt=\"Telegram Join channel\" \u002F>\n  \u003C\u002Fa>\n\u003C\u002Fp>\n\n\u003C\u002Fdiv>\n\n## Contents\n\n- [🛠️ General-purpose descendants](#️-general-purpose-descendants)\n- [🔬 Research-agent systems](#-research-agent-systems)\n- [💻 Platform ports and hardware forks](#-platform-ports-and-hardware-forks)\n- [🎯 Domain-specific adaptations](#-domain-specific-adaptations)\n- [📊 Evaluation & benchmarks](#-evaluation--benchmarks)\n- [📈 Notable use cases and writeups](#-notable-use-cases-and-writeups)\n- [📚 Related resources](#-related-resources)\n- [📄 License](#-license)\n\n## 🛠️ General-purpose descendants\n\n- [kayba-ai\u002Frecursive-improve](https:\u002F\u002Fgithub.com\u002Fkayba-ai\u002Frecursive-improve) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fkayba-ai\u002Frecursive-improve?style=social) - Recursive self-improvement framework where agents capture execution traces, analyze failure patterns, and apply targeted fixes with keep-or-revert evaluation.\n- [vukrosic\u002Fauto-research](https:\u002F\u002Fgithub.com\u002Fvukrosic\u002Fauto-research) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvukrosic\u002Fauto-research?style=social) - Docs-only control plane for an open autonomous AI research lab — file-based operating model for human direction and agent execution.\n- [uditgoenka\u002Fautoresearch](https:\u002F\u002Fgithub.com\u002Fuditgoenka\u002Fautoresearch) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fuditgoenka\u002Fautoresearch?style=social) - Claude Code skill 
that generalizes autoresearch into a reusable loop for software, docs, security, shipping, debugging, and other measurable goals.\n- [leo-lilinxiao\u002Fcodex-autoresearch](https:\u002F\u002Fgithub.com\u002Fleo-lilinxiao\u002Fcodex-autoresearch) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fleo-lilinxiao\u002Fcodex-autoresearch?style=social) - Codex-native autoresearch skill with resume support, lessons across runs, optional parallel experiments, and mode-specific workflows.\n- [supratikpm\u002Fgemini-autoresearch](https:\u002F\u002Fgithub.com\u002Fsupratikpm\u002Fgemini-autoresearch) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fsupratikpm\u002Fgemini-autoresearch?style=social) - Gemini CLI skill that generalises autoresearch to any measurable goal. Gemini-native: uses Google Search grounding as a live verification source inside the loop, true headless overnight mode via --yolo --prompt, and 1M token context. Also works in Antigravity IDE via .agents\u002Fskills\u002F.\n- [davebcn87\u002Fpi-autoresearch](https:\u002F\u002Fgithub.com\u002Fdavebcn87\u002Fpi-autoresearch) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdavebcn87\u002Fpi-autoresearch?style=social) - `pi` extension plus dashboard for persistent experiment loops, live metrics, confidence tracking, and resumable autoresearch sessions.\n- [drivelineresearch\u002Fautoresearch-claude-code](https:\u002F\u002Fgithub.com\u002Fdrivelineresearch\u002Fautoresearch-claude-code) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdrivelineresearch\u002Fautoresearch-claude-code?style=social) - Claude Code plugin\u002Fskill port of `pi-autoresearch`, with a clean experiment-loop workflow and a concrete biomechanics case study.\n- [greyhaven-ai\u002Fautocontext](https:\u002F\u002Fgithub.com\u002Fgreyhaven-ai\u002Fautocontext) ![GitHub 
stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgreyhaven-ai\u002Fautocontext?style=social) - Closed-loop control plane for repeated agent improvement, with evaluation, persistent knowledge, staged validation, and optional distillation into cheaper local runtimes.\n- [jmilinovich\u002Fgoal-md](https:\u002F\u002Fgithub.com\u002Fjmilinovich\u002Fgoal-md) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjmilinovich\u002Fgoal-md?style=social) - Generalizes autoresearch into a `GOAL.md` pattern for repos where the agent must first construct a measurable fitness function before it can optimize.\n- [james-s-tayler\u002Flazy-developer](https:\u002F\u002Fgithub.com\u002Fjames-s-tayler\u002Flazy-developer) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjames-s-tayler\u002Flazy-developer?style=social) - Claude Code skill that orchestrates autoresearch across a prioritized sequence of optimization goals (coverage, test speed, build speed, complexity, LOC, performance) using GOAL.md as the engine. Supports standalone and Ralph Mode multi-instance execution.\n- [mutable-state-inc\u002Fautoresearch-at-home](https:\u002F\u002Fgithub.com\u002Fmutable-state-inc\u002Fautoresearch-at-home) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmutable-state-inc\u002Fautoresearch-at-home?style=social) - Collaborative fork of upstream autoresearch that adds experiment claiming, shared best-config syncing, hypothesis exchange, and swarm-style coordination across many single-GPU agents.\n- [zkarimi22\u002Fautoresearch-anything](https:\u002F\u002Fgithub.com\u002Fzkarimi22\u002Fautoresearch-anything) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fzkarimi22\u002Fautoresearch-anything?style=social) - Generalizes autoresearch to **any measurable metric** — system prompts, API performance, landing pages, test suites, config tuning, SQL queries. 
\"If you can measure it, you can optimize it.\"\n- [Entrpi\u002Fautoresearch-everywhere](https:\u002F\u002Fgithub.com\u002FEntrpi\u002Fautoresearch-everywhere) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FEntrpi\u002Fautoresearch-everywhere?style=social) - Cross-platform expansion that auto-detects hardware config and starts the loop. The \"glue and generalization\" half of autoresearch.\n- [ShengranHu\u002FADAS](https:\u002F\u002Fgithub.com\u002FShengranHu\u002FADAS) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FShengranHu\u002FADAS?style=social) - **Automated Design of Agentic Systems** — ICLR 2025. Meta-agents that invent novel agent architectures by programming them in code.\n- [MaximeRobeyns\u002Fself_improving_coding_agent](https:\u002F\u002Fgithub.com\u002FMaximeRobeyns\u002Fself_improving_coding_agent) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FMaximeRobeyns\u002Fself_improving_coding_agent?style=social) - **SICA**: Self-Improving Coding Agent that edits its own codebase. 
ICLR 2025 Workshop paper demonstrating scaffold-level self-improvement on coding benchmarks.\n- [peterskoett\u002Fself-improving-agent](https:\u002F\u002Fgithub.com\u002Fpeterskoett\u002Fself-improving-agent) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpeterskoett\u002Fself-improving-agent?style=social) - Alternative self-improving agent architecture with reflection and meta-learning cycles.\n- [metauto-ai\u002FHGM](https:\u002F\u002Fgithub.com\u002Fmetauto-ai\u002FHGM) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmetauto-ai\u002FHGM?style=social) - **Huxley-Gödel Machine** for coding agents — applies self-improvement to SWE-bench performance via meta-level optimization.\n- [gepa-ai\u002Fgepa](https:\u002F\u002Fgithub.com\u002Fgepa-ai\u002Fgepa) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgepa-ai\u002Fgepa?style=social) - **GEPA (Genetic-Pareto)** — ICLR 2026 Oral. Reflective prompt evolution that outperforms RL (GRPO) on benchmarks. Optimizes any textual parameters against any metric using natural language reflection.\n- [MrTsepa\u002Fautoevolve](https:\u002F\u002Fgithub.com\u002FMrTsepa\u002Fautoevolve) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FMrTsepa\u002Fautoevolve?style=social) - GEPA-inspired autoresearch for self-play: mutate code strategies, evaluate head-to-head, rate with Elo\u002FBradley-Terry, branch from the Pareto front. Agent reads match traces to target mutations. 
Works as a Claude Code skill.\n- [HKUDS\u002FClawTeam](https:\u002F\u002Fgithub.com\u002FHKUDS\u002FClawTeam) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FHKUDS\u002FClawTeam?style=social) - Agent swarm intelligence for autoresearch — spawns parallel GPU research directions, distributes work across agents, aggregates results.\n- [Orchestra-Research\u002FAI-Research-SKILLs](https:\u002F\u002Fgithub.com\u002FOrchestra-Research\u002FAI-Research-SKILLs) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FOrchestra-Research\u002FAI-Research-SKILLs?style=social) - Comprehensive skill library including autoresearch orchestration with two-loop architecture (inner optimization + outer synthesis).\n- [WecoAI\u002Faideml](https:\u002F\u002Fgithub.com\u002FWecoAI\u002Faideml) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FWecoAI\u002Faideml?style=social) - **AIDE**: Tree-search ML engineering agent that autonomously improves model performance via iterative code generation and evaluation.\n- [weco.ai](https:\u002F\u002Fweco.ai) - **Weco**: Cloud platform for AIDE with observability, experiment tracking, and managed runs — brings the autoresearch loop into production.\n\n## 🔬 Research-agent systems\n\n- [aiming-lab\u002FAutoResearchClaw](https:\u002F\u002Fgithub.com\u002Faiming-lab\u002FAutoResearchClaw) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Faiming-lab\u002FAutoResearchClaw?style=social) - End-to-end research pipeline that turns a topic into literature review, experiments, analysis, peer review, and paper drafts; broader than autoresearch, but clearly in the same lineage.\n- [OpenRaiser\u002FNanoResearch](https:\u002F\u002Fgithub.com\u002FOpenRaiser\u002FNanoResearch) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FOpenRaiser\u002FNanoResearch?style=social) - End-to-end autonomous research engine that plans experiments, 
generates code, runs jobs locally or on SLURM, analyzes real results, and writes papers grounded in those outputs.\n- [wanshuiyin\u002FAuto-claude-code-research-in-sleep](https:\u002F\u002Fgithub.com\u002Fwanshuiyin\u002FAuto-claude-code-research-in-sleep) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fwanshuiyin\u002FAuto-claude-code-research-in-sleep?style=social) - Markdown-first research workflows for Claude Code and other agents, centered on autonomous literature review, experiments, paper iteration, and cross-model critique.\n- [Sibyl-Research-Team\u002FAutoResearch-SibylSystem](https:\u002F\u002Fgithub.com\u002FSibyl-Research-Team\u002FAutoResearch-SibylSystem) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FSibyl-Research-Team\u002FAutoResearch-SibylSystem?style=social) - Fully autonomous AI scientist built on Claude Code, with explicit AutoResearch lineage, multi-agent research iteration, GPU experiment execution, and a self-evolving outer loop.\n- [eimenhmdt\u002Fautoresearcher](https:\u002F\u002Fgithub.com\u002Feimenhmdt\u002Fautoresearcher) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Feimenhmdt\u002Fautoresearcher?style=social) - Early open-source package for automating scientific workflows, currently centered on literature-review generation with an ambition toward broader autonomous research.\n- [hyperspaceai\u002Fagi](https:\u002F\u002Fgithub.com\u002Fhyperspaceai\u002Fagi) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhyperspaceai\u002Fagi?style=social) - Distributed, peer-to-peer research network where autonomous agents run experiments, gossip findings, maintain CRDT leaderboards, and archive results to GitHub across multiple research domains.\n- [SakanaAI\u002FAI-Scientist](https:\u002F\u002Fgithub.com\u002FSakanaAI\u002FAI-Scientist) ![GitHub 
stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FSakanaAI\u002FAI-Scientist?style=social) - **The AI Scientist**: First comprehensive system for fully automatic scientific discovery. From idea generation to paper writing with minimal human supervision.\n- [SakanaAI\u002FAI-Scientist-v2](https:\u002F\u002Fgithub.com\u002FSakanaAI\u002FAI-Scientist-v2) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FSakanaAI\u002FAI-Scientist-v2?style=social) - Workshop-level automated scientific discovery via agentic tree search. Removes template dependency from v1, generalizes across research domains.\n- [HKUDS\u002FAI-Researcher](https:\u002F\u002Fgithub.com\u002FHKUDS\u002FAI-Researcher) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FHKUDS\u002FAI-Researcher?style=social) - NeurIPS 2025 paper. Full end-to-end research automation: hypothesis → experiments → manuscript → peer review. Production version at [novix.science](https:\u002F\u002Fnovix.science\u002Fchat).\n- [openags\u002FAuto-Research](https:\u002F\u002Fgithub.com\u002Fopenags\u002FAuto-Research) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fopenags\u002FAuto-Research?style=social) - **OpenAGS**: Orchestrates a team of AI agents across the full research lifecycle — lit review, hypothesis generation, experiments, manuscript writing, and peer review.\n- [SamuelSchmidgall\u002FAgentLaboratory](https:\u002F\u002Fgithub.com\u002FSamuelSchmidgall\u002FAgentLaboratory) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FSamuelSchmidgall\u002FAgentLaboratory?style=social) - End-to-end autonomous research workflow: idea → literature review → experiments → report. 
Supports both autonomous and co-pilot modes.\n- [AgentRxiv](https:\u002F\u002Fagentrxiv.github.io\u002F) - Collaborative autonomous research framework where agent laboratories share a preprint server to build on each other's work iteratively.\n- [JinheonBaek\u002FResearchAgent](https:\u002F\u002Fgithub.com\u002FJinheonBaek\u002FResearchAgent) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FJinheonBaek\u002FResearchAgent?style=social) - Iterative research idea generation over scientific literature with LLMs. Multi-agent review and feedback loops.\n- [du-nlp-lab\u002FMLR-Copilot](https:\u002F\u002Fgithub.com\u002Fdu-nlp-lab\u002FMLR-Copilot) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdu-nlp-lab\u002FMLR-Copilot?style=social) - Autonomous ML research framework — generates ideas, implements experiments, analyzes results.\n- [MASWorks\u002FML-Agent](https:\u002F\u002Fgithub.com\u002FMASWorks\u002FML-Agent) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FMASWorks\u002FML-Agent?style=social) - Reinforcing LLM agents for autonomous ML engineering. 
Learns from trial and error to improve model performance.\n- [PouriaRouzrokh\u002FLatteReview](https:\u002F\u002Fgithub.com\u002FPouriaRouzrokh\u002FLatteReview) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FPouriaRouzrokh\u002FLatteReview?style=social) - Low-code Python package for **automated systematic literature reviews** via AI-powered agents.\n- [LitLLM\u002FLitLLM](https:\u002F\u002Fgithub.com\u002FLitLLM\u002FLitLLM) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FLitLLM\u002FLitLLM?style=social) - AI-powered literature review assistant using RAG for accurate, well-structured related-work sections in academic writing.\n- [Agent Laboratory](https:\u002F\u002Fagentlaboratory.github.io\u002F) - Three-phase research pipeline: Literature Review → Experimentation → Report Writing, with specialized agents for each phase.\n- [WecoAI\u002Faideml](https:\u002F\u002Fgithub.com\u002FWecoAI\u002Faideml) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FWecoAI\u002Faideml?style=social) - **AIDE**: AI-Driven Exploration — tree-search-based ML engineering agent that automates experiment design, code generation, and evaluation. 
Treats ML engineering as code optimization against any metric.\n\n## 💻 Platform ports and hardware forks\n\n- [gianfrancopiana\u002Fopenclaw-autoresearch](https:\u002F\u002Fgithub.com\u002Fgianfrancopiana\u002Fopenclaw-autoresearch) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgianfrancopiana\u002Fopenclaw-autoresearch?style=social) - OpenClaw port of pi-autoresearch; autonomous experiment loop for any optimization target with statistical confidence scoring.\n- [miolini\u002Fautoresearch-macos](https:\u002F\u002Fgithub.com\u002Fmiolini\u002Fautoresearch-macos) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmiolini\u002Fautoresearch-macos?style=social) - Widely adopted macOS fork that adapts upstream autoresearch for Apple Silicon \u002F MPS while preserving the original loop shape.\n- [trevin-creator\u002Fautoresearch-mlx](https:\u002F\u002Fgithub.com\u002Ftrevin-creator\u002Fautoresearch-mlx) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftrevin-creator\u002Fautoresearch-mlx?style=social) - MLX-native Apple Silicon port that keeps the upstream fixed-budget `val_bpb` loop while removing the PyTorch\u002FCUDA dependency entirely.\n- [jsegov\u002Fautoresearch-win-rtx](https:\u002F\u002Fgithub.com\u002Fjsegov\u002Fautoresearch-win-rtx) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjsegov\u002Fautoresearch-win-rtx?style=social) - Windows-native RTX fork focused on consumer NVIDIA GPUs, with explicit VRAM floors and a practical desktop setup path.\n- [iii-hq\u002Fn-autoresearch](https:\u002F\u002Fgithub.com\u002Fiii-hq\u002Fn-autoresearch) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fiii-hq\u002Fn-autoresearch?style=social) - Multi-GPU autoresearch infrastructure with structured experiment tracking, adaptive search strategy, crash recovery, and queryable orchestration around the classic `train.py` loop.\n- 
[lucasgelfond\u002Fautoresearch-webgpu](https:\u002F\u002Fgithub.com\u002Flucasgelfond\u002Fautoresearch-webgpu) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flucasgelfond\u002Fautoresearch-webgpu?style=social) - Browser\u002FWebGPU port that lets agents generate training code, run experiments in-browser, and feed results back into the loop without a Python setup.\n- [tonitangpotato\u002Fautoresearch-engram](https:\u002F\u002Fgithub.com\u002Ftonitangpotato\u002Fautoresearch-engram) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftonitangpotato\u002Fautoresearch-engram?style=social) - Fork with **persistent cognitive memory** — frequency-weighted retrieval of cross-session knowledge for improved experiment continuity.\n- **Colab\u002FKaggle T4 port** - Adapts autoresearch for free T4 GPUs (Google Colab \u002F Kaggle) with zero cost and zero local setup. Key changes: Flash Attention 3 → PyTorch SDPA, removes H100-only kernel dependency. ([upstream issue #208](https:\u002F\u002Fgithub.com\u002Fkarpathy\u002Fautoresearch\u002Fissues\u002F208))\n- [ArmanJR-Lab\u002Fautoautoresearch](https:\u002F\u002Fgithub.com\u002FArmanJR-Lab\u002Fautoautoresearch) - Jetson AGX Orin port with a **director** — a Go binary that acts as a \"creative director\" injecting novelty (arxiv papers + DeepSeek Reasoner) into the loop to escape local minima. 
Includes multi-experiment comparison (baseline vs director-guided) with detailed stall analysis.\n\n## 🎯 Domain-specific adaptations\n\n- [mattprusak\u002Fautoresearch-genealogy](https:\u002F\u002Fgithub.com\u002Fmattprusak\u002Fautoresearch-genealogy) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmattprusak\u002Fautoresearch-genealogy?style=social) - Applies the autoresearch pattern to genealogy, using structured prompts, archive guides, source checks, and vault workflows to iteratively expand and verify family-history research.\n- [ArchishmanSengupta\u002Fautovoiceevals](https:\u002F\u002Fgithub.com\u002FArchishmanSengupta\u002Fautovoiceevals) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FArchishmanSengupta\u002Fautovoiceevals?style=social) - Uses adversarial callers plus keep-or-revert prompt edits to harden voice AI agents across Vapi, Smallest AI, and ElevenLabs.\n- [chrisworsey55\u002Fatlas-gic](https:\u002F\u002Fgithub.com\u002Fchrisworsey55\u002Fatlas-gic) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fchrisworsey55\u002Fatlas-gic?style=social) - Applies the autoresearch keep-or-revert loop to trading agents, optimizing prompts and portfolio orchestration against rolling Sharpe ratio instead of model loss.\n- [RightNow-AI\u002Fautokernel](https:\u002F\u002Fgithub.com\u002FRightNow-AI\u002Fautokernel) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FRightNow-AI\u002Fautokernel?style=social) - Applies the autoresearch loop to GPU kernel optimization: profile bottlenecks, edit one kernel, benchmark, keep or revert, repeat.\n- [Rkcr7\u002Fautoresearch-sudoku](https:\u002F\u002Fgithub.com\u002FRkcr7\u002Fautoresearch-sudoku) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FRkcr7\u002Fautoresearch-sudoku?style=social) - Enhanced autoresearch workflow where an AI agent iteratively rewrites and benchmarks a Rust sudoku 
solver, ultimately beating leading human-built solvers on hard benchmark sets.\n- [jeongph\u002Fautospec](https:\u002F\u002Fgithub.com\u002Fjeongph\u002Fautospec) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjeongph\u002Fautospec?style=social) - Reads natural-language business rules and autonomously builds a Spring Boot service with tests via the keep-or-revert loop. Evaluates with Gradle build + JUnit XML. 119-line skeleton to 950 lines in 5 cycles.\n\n## 📊 Evaluation & benchmarks\n\n- [snap-stanford\u002FMLAgentBench](https:\u002F\u002Fgithub.com\u002Fsnap-stanford\u002FMLAgentBench) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fsnap-stanford\u002FMLAgentBench?style=social) - Benchmark suite for evaluating AI agents on ML experimentation tasks. 13 tasks from CIFAR-10 to BabyLM.\n- [openai\u002Fmle-bench](https:\u002F\u002Fgithub.com\u002Fopenai\u002Fmle-bench) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fopenai\u002Fmle-bench?style=social) - OpenAI's benchmark for measuring how well AI agents perform at ML engineering.\n- [chchenhui\u002Fmlrbench](https:\u002F\u002Fgithub.com\u002Fchchenhui\u002Fmlrbench) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fchchenhui\u002Fmlrbench?style=social) - MLR-Bench: Evaluating AI agents on open-ended ML research. 
201 tasks from NeurIPS\u002FICLR\u002FICML workshops.\n- [gersteinlab\u002FML-Bench](https:\u002F\u002Fgithub.com\u002Fgersteinlab\u002FML-Bench) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgersteinlab\u002FML-Bench?style=social) - Evaluates LLMs and agents for ML tasks on repository-level code.\n- [THUDM\u002FAgentBench](https:\u002F\u002Fgithub.com\u002FTHUDM\u002FAgentBench) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FTHUDM\u002FAgentBench?style=social) - Comprehensive benchmark for LLM-as-Agent evaluation across 8 distinct environments. ICLR 2024.\n\n## 📈 Notable use cases and writeups\n\n- **Shopify Liquid optimization** - Tobi Lütke shared an autoresearch-style optimization run on Shopify's Liquid engine, with public traces showing major parse\u002Frender speedups and allocation reductions. ([tweet](https:\u002F\u002Fx.com\u002Ftobi\u002Fstatus\u002F2032212531846971413), [PR with traces](https:\u002F\u002Fgithub.com\u002FShopify\u002Fliquid\u002Fpull\u002F2056))\n- **Driveline baseball biomechanics** - Public autoresearch-style experiment loop for pitch-velocity prediction from biomechanics data, with large reported gains in model quality. ([tweet](https:\u002F\u002Fx.com\u002Fdrivelinekyle\u002Fstatus\u002F2032242254035992610))\n- **Tennis XGBoost prediction + reward hacking writeup** - Nick Oak documents an autoresearch-inspired loop for tennis match prediction, including where the optimization setup went wrong. 
([blog](https:\u002F\u002Fnickoak.com\u002Fposts\u002Ftennis-xgboost-autoresearch\u002F) · [repo](https:\u002F\u002Fgithub.com\u002Fbuildoak\u002Ftennis-xgboost-autoresearch) · [gamed branch](https:\u002F\u002Fgithub.com\u002Fbuildoak\u002Ftennis-xgboost-autoresearch\u002Ftree\u002Farchived\u002Fgamed-iterations))\n- **Vesuvius Challenge ink detection swarm** - Multi-agent experimental loop applied to ancient-scroll ink detection, with a strong writeup on cross-scroll generalization improvements. ([blog](https:\u002F\u002Fscrollprize.substack.com\u002Fp\u002Fwe-are-cooking))\n- **Earth system model optimization** - Hybrid workflow where an LLM proposes equation structures and a search process tunes parameters, showing how the autoresearch pattern extends into scientific modeling. ([tweet](https:\u002F\u002Fx.com\u002Fdevparagiri\u002Fstatus\u002F2035075626273739068), [blog](https:\u002F\u002Fparagiri.com\u002Fblog\u002F2026\u002Fautoresearch-earth-system-models\u002F))\n- **The Agentic Researcher** - Paper: \"A Practical Guide to AI-Assisted Research in Mathematics and Machine Learning.\" Cites autoresearch as the canonical example of automated ML experiment pipelines. ([arxiv 2603.15914](https:\u002F\u002Farxiv.org\u002Fhtml\u002F2603.15914))\n- **Scaling Autoresearch to GPU Clusters** - SkyPilot blog on running autoresearch on H100\u002FH200 clusters with cloud orchestration. ([SkyPilot Blog](https:\u002F\u002Fblog.skypilot.co\u002Fscaling-autoresearch\u002F))\n- **Self-Improving Coding Agents** - Addy Osmani's practical guide to setting up self-improving agent loops with Claude Code. ([article](https:\u002F\u002Faddyosmani.com\u002Fblog\u002Fself-improving-agents\u002F))\n- **autoresearch@home: Distributed AI Research** - SETI@home model applied to autoresearch — contribute GPU time to collective model optimization. 
([Ensue Blog](https:\u002F\u002Fensue.dev\u002Fblog\u002Fautoresearch-at-home\u002F))\n- **Claude Code + AutoResearch for Self-Improving Skills** - MindStudio guide to building self-improving AI skills using Claude Code with autoresearch patterns. ([article](https:\u002F\u002Fwww.mindstudio.ai\u002Fblog\u002Fclaude-code-autoresearch-self-improving-skills))\n- **100 ML Experiments Overnight** - Particula technical breakdown with domain-agnostic fork applications. ([article](https:\u002F\u002Fparticula.tech\u002Fblog\u002Fkarpathy-autoresearch-autonomous-ml-experiments))\n- **PM's Guide to Autoresearch** - Product manager's guide covering setup, community forks, and real-world applications. ([article](https:\u002F\u002Fwww.news.aakashg.com\u002Fp\u002Fautoresearch-guide-for-pms))\n- **Autoresearch 101 Builder's Playbook** - Substack deep-dive on applying autoresearch patterns to prompts, agents, and workflows with concrete examples. ([article](https:\u002F\u002Fsidsaladi.substack.com\u002Fp\u002Fautoresearch-101-builders-playbook))\n- **Kingy AI Technical Breakdown** - Detailed technical walkthrough of the autoresearch loop architecture, mutation operators, and fitness function design. ([article](https:\u002F\u002Fkingy.ai\u002Fai\u002Fautoresearch-karpathys-minimal-agent-loop-for-autonomous-llm-experimentation\u002F))\n- **Fortune Feature** - Business and industry context on why autoresearch matters for the future of autonomous AI agents. 
([article](https:\u002F\u002Ffortune.com\u002F2026\u002F03\u002F17\u002Fandrej-karpathy-loop-autonomous-ai-agents-future\u002F))\n\n## 📚 Related resources\n\nCurated lists and paper collections for AI agents, autonomous systems, and automated research:\n\n- [ai-agents-2030\u002Fawesome-deep-research-agent](https:\u002F\u002Fgithub.com\u002Fai-agents-2030\u002Fawesome-deep-research-agent) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fai-agents-2030\u002Fawesome-deep-research-agent?style=social) - Curated list of deep research agent papers and systems.\n- [YoungDubbyDu\u002FLLM-Agent-Optimization](https:\u002F\u002Fgithub.com\u002FYoungDubbyDu\u002FLLM-Agent-Optimization) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FYoungDubbyDu\u002FLLM-Agent-Optimization?style=social) - Papers on LLM agent optimization methods.\n- [VoltAgent\u002Fawesome-ai-agent-papers](https:\u002F\u002Fgithub.com\u002FVoltAgent\u002Fawesome-ai-agent-papers) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FVoltAgent\u002Fawesome-ai-agent-papers?style=social) - Curated AI agent papers from 2026 — agent engineering, memory, evaluation, workflows, and autonomous systems.\n- [masamasa59\u002Fai-agent-papers](https:\u002F\u002Fgithub.com\u002Fmasamasa59\u002Fai-agent-papers) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmasamasa59\u002Fai-agent-papers?style=social) - AI agent research papers updated biweekly via automated arxiv search with curated selection.\n- [tmgthb\u002FAutonomous-Agents](https:\u002F\u002Fgithub.com\u002Ftmgthb\u002FAutonomous-Agents) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftmgthb\u002FAutonomous-Agents?style=social) - Autonomous agents research papers, updated daily.\n- [HKUST-KnowComp\u002FAwesome-LLM-Scientific-Discovery](https:\u002F\u002Fgithub.com\u002FHKUST-KnowComp\u002FAwesome-LLM-Scientific-Discovery) ![GitHub 
stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FHKUST-KnowComp\u002FAwesome-LLM-Scientific-Discovery?style=social) - EMNLP 2025 survey on LLMs in scientific discovery.\n- [openags\u002FAwesome-AI-Scientist-Papers](https:\u002F\u002Fgithub.com\u002Fopenags\u002FAwesome-AI-Scientist-Papers) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fopenags\u002FAwesome-AI-Scientist-Papers?style=social) - Collection of AI Scientist \u002F Robot Scientist papers.\n- [agenticscience.github.io](https:\u002F\u002Fagenticscience.github.io\u002F) - Survey: \"From AI for Science to Agentic Science: A Survey on Autonomous Scientific Discovery.\"\n- [dspy.ai\u002FGEPA](https:\u002F\u002Fdspy.ai\u002Fapi\u002Foptimizers\u002FGEPA\u002Foverview\u002F) - DSPy integration of GEPA reflective prompt optimizer for compound AI systems.\n- [OpenAI Cookbook: Self-Evolving Agents](https:\u002F\u002Fdevelopers.openai.com\u002Fcookbook\u002Fexamples\u002Fpartners\u002Fself_evolving_agents\u002Fautonomous_agent_retraining) - Cookbook for autonomous agent retraining using GEPA-style reflective evolution.\n- [WecoAI\u002Fawesome-autoresearch](https:\u002F\u002Fgithub.com\u002FWecoAI\u002Fawesome-autoresearch) ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FWecoAI\u002Fawesome-autoresearch?style=social) - Curated list of AutoResearch use cases with verifiable traces and progress charts, organized by domain (LLM training, GPU kernels, voice agents, trading, etc.).\n\n\u003Cdiv align=\"center\">\n\n## Star History\n\n\u003Ca href=\"https:\u002F\u002Fwww.star-history.com\u002F?type=date&repos=alvinunreal%2Fawesome-autoresearch\">\n \u003Cpicture>\n   \u003Csource media=\"(prefers-color-scheme: dark)\" srcset=\"https:\u002F\u002Fapi.star-history.com\u002Fimage?repos=alvinunreal%2Fawesome-autoresearch&type=date&theme=dark&legend=top-left\" \u002F>\n   \u003Csource media=\"(prefers-color-scheme: light)\" 
srcset=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Falvinunreal_awesome-autoresearch_readme_cb955c9826be.png\" \u002F>\n   \u003Cimg alt=\"Star History Chart\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Falvinunreal_awesome-autoresearch_readme_cb955c9826be.png\" \u002F>\n \u003C\u002Fpicture>\n\u003C\u002Fa>\n\n## 📄 License\n\nThis list is released under [CC0-1.0](.\u002FLICENSE).\n","\u003Cdiv align=\"center\">\n\n# 🔬 令人惊叹的自动研究\n\n**一个精心策划、高信号的索引，涵盖了自主改进循环、研究代理以及受** [**karpathy\u002Fautoresearch**](https:\u002F\u002Fgithub.com\u002Fkarpathy\u002Fautoresearch) **启发的衍生项目。**\n\n[![Awesome](https:\u002F\u002Fawesome.re\u002Fbadge.svg)](https:\u002F\u002Fawesome.re)\n[![欢迎 PR](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FPRs-welcome-brightgreen.svg?style=flat-square)](.\u002FCONTRIBUTING.md)\n[![许可证：CC0-1.0](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002Flicense-CC0--1.0-blue.svg?style=flat-square)](.\u002FLICENSE)\n\n\u003Csub>由 **Boring Dystopia Development** 提供\u003C\u002Fsub>\n\n\u003Cp align=\"center\">\n  \u003Ca href=\"https:\u002F\u002Fboringdystopia.ai\u002F\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002Fboringdystopia.ai-111111?style=for-the-badge&logo=vercel&logoColor=white\" alt=\"boringdystopia.ai\" \u002F>\n  \u003C\u002Fa>&nbsp;\n  \u003Ca href=\"https:\u002F\u002Fx.com\u002Falvinunreal\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FX-@alvinunreal-000000?style=for-the-badge&logo=x&logoColor=white\" alt=\"X @alvinunreal\" \u002F>\n  \u003C\u002Fa>&nbsp;\n  \u003Ca href=\"https:\u002F\u002Ft.me\u002Fboringdystopiadevelopment\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FTelegram-Join%20channel-2CA5E0?style=for-the-badge&logo=telegram&logoColor=white\" alt=\"Telegram 加入频道\" \u002F>\n  \u003C\u002Fa>\n\u003C\u002Fp>\n\n\u003C\u002Fdiv>\n\n## 目录\n\n- [🛠️ 通用型衍生项目](#️-general-purpose-descendants)\n- [🔬 研究代理系统](#-research-agent-systems)\n- [💻 
平台移植与硬件分叉](#-platform-ports-and-hardware-forks)\n- [🎯 领域特定的适配](#-domain-specific-adaptations)\n- [📊 评估与基准测试](#-evaluation--benchmarks)\n- [📈 值得关注的应用案例与报告](#-notable-use-cases-and-writeups)\n- [📚 相关资源](#-related-resources)\n- [📄 许可证](#-license)\n\n## 🛠️ 通用型衍生项目\n\n- [kayba-ai\u002Frecursive-improve](https:\u002F\u002Fgithub.com\u002Fkayba-ai\u002Frecursive-improve) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fkayba-ai\u002Frecursive-improve?style=social) - 递归式自我改进框架，代理会捕获执行轨迹、分析失败模式，并通过保留或回滚的评估机制应用针对性修复。\n- [vukrosic\u002Fauto-research](https:\u002F\u002Fgithub.com\u002Fvukrosic\u002Fauto-research) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fvukrosic\u002Fauto-research?style=social) - 面向开放自治AI研究实验室的纯文档控制平面——基于文件的操作模式，用于人类指导与代理执行。\n- [uditgoenka\u002Fautoresearch](https:\u002F\u002Fgithub.com\u002Fuditgoenka\u002Fautoresearch) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fuditgoenka\u002Fautoresearch?style=social) - Claude Code技能，将自动研究泛化为一个可重用的循环，适用于软件开发、文档编写、安全防护、产品发布、调试以及其他可衡量的目标。\n- [leo-lilinxiao\u002Fcodex-autoresearch](https:\u002F\u002Fgithub.com\u002Fleo-lilinxiao\u002Fcodex-autoresearch) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fleo-lilinxiao\u002Fcodex-autoresearch?style=social) - 原生Codex的自动研究技能，支持断点续跑、跨次运行的经验总结、可选的并行实验以及特定模式的工作流。\n- [supratikpm\u002Fgemini-autoresearch](https:\u002F\u002Fgithub.com\u002Fsupratikpm\u002Fgemini-autoresearch) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fsupratikpm\u002Fgemini-autoresearch?style=social) - Gemini CLI技能，将自动研究泛化到任何可衡量的目标。Gemini原生：利用Google搜索结果作为循环内的实时验证来源，通过--yolo --prompt实现真正的无头夜间模式，并支持1M token上下文。也可在Antigravity IDE中通过.agents\u002Fskills\u002F使用。\n- [davebcn87\u002Fpi-autoresearch](https:\u002F\u002Fgithub.com\u002Fdavebcn87\u002Fpi-autoresearch) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdavebcn87\u002Fpi-autoresearch?style=social) - 
`pi`扩展及仪表盘，用于持久化的实验循环、实时指标监控、置信度跟踪和可恢复的自动研究会话。\n- [drivelineresearch\u002Fautoresearch-claude-code](https:\u002F\u002Fgithub.com\u002Fdrivelineresearch\u002Fautoresearch-claude-code) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdrivelineresearch\u002Fautoresearch-claude-code?style=social) - `pi-autoresearch`的Claude Code插件\u002F技能移植版本，拥有清晰的实验循环工作流，并结合了一个具体的生物力学案例研究。\n- [greyhaven-ai\u002Fautocontext](https:\u002F\u002Fgithub.com\u002Fgreyhaven-ai\u002Fautocontext) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgreyhaven-ai\u002Fautocontext?style=social) - 闭环控制平面，用于代理的反复改进，包含评估、持久化知识、分阶段验证，以及可选的蒸馏以降低本地运行时的成本。\n- [jmilinovich\u002Fgoal-md](https:\u002F\u002Fgithub.com\u002Fjmilinovich\u002Fgoal-md) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjmilinovich\u002Fgoal-md?style=social) - 将自动研究泛化为一种`GOAL.md`模式，适用于那些要求代理必须先构建可衡量的适应度函数才能进行优化的仓库。\n- [james-s-tayler\u002Flazy-developer](https:\u002F\u002Fgithub.com\u002Fjames-s-tayler\u002Flazy-developer) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjames-s-tayler\u002Flazy-developer?style=social) - Claude Code技能，以`GOAL.md`为引擎，按优先级顺序编排自动研究流程（代码覆盖率、测试速度、构建速度、复杂性、代码行数、性能等）。支持独立运行及Ralph Mode下的多实例执行。\n- [mutable-state-inc\u002Fautoresearch-at-home](https:\u002F\u002Fgithub.com\u002Fmutable-state-inc\u002Fautoresearch-at-home) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmutable-state-inc\u002Fautoresearch-at-home?style=social) - 上游自动研究项目的协作分支，新增了实验认领、共享最佳配置同步、假设交流以及多台单GPU代理间的群体协同功能。\n- [zkarimi22\u002Fautoresearch-anything](https:\u002F\u002Fgithub.com\u002Fzkarimi22\u002Fautoresearch-anything) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fzkarimi22\u002Fautoresearch-anything?style=social) - 将自动研究泛化到**任何可衡量的指标**——系统提示词、API性能、着陆页、测试套件、配置调优、SQL查询。“只要能测量，就能优化。”\n- 
[Entrpi\u002Fautoresearch-everywhere](https:\u002F\u002Fgithub.com\u002FEntrpi\u002Fautoresearch-everywhere) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FEntrpi\u002Fautoresearch-everywhere?style=social) - 跨平台扩展，能够自动检测硬件配置并启动循环。“粘合剂与泛化”的自动研究部分。\n- [ShengranHu\u002FADAS](https:\u002F\u002Fgithub.com\u002FShengranHu\u002FADAS) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FShengranHu\u002FADAS?style=social) - **智能体系统的自动化设计**——ICLR 2025。元智能体通过代码编程来发明新型智能体架构。\n- [MaximeRobeyns\u002Fself_improving_coding_agent](https:\u002F\u002Fgithub.com\u002FMaximeRobeyns\u002Fself_improving_coding_agent) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FMaximeRobeyns\u002Fself_improving_coding_agent?style=social) - **SICA**：自我改进型编码智能体，能够编辑自身的代码库。ICLR 2025研讨会论文，展示了在编码基准测试上 Scaffold级别的自我改进能力。\n- [peterskoett\u002Fself-improving-agent](https:\u002F\u002Fgithub.com\u002Fpeterskoett\u002Fself-improving-agent) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fpeterskoett\u002Fself-improving-agent?style=social) - 另一种具有反思与元学习循环的自我改进型智能体架构。\n- [metauto-ai\u002FHGM](https:\u002F\u002Fgithub.com\u002Fmetauto-ai\u002FHGM) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmetauto-ai\u002FHGM?style=social) - 编码智能体的**赫胥黎-哥德尔机**——通过元级别优化提升SWE-bench性能。\n- [gepa-ai\u002Fgepa](https:\u002F\u002Fgithub.com\u002Fgepa-ai\u002Fgepa) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgepa-ai\u002Fgepa?style=social) - **GEPA（遗传-帕累托）**——ICLR 2026口头报告。一种反射式提示词进化方法，在基准测试中表现优于RL（GRPO）。利用自然语言反思，针对任意文本参数优化任意指标。\n- [MrTsepa\u002Fautoevolve](https:\u002F\u002Fgithub.com\u002FMrTsepa\u002Fautoevolve) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FMrTsepa\u002Fautoevolve?style=social) - 受GEPA启发的自对弈式自动研究：变异代码策略、两两对决评估、采用Elo\u002F布拉德利-特里评分体系、从帕累托前沿分支。智能体根据对局日志确定突变方向。可作为Claude Code技能使用。\n- 
[HKUDS\u002FClawTeam](https:\u002F\u002Fgithub.com\u002FHKUDS\u002FClawTeam) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FHKUDS\u002FClawTeam?style=social) - 用于自动研究的智能体群体智能——生成并行的GPU研究方向，分配任务给各智能体，并汇总结果。\n- [Orchestra-Research\u002FAI-Research-SKILLs](https:\u002F\u002Fgithub.com\u002FOrchestra-Research\u002FAI-Research-SKILLs) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FOrchestra-Research\u002FAI-Research-SKILLs?style=social) - 全面的技能库，包括双环架构的自动研究编排（内层优化 + 外层综合）。\n- [WecoAI\u002Faideml](https:\u002F\u002Fgithub.com\u002FWecoAI\u002Faideml) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FWecoAI\u002Faideml?style=social) - **AIDE**：树搜索型机器学习工程智能体，通过迭代式代码生成与评估自主提升模型性能。\n- [weco.ai](https:\u002F\u002Fweco.ai) - **Weco**：提供可观测性、实验追踪和托管运行服务的云平台，将自动研究循环引入生产环境。\n\n## 🔬 研究代理系统\n\n- [aiming-lab\u002FAutoResearchClaw](https:\u002F\u002Fgithub.com\u002Faiming-lab\u002FAutoResearchClaw) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Faiming-lab\u002FAutoResearchClaw?style=social) - 端到端的研究流水线，可将一个主题转化为文献综述、实验、分析、同行评审以及论文草稿；其范围比自动研究更广，但显然属于同一脉络。\n- [OpenRaiser\u002FNanoResearch](https:\u002F\u002Fgithub.com\u002FOpenRaiser\u002FNanoResearch) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FOpenRaiser\u002FNanoResearch?style=social) - 一种端到端的自主研究引擎，能够规划实验、生成代码、在本地或 SLURM 上运行任务、分析实际结果，并基于这些输出撰写论文。\n- [wanshuiyin\u002FAuto-claude-code-research-in-sleep](https:\u002F\u002Fgithub.com\u002Fwanshuiyin\u002FAuto-claude-code-research-in-sleep) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fwanshuiyin\u002FAuto-claude-code-research-in-sleep?style=social) - 面向 Claude Code 及其他代理的以 Markdown 优先的研究工作流，核心围绕自主文献综述、实验、论文迭代和跨模型批判展开。\n- [Sibyl-Research-Team\u002FAutoResearch-SibylSystem](https:\u002F\u002Fgithub.com\u002FSibyl-Research-Team\u002FAutoResearch-SibylSystem) ![GitHub 
星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FSibyl-Research-Team\u002FAutoResearch-SibylSystem?style=social) - 基于 Claude Code 构建的完全自主 AI 科学家，具有明确的自动研究血统，支持多代理研究迭代、GPU 实验执行以及自我演进的外层循环。\n- [eimenhmdt\u002Fautoresearcher](https:\u002F\u002Fgithub.com\u002Feimenhmdt\u002Fautoresearcher) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Feimenhmdt\u002Fautoresearcher?style=social) - 早期开源科学工作流自动化工具包，目前专注于文献综述生成，目标是实现更广泛的自主研究。\n- [hyperspaceai\u002Fagi](https:\u002F\u002Fgithub.com\u002Fhyperspaceai\u002Fagi) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhyperspaceai\u002Fagi?style=social) - 分布式、点对点的研究网络，其中自主代理运行实验、共享发现、维护 CRDT 排行榜，并将结果跨多个研究领域归档至 GitHub。\n- [SakanaAI\u002FAI-Scientist](https:\u002F\u002Fgithub.com\u002FSakanaAI\u002FAI-Scientist) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FSakanaAI\u002FAI-Scientist?style=social) - **AI 科学家**：首个用于完全自动科学发现的综合系统。从想法生成到论文写作，几乎无需人工干预。\n- [SakanaAI\u002FAI-Scientist-v2](https:\u002F\u002Fgithub.com\u002FSakanaAI\u002FAI-Scientist-v2) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FSakanaAI\u002FAI-Scientist-v2?style=social) - 通过代理树搜索实现研讨会级别的自动化科学发现。去除了 v1 中的模板依赖，使其能够泛化应用于各个研究领域。\n- [HKUDS\u002FAI-Researcher](https:\u002F\u002Fgithub.com\u002FHKUDS\u002FAI-Researcher) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FHKUDS\u002FAI-Researcher?style=social) - NeurIPS 2025 论文。完整的端到端研究自动化：假设 → 实验 → 手稿 → 同行评审。生产版本位于 [novix.science](https:\u002F\u002Fnovix.science\u002Fchat)。\n- [openags\u002FAuto-Research](https:\u002F\u002Fgithub.com\u002Fopenags\u002FAuto-Research) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fopenags\u002FAuto-Research?style=social) - **OpenAGS**：协调一支 AI 代理团队，覆盖整个研究生命周期——文献综述、假设生成、实验、手稿撰写和同行评审。\n- [SamuelSchmidgall\u002FAgentLaboratory](https:\u002F\u002Fgithub.com\u002FSamuelSchmidgall\u002FAgentLaboratory) ![GitHub 
星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FSamuelSchmidgall\u002FAgentLaboratory?style=social) - 端到端的自主研究工作流：想法 → 文献综述 → 实验 → 报告。支持自主模式和协作辅助模式。\n- [AgentRxiv](https:\u002F\u002Fagentrxiv.github.io\u002F) - 一个协作式的自主研究框架，其中各代理实验室共享预印本服务器，以便相互迭代地推进工作。\n- [JinheonBaek\u002FResearchAgent](https:\u002F\u002Fgithub.com\u002FJinheonBaek\u002FResearchAgent) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FJinheonBaek\u002FResearchAgent?style=social) - 利用 LLM 在科学文献基础上进行迭代式研究思路生成。采用多代理综述与反馈循环。\n- [du-nlp-lab\u002FMLR-Copilot](https:\u002F\u002Fgithub.com\u002Fdu-nlp-lab\u002FMLR-Copilot) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fdu-nlp-lab\u002FMLR-Copilot?style=social) - 自主 ML 研究框架——生成想法、实施实验、分析结果。\n- [MASWorks\u002FML-Agent](https:\u002F\u002Fgithub.com\u002FMASWorks\u002FML-Agent) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FMASWorks\u002FML-Agent?style=social) - 用于自主 ML 工程的强化学习代理。通过试错学习来提升模型性能。\n- [PouriaRouzrokh\u002FLatteReview](https:\u002F\u002Fgithub.com\u002FPouriaRouzrokh\u002FLatteReview) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FPouriaRouzrokh\u002FLatteReview?style=social) - 低代码 Python 包，用于通过 AI 驱动的代理实现**自动化系统性文献综述**。\n- [LitLLM\u002FLitLLM](https:\u002F\u002Fgithub.com\u002FLitLLM\u002FLitLLM) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FLitLLM\u002FLitLLM?style=social) - 基于 RAG 的 AI 驱动文献综述助手，用于在学术写作中生成准确且结构良好的相关工作部分。\n- [Agent Laboratory](https:\u002F\u002Fagentlaboratory.github.io\u002F) - 三阶段研究流程：文献综述 → 实验 → 报告撰写，每个阶段配备专门的代理。\n- [WecoAI\u002Faideml](https:\u002F\u002Fgithub.com\u002FWecoAI\u002Faideml) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FWecoAI\u002Faideml?style=social) - **AIDE**：AI 驱动探索——基于树搜索的 ML 工程代理，可自动化实验设计、代码生成和评估。将 ML 工程视为针对任意指标的代码优化。\n\n## 💻 平台移植与硬件分支\n\n- 
[gianfrancopiana\u002Fopenclaw-autoresearch](https:\u002F\u002Fgithub.com\u002Fgianfrancopiana\u002Fopenclaw-autoresearch) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgianfrancopiana\u002Fopenclaw-autoresearch?style=social) - OpenClaw 对 pi-autoresearch 的移植；适用于任何优化目标、带有统计置信度评分的自主实验循环。\n- [miolini\u002Fautoresearch-macos](https:\u002F\u002Fgithub.com\u002Fmiolini\u002Fautoresearch-macos) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmiolini\u002Fautoresearch-macos?style=social) - 被广泛采用的 macOS 分支，将上游 autoresearch 适配到 Apple Silicon \u002F MPS，同时保留原始的循环结构。\n- [trevin-creator\u002Fautoresearch-mlx](https:\u002F\u002Fgithub.com\u002Ftrevin-creator\u002Fautoresearch-mlx) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftrevin-creator\u002Fautoresearch-mlx?style=social) - MLX 原生的 Apple Silicon 移植版，保持上游固定预算的 `val_bpb` 循环，同时完全移除 PyTorch\u002FCUDA 依赖。\n- [jsegov\u002Fautoresearch-win-rtx](https:\u002F\u002Fgithub.com\u002Fjsegov\u002Fautoresearch-win-rtx) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjsegov\u002Fautoresearch-win-rtx?style=social) - 面向消费级 NVIDIA GPU 的 Windows 原生 RTX 分支，设有显存下限，并提供实用的桌面配置路径。\n- [iii-hq\u002Fn-autoresearch](https:\u002F\u002Fgithub.com\u002Fiii-hq\u002Fn-autoresearch) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fiii-hq\u002Fn-autoresearch?style=social) - 多 GPU 自动研究基础设施，具备结构化的实验跟踪、自适应搜索策略、崩溃恢复功能，以及围绕经典 `train.py` 循环的可查询编排能力。\n- [lucasgelfond\u002Fautoresearch-webgpu](https:\u002F\u002Fgithub.com\u002Flucasgelfond\u002Fautoresearch-webgpu) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flucasgelfond\u002Fautoresearch-webgpu?style=social) - 浏览器\u002FWebGPU 移植版，允许代理生成训练代码、在浏览器中运行实验，并将结果反馈回循环，无需 Python 环境。\n- [tonitangpotato\u002Fautoresearch-engram](https:\u002F\u002Fgithub.com\u002Ftonitangpotato\u002Fautoresearch-engram) ![GitHub 
星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftonitangpotato\u002Fautoresearch-engram?style=social) - 带有 **持久认知记忆** 的分支——跨会话知识的频率加权检索，以提升实验的连续性。\n- **Colab\u002FKaggle T4 移植版** - 将 autoresearch 适配到免费的 T4 GPU（Google Colab \u002F Kaggle），无需任何成本和本地设置。关键改动：将 Flash Attention 3 替换为 PyTorch SDPA，并移除仅 H100 支持的内核依赖。（[上游议题 #208](https:\u002F\u002Fgithub.com\u002Fkarpathy\u002Fautoresearch\u002Fissues\u002F208)）\n- [ArmanJR-Lab\u002Fautoautoresearch](https:\u002F\u002Fgithub.com\u002FArmanJR-Lab\u002Fautoautoresearch) - Jetson AGX Orin 移植版，配备一个 **导演**——一个 Go 二进制文件，充当“创意总监”，通过引入新颖性内容（arXiv 论文 + DeepSeek Reasoner）来帮助跳出局部最优。包含多实验对比（基准 vs 导演引导），并提供详细的停滞分析。\n\n## 🎯 领域特定的适配\n\n- [mattprusak\u002Fautoresearch-genealogy](https:\u002F\u002Fgithub.com\u002Fmattprusak\u002Fautoresearch-genealogy) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmattprusak\u002Fautoresearch-genealogy?style=social) - 将 autoresearch 模式应用于家谱研究，利用结构化提示、档案指南、来源核查和保险库工作流，迭代扩展并验证家族历史研究。\n- [ArchishmanSengupta\u002Fautovoiceevals](https:\u002F\u002Fgithub.com\u002FArchishmanSengupta\u002Fautovoiceevals) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FArchishmanSengupta\u002Fautovoiceevals?style=social) - 使用对抗性呼叫者及“保留或回滚”提示编辑，强化 Vapi、Smallest AI 和 ElevenLabs 中的语音 AI 代理。\n- [chrisworsey55\u002Fatlas-gic](https:\u002F\u002Fgithub.com\u002Fchrisworsey55\u002Fatlas-gic) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fchrisworsey55\u002Fatlas-gic?style=social) - 将 autoresearch 的“保留或回滚”循环应用于交易代理，以滚动夏普比而非模型损失为目标优化提示和投资组合编排。\n- [RightNow-AI\u002Fautokernel](https:\u002F\u002Fgithub.com\u002FRightNow-AI\u002Fautokernel) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FRightNow-AI\u002Fautokernel?style=social) - 将 autoresearch 循环应用于 GPU 内核优化：剖析瓶颈、修改单个内核、基准测试、保留或回滚，反复进行。\n- [Rkcr7\u002Fautoresearch-sudoku](https:\u002F\u002Fgithub.com\u002FRkcr7\u002Fautoresearch-sudoku) ![GitHub 
星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FRkcr7\u002Fautoresearch-sudoku?style=social) - 增强型 autoresearch 工作流，AI 代理迭代重写并基准测试 Rust 编写的数独求解器，在硬基准测试集中最终超越了人类编写的领先求解器。\n- [jeongph\u002Fautospec](https:\u002F\u002Fgithub.com\u002Fjeongph\u002Fautospec) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fjeongph\u002Fautospec?style=social) - 读取自然语言业务规则，通过“保留或回滚”循环自主构建带有测试的 Spring Boot 服务。使用 Gradle 构建和 JUnit XML 进行评估。从 119 行骨架代码发展至 950 行，历经 5 个周期。\n\n## 📊 评估与基准测试\n\n- [snap-stanford\u002FMLAgentBench](https:\u002F\u002Fgithub.com\u002Fsnap-stanford\u002FMLAgentBench) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fsnap-stanford\u002FMLAgentBench?style=social) - 用于评估 AI 代理在机器学习实验任务上表现的基准测试套件。包含从 CIFAR-10 到 BabyLM 的 13 项任务。\n- [openai\u002Fmle-bench](https:\u002F\u002Fgithub.com\u002Fopenai\u002Fmle-bench) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fopenai\u002Fmle-bench?style=social) - OpenAI 用于衡量 AI 代理在机器学习工程方面表现的基准测试。\n- [chchenhui\u002Fmlrbench](https:\u002F\u002Fgithub.com\u002Fchchenhui\u002Fmlrbench) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fchchenhui\u002Fmlrbench?style=social) - MLR-Bench：评估 AI 代理在开放式机器学习研究中的表现。包含来自 NeurIPS\u002FICLR\u002FICML 研讨会的 201 项任务。\n- [gersteinlab\u002FML-Bench](https:\u002F\u002Fgithub.com\u002Fgersteinlab\u002FML-Bench) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fgersteinlab\u002FML-Bench?style=social) - 在仓库级别的代码上评估 LLM 和代理处理机器学习任务的能力。\n- [THUDM\u002FAgentBench](https:\u002F\u002Fgithub.com\u002FTHUDM\u002FAgentBench) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FTHUDM\u002FAgentBench?style=social) - 针对 LLM 作为代理的全面基准测试，覆盖 8 个不同环境。ICLR 2024。\n\n## 📈 值得关注的应用案例与技术文章\n\n- **Shopify Liquid 优化** - Tobi Lütke 分享了针对 Shopify 的 Liquid 
引擎进行的自动研究式优化实验，公开的追踪数据展示了解析与渲染速度的显著提升以及内存分配的大幅减少。（[推文](https:\u002F\u002Fx.com\u002Ftobi\u002Fstatus\u002F2032212531846971413)，[包含追踪数据的 PR](https:\u002F\u002Fgithub.com\u002FShopify\u002Fliquid\u002Fpull\u002F2056)）\n- **Driveline 棒球生物力学** - 公开的自动研究式实验循环，用于从生物力学数据预测投球速度，并报告了模型性能的显著提升。（[推文](https:\u002F\u002Fx.com\u002Fdrivelinekyle\u002Fstatus\u002F2032242254035992610)）\n- **网球 XGBoost 预测 + 奖励欺骗分析** - Nick Oak 记录了一个受自动研究启发的网球比赛预测循环，详细说明了优化设置中的问题所在。（[博客](https:\u002F\u002Fnickoak.com\u002Fposts\u002Ftennis-xgboost-autoresearch\u002F) · [代码库](https:\u002F\u002Fgithub.com\u002Fbuildoak\u002Ftennis-xgboost-autoresearch) · [指标被钻空子的分支](https:\u002F\u002Fgithub.com\u002Fbuildoak\u002Ftennis-xgboost-autoresearch\u002Ftree\u002Farchived\u002Fgamed-iterations)）\n- **维苏威火山挑战赛墨水检测群体智能** - 多智能体实验循环应用于古代卷轴墨水检测任务，相关文章深入探讨了跨卷轴泛化能力的提升。（[博客](https:\u002F\u002Fscrollprize.substack.com\u002Fp\u002Fwe-are-cooking)）\n- **地球系统模型优化** - 混合工作流中，LLM 提出方程结构，搜索过程调优参数，展示了自动研究模式如何扩展到科学建模领域。（[推文](https:\u002F\u002Fx.com\u002Fdevparagiri\u002Fstatus\u002F2035075626273739068)，[博客](https:\u002F\u002Fparagiri.com\u002Fblog\u002F2026\u002Fautoresearch-earth-system-models\u002F)）\n- **代理型研究者** - 论文：“数学与机器学习中 AI 辅助研究的实用指南”。文中将自动研究视为自动化 ML 实验流水线的典型范例。（[arXiv 2603.15914](https:\u002F\u002Farxiv.org\u002Fhtml\u002F2603.15914)）\n- **将自动研究扩展至 GPU 集群** - SkyPilot 博客介绍了如何利用云编排工具在 H100\u002FH200 集群上运行自动研究。（[SkyPilot 博客](https:\u002F\u002Fblog.skypilot.co\u002Fscaling-autoresearch\u002F)）\n- **自我改进型编码代理** - Addy Osmani 的实用指南，介绍如何使用 Claude Code 构建自我改进型代理循环。（[文章](https:\u002F\u002Faddyosmani.com\u002Fblog\u002Fself-improving-agents\u002F)）\n- **autoresearch@home：分布式 AI 研究** - 将 SETI@home 模式应用于自动研究——贡献 GPU 算力以参与集体模型优化。（[Ensue 博客](https:\u002F\u002Fensue.dev\u002Fblog\u002Fautoresearch-at-home\u002F)）\n- **Claude Code + 自动研究用于自我提升技能** - MindStudio 指南，介绍如何结合自动研究模式使用 Claude Code 构建自我改进型 AI 技能。（[文章](https:\u002F\u002Fwww.mindstudio.ai\u002Fblog\u002Fclaude-code-autoresearch-self-improving-skills)）\n- **一夜完成 100 个 ML 实验** - 
Particula 的技术解析，探讨了该方法在不同领域的通用性应用。（[文章](https:\u002F\u002Fparticula.tech\u002Fblog\u002Fkarpathy-autoresearch-autonomous-ml-experiments)）\n- **产品经理的自动研究指南** - 覆盖设置、社区分支及实际应用的产品经理指南。（[文章](https:\u002F\u002Fwww.news.aakashg.com\u002Fp\u002Fautoresearch-guide-for-pms)）\n- **自动研究 101：构建者手册** - Substack 上的一篇深度文章，通过具体示例讲解如何将自动研究模式应用于提示词、代理和工作流。（[文章](https:\u002F\u002Fsidsaladi.substack.com\u002Fp\u002Fautoresearch-101-builders-playbook)）\n- **Kingy AI 技术解析** - 详细剖析了自动研究循环的架构、变异算子及适应度函数的设计。（[文章](https:\u002F\u002Fkingy.ai\u002Fai\u002Fautoresearch-karpathys-minimal-agent-loop-for-autonomous-llm-experimentation\u002F)）\n- **财富杂志专题报道** - 从商业和行业角度阐述了为什么自动研究对自主 AI 代理的未来发展至关重要。（[文章](https:\u002F\u002Ffortune.com\u002F2026\u002F03\u002F17\u002Fandrej-karpathy-loop-autonomous-ai-agents-future\u002F)）\n\n## 📚 相关资源\n\n精选的 AI 代理、自主系统及自动化研究相关的列表与论文集：\n\n- [ai-agents-2030\u002Fawesome-deep-research-agent](https:\u002F\u002Fgithub.com\u002Fai-agents-2030\u002Fawesome-deep-research-agent) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fai-agents-2030\u002Fawesome-deep-research-agent?style=social) - 深度研究代理相关论文与系统的精选列表。\n- [YoungDubbyDu\u002FLLM-Agent-Optimization](https:\u002F\u002Fgithub.com\u002FYoungDubbyDu\u002FLLM-Agent-Optimization) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FYoungDubbyDu\u002FLLM-Agent-Optimization?style=social) - 关于 LLM 代理优化方法的论文。\n- [VoltAgent\u002Fawesome-ai-agent-papers](https:\u002F\u002Fgithub.com\u002FVoltAgent\u002Fawesome-ai-agent-papers) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FVoltAgent\u002Fawesome-ai-agent-papers?style=social) - 2026 年精选的 AI 代理论文——涵盖代理工程、记忆、评估、工作流及自主系统。\n- [masamasa59\u002Fai-agent-papers](https:\u002F\u002Fgithub.com\u002Fmasamasa59\u002Fai-agent-papers) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmasamasa59\u002Fai-agent-papers?style=social) - 通过自动化 arXiv 搜索并精选后，每两周更新一次的 AI 代理研究论文。\n- 
[tmgthb\u002FAutonomous-Agents](https:\u002F\u002Fgithub.com\u002Ftmgthb\u002FAutonomous-Agents) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Ftmgthb\u002FAutonomous-Agents?style=social) - 每日更新的自主代理研究论文。\n- [HKUST-KnowComp\u002FAwesome-LLM-Scientific-Discovery](https:\u002F\u002Fgithub.com\u002FHKUST-KnowComp\u002FAwesome-LLM-Scientific-Discovery) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FHKUST-KnowComp\u002FAwesome-LLM-Scientific-Discovery?style=social) - EMNLP 2025 关于 LLM 在科学发现中的综述。\n- [openags\u002FAwesome-AI-Scientist-Papers](https:\u002F\u002Fgithub.com\u002Fopenags\u002FAwesome-AI-Scientist-Papers) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fopenags\u002FAwesome-AI-Scientist-Papers?style=social) - AI 科学家 \u002F 机器人科学家相关论文的集合。\n- [agenticscience.github.io](https:\u002F\u002Fagenticscience.github.io\u002F) - 综述：“从 AI for Science 到 Agentic Science：关于自主科学发现的综述”。\n- [dspy.ai\u002FGEPA](https:\u002F\u002Fdspy.ai\u002Fapi\u002Foptimizers\u002FGEPA\u002Foverview\u002F) - DSPy 集成的 GEPA 反思式提示优化器，适用于复合型 AI 系统。\n- [OpenAI Cookbook: Self-Evolving Agents](https:\u002F\u002Fdevelopers.openai.com\u002Fcookbook\u002Fexamples\u002Fpartners\u002Fself_evolving_agents\u002Fautonomous_agent_retraining) - 使用 GEPA 式反思进化进行自主代理再训练的实践指南（Cookbook）。\n- [WecoAI\u002Fawesome-autoresearch](https:\u002F\u002Fgithub.com\u002FWecoAI\u002Fawesome-autoresearch) ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FWecoAI\u002Fawesome-autoresearch?style=social) - 精选的自动研究应用案例，附有可验证的追踪数据和进展图表，按领域分类（LLM 训练、GPU 内核、语音代理、交易等）。\n\n\u003Cdiv align=\"center\">\n\n## 星标历史\n\n\u003Ca href=\"https:\u002F\u002Fwww.star-history.com\u002F?type=date&repos=alvinunreal%2Fawesome-autoresearch\">\n \u003Cpicture>\n   \u003Csource media=\"(prefers-color-scheme: dark)\" srcset=\"https:\u002F\u002Fapi.star-history.com\u002Fimage?repos=alvinunreal%2Fawesome-autoresearch&type=date&theme=dark&legend=top-left\" \u002F>\n   
\u003Csource media=\"(prefers-color-scheme: light)\" srcset=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Falvinunreal_awesome-autoresearch_readme_cb955c9826be.png\" \u002F>\n   \u003Cimg alt=\"星标历史图表\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Falvinunreal_awesome-autoresearch_readme_cb955c9826be.png\" \u002F>\n \u003C\u002Fpicture>\n\u003C\u002Fa>\n\n## 📄 许可证\n\n本列表采用 [CC0-1.0](.\u002FLICENSE) 协议发布。","# Awesome Autoresearch 快速上手指南\n\n**Awesome Autoresearch** 并非单一的可安装软件包，而是一个精选的开源项目索引集合，收录了受 `karpathy\u002Fautoresearch` 启发的各类自主改进循环、研究智能体（Research Agents）及其衍生项目。本指南将指导你如何根据需求选择合适的项目并快速启动。\n\n## 环境准备\n\n由于列表中的项目分别基于不同的 AI 模型和运行环境，请根据你的目标项目准备以下基础环境：\n\n### 1. 系统要求\n- **操作系统**: Linux (推荐 Ubuntu 20.04+), macOS, 或 Windows (WSL2)。\n- **硬件**: \n  - 大多数项目需要访问云端 GPU 或本地高性能 GPU (NVIDIA RTX 3090\u002F4090 或更高)。\n  - 部分轻量级脚本仅需 CPU 即可运行，但效率较低。\n- **网络**: 需能访问 GitHub、Hugging Face 及对应的 LLM API 服务（如 OpenAI, Anthropic, Google）。\n\n### 2. 前置依赖\n绝大多数项目依赖以下通用工具：\n- **Python**: 版本 3.10 或更高。\n- **Git**: 用于克隆仓库。\n- **LLM API Key**: 根据所选项目，需准备对应的 API 密钥（例如 `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, `GOOGLE_API_KEY`）。\n- **Node.js** (可选): 部分基于 Claude Code 的项目可能需要 Node.js 环境。\n\n> **国内开发者提示**：\n> - 若无法直接访问 GitHub，可配置国内镜像源或使用加速代理。\n> - 若调用海外 LLM API 受限，建议检查项目是否支持接入国内大模型接口（部分分支已适配），或使用合规的网络代理方案。\n\n## 安装步骤\n\n由于这是一个项目列表，你需要先选择一个具体的子项目进行安装。以下以两个热门方向为例：\n\n### 场景 A：安装通用型自主改进框架 (以 `recursive-improve` 为例)\n\n该项目专注于递归自我改进，适合代码优化任务。\n\n```bash\n# 1. 克隆仓库\ngit clone https:\u002F\u002Fgithub.com\u002Fkayba-ai\u002Frecursive-improve.git\ncd recursive-improve\n\n# 2. 创建虚拟环境 (推荐)\npython -m venv venv\nsource venv\u002Fbin\u002Factivate  # Windows 用户请使用: venv\\Scripts\\activate\n\n# 3. 安装依赖\npip install -r requirements.txt\n\n# 4. 配置 API 密钥\nexport ANTHROPIC_API_KEY=\"your-api-key-here\"\n# 或在项目根目录创建 .env 文件填入密钥\n```\n\n### 场景 B：安装端到端科研智能体 (以 `NanoResearch` 为例)\n\n该项目适合自动化文献综述、实验执行及论文撰写。\n\n```bash\n# 1. 
克隆仓库\ngit clone https:\u002F\u002Fgithub.com\u002FOpenRaiser\u002FNanoResearch.git\ncd NanoResearch\n\n# 2. 安装依赖 (通常使用 Poetry 或 Pip)\npip install -e .\n\n# 3. 配置环境\n# 根据 README 指示，配置 SLURM (如需集群) 或本地运行参数\n# 设置必要的 LLM 密钥\nexport OPENAI_API_KEY=\"your-api-key-here\"\n```\n\n> **注意**：每个子项目的具体依赖可能不同，请务必在进入目录后阅读该子项目的 `README.md` 确认特有的安装指令。\n\n## 基本使用\n\n安装完成后，通常通过命令行触发自主循环。以下是两种典型的使用模式：\n\n### 1. 定义目标并启动循环 (通用模式)\n\n大多数项目需要一个明确的目标文件或命令行参数来定义“可衡量的指标”。\n\n**示例：使用 `recursive-improve` 优化代码**\n\n```bash\n# 运行主脚本，指定目标文件和优化指标\npython main.py --target .\u002Fsrc\u002Fmain.py --metric \"test_coverage\" --max-iterations 5\n```\n*智能体将自动读取代码，运行测试，分析失败模式，应用修复并评估结果，循环执行直到达到最大次数或满足指标。*\n\n### 2. 启动全自动科研流程 (科研模式)\n\n**示例：使用 `NanoResearch` 生成研究论文**\n\n```bash\n# 启动研究任务，输入研究主题\nnanoresearch run --topic \"Efficient Fine-tuning of LLMs on Edge Devices\" --output-dir .\u002Fresults\n```\n*系统将自动规划实验、生成代码、在本地或集群运行任务、分析数据并起草论文草稿。*\n\n### 3. 使用 Claude Code 技能 (插件模式)\n\n部分项目（如 `uditgoenka\u002Fautoresearch`）是作为 Claude Code 的技能存在的。\n\n```bash\n# 在终端中调用 Claude Code 并加载技能\nclaude --skill autoresearch --prompt \"Optimize the build speed of this repository\"\n```\n\n---\n\n**下一步建议**：\n浏览 [Awesome Autoresearch 原始仓库](https:\u002F\u002Fgithub.com\u002Fboringdystopia\u002Fawesome-autoresearch) 的 \"Contents\" 部分，根据你的具体需求（如：特定硬件适配、特定领域优化、评估基准等）选择最适合的子项目进行深入探索。","某初创团队的后端工程师需要在周末紧急优化一个高延迟的微服务模块，但缺乏足够的时间进行多轮手动调试与验证。\n\n### 没有 awesome-autoresearch 时\n- **试错成本高昂**：工程师必须手动修改代码、运行测试、分析报错日志，再重复此过程，单次迭代耗时数小时。\n- **夜间中断频繁**：遇到复杂 Bug 时，无法在无人值守的情况下自动探索解决方案，导致研发进度被迫停滞过夜。\n- **经验难以沉淀**：每次修复都是“一次性”操作，失败的尝试和成功的策略未被系统记录，团队无法复用历史调试智慧。\n- **验证手段单一**：仅依赖本地单元测试，缺乏像 Google Search 实时 grounding 这样的外部信息源来辅助验证假设。\n\n### 使用 awesome-autoresearch 后\n- **闭环自动迭代**：利用 `recursive-improve` 或 `gemini-autoresearch` 等衍生工具，AI 代理自动捕获执行轨迹、分析失败模式并应用针对性修复，将迭代周期缩短至分钟级。\n- **全天候无人值守**：通过 `--yolo --prompt` 等无头模式，系统在夜间自主运行并行实验，次日清晨直接交付经过验证的优化代码。\n- **跨会话知识累积**：工具自动记录每轮运行的教训与指标（如 `codex-autoresearch` 的 resume 支持），让后续任务能站在之前的“肩膀”上快速启动。\n- 
**多维实时验证**：集成实时搜索与动态指标监控（如 `pi-autoresearch` 仪表盘），确保修复方案不仅通过测试，更符合生产环境的实际表现。\n\nawesome-autoresearch 将原本依赖人工直觉的线性调试过程，转化为可自我进化、全天候运行的自动化科研闭环，极大释放了开发者的创新潜能。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Falvinreal_awesome-autoresearch_82ace005.png","alvinreal","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Falvinreal_b4ada5e9.jpg",null,"https:\u002F\u002Fgithub.com\u002Falvinreal",1288,106,"2026-04-16T16:31:50","NOASSERTION","","部分子项目（如 mutable-state-inc\u002Fautoresearch-at-home, HKUDS\u002FClawTeam）提及需要单 GPU 或多 GPU 环境以运行并行实验，但本项目作为索引列表无统一显卡型号、显存或 CUDA 版本要求。","未说明",{"notes":86,"python":84,"dependencies":87},"awesome-autoresearch 本身是一个 curated list（精选索引），而非单一的可执行软件工具。它列出了多个受 karpathy\u002Fautoresearch 启发的独立开源项目、框架和技能（如基于 Claude Code、Gemini CLI 的插件）。因此，具体的操作系统、GPU、内存、Python 版本及依赖库需求取决于用户选择运行的列表中的哪个具体子项目。部分项目设计为在无头模式（headless）下过夜运行，或利用 Google Search 进行实时验证。",[],[89,13,35],"其他",[91,92,93,94,95,96,97,98,99,100,101,102,103,104,105],"agentic-systems","ai-agents","ai-research","ai-tools","autonomous-agents","autoresearch","awesome-list","claude-code","llm-agents","research-agents","scientific-discovery","self-improving-systems","experiment-loops","karpathy","karpathy-inspired","2026-03-27T02:49:30.150509","2026-04-17T08:26:48.469820",[],[]]