[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-vietnh1009--Super-mario-bros-A3C-pytorch":3,"tool-vietnh1009--Super-mario-bros-A3C-pytorch":64},[4,17,27,35,43,56],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":16},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,3,"2026-04-05T11:01:52",[13,14,15],"开发框架","图像","Agent","ready",{"id":18,"name":19,"github_repo":20,"description_zh":21,"stars":22,"difficulty_score":23,"last_commit_at":24,"category_tags":25,"status":16},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",140436,2,"2026-04-05T23:32:43",[13,15,26],"语言模型",{"id":28,"name":29,"github_repo":30,"description_zh":31,"stars":32,"difficulty_score":23,"last_commit_at":33,"category_tags":34,"status":16},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 
AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",107662,"2026-04-03T11:11:01",[13,14,15],{"id":36,"name":37,"github_repo":38,"description_zh":39,"stars":40,"difficulty_score":23,"last_commit_at":41,"category_tags":42,"status":16},3704,"NextChat","ChatGPTNextWeb\u002FNextChat","NextChat 是一款轻量且极速的 AI 助手，旨在为用户提供流畅、跨平台的大模型交互体验。它完美解决了用户在多设备间切换时难以保持对话连续性，以及面对众多 AI 模型不知如何统一管理的痛点。无论是日常办公、学习辅助还是创意激发，NextChat 都能让用户随时随地通过网页、iOS、Android、Windows、MacOS 或 Linux 端无缝接入智能服务。\n\n这款工具非常适合普通用户、学生、职场人士以及需要私有化部署的企业团队使用。对于开发者而言，它也提供了便捷的自托管方案，支持一键部署到 Vercel 或 Zeabur 等平台。\n\nNextChat 的核心亮点在于其广泛的模型兼容性，原生支持 Claude、DeepSeek、GPT-4 及 Gemini Pro 等主流大模型，让用户在一个界面即可自由切换不同 AI 能力。此外，它还率先支持 MCP（Model Context Protocol）协议，增强了上下文处理能力。针对企业用户，NextChat 提供专业版解决方案，具备品牌定制、细粒度权限控制、内部知识库整合及安全审计等功能，满足公司对数据隐私和个性化管理的高标准要求。",87618,"2026-04-05T07:20:52",[13,26],{"id":44,"name":45,"github_repo":46,"description_zh":47,"stars":48,"difficulty_score":23,"last_commit_at":49,"category_tags":50,"status":16},2268,"ML-For-Beginners","microsoft\u002FML-For-Beginners","ML-For-Beginners 是由微软推出的一套系统化机器学习入门课程，旨在帮助零基础用户轻松掌握经典机器学习知识。这套课程将学习路径规划为 12 周，包含 26 节精炼课程和 52 道配套测验，内容涵盖从基础概念到实际应用的完整流程，有效解决了初学者面对庞大知识体系时无从下手、缺乏结构化指导的痛点。\n\n无论是希望转型的开发者、需要补充算法背景的研究人员，还是对人工智能充满好奇的普通爱好者，都能从中受益。课程不仅提供了清晰的理论讲解，还强调动手实践，让用户在循序渐进中建立扎实的技能基础。其独特的亮点在于强大的多语言支持，通过自动化机制提供了包括简体中文在内的 50 多种语言版本，极大地降低了全球不同背景用户的学习门槛。此外，项目采用开源协作模式，社区活跃且内容持续更新，确保学习者能获取前沿且准确的技术资讯。如果你正寻找一条清晰、友好且专业的机器学习入门之路，ML-For-Beginners 将是理想的起点。",84991,"2026-04-05T10:45:23",[14,51,52,53,15,54,26,13,55],"数据工具","视频","插件","其他","音频",{"id":57,"name":58,"github_repo":59,"description_zh":60,"stars":61,"difficulty_score":10,"last_commit_at":62,"category_tags":63,"status":16},3128,"ragflow","infiniflow\u002Fragflow","RAGFlow 
是一款领先的开源检索增强生成（RAG）引擎，旨在为大语言模型构建更精准、可靠的上下文层。它巧妙地将前沿的 RAG 技术与智能体（Agent）能力相结合，不仅支持从各类文档中高效提取知识，还能让模型基于这些知识进行逻辑推理和任务执行。\n\n在大模型应用中，幻觉问题和知识滞后是常见痛点。RAGFlow 通过深度解析复杂文档结构（如表格、图表及混合排版），显著提升了信息检索的准确度，从而有效减少模型“胡编乱造”的现象，确保回答既有据可依又具备时效性。其内置的智能体机制更进一步，使系统不仅能回答问题，还能自主规划步骤解决复杂问题。\n\n这款工具特别适合开发者、企业技术团队以及 AI 研究人员使用。无论是希望快速搭建私有知识库问答系统，还是致力于探索大模型在垂直领域落地的创新者，都能从中受益。RAGFlow 提供了可视化的工作流编排界面和灵活的 API 接口，既降低了非算法背景用户的上手门槛，也满足了专业开发者对系统深度定制的需求。作为基于 Apache 2.0 协议开源的项目，它正成为连接通用大模型与行业专有知识之间的重要桥梁。",77062,"2026-04-04T04:44:48",[15,14,13,26,54],{"id":65,"github_repo":66,"name":67,"description_en":68,"description_zh":69,"ai_summary_zh":69,"readme_en":70,"readme_zh":71,"quickstart_zh":72,"use_case_zh":73,"hero_image_url":74,"owner_login":75,"owner_name":76,"owner_avatar_url":77,"owner_bio":78,"owner_company":79,"owner_location":80,"owner_email":81,"owner_twitter":81,"owner_website":81,"owner_url":82,"languages":83,"stars":88,"forks":89,"last_commit_at":90,"license":91,"difficulty_score":10,"env_os":92,"env_gpu":93,"env_ram":94,"env_deps":95,"category_tags":103,"github_topics":104,"view_count":10,"oss_zip_url":81,"oss_zip_packed_at":81,"status":16,"created_at":112,"updated_at":113,"faqs":114,"releases":145},1178,"vietnh1009\u002FSuper-mario-bros-A3C-pytorch","Super-mario-bros-A3C-pytorch","Asynchronous Advantage Actor-Critic (A3C) algorithm for Super Mario Bros","Super-mario-bros-A3C-pytorch 是一个基于 PyTorch 实现的异步优势演员-评论家算法（A3C），用于训练智能体在《超级马里奥兄弟》游戏中自主学习和玩耍。它简化了传统实现中复杂的预处理和环境配置步骤，让开发者能更专注于算法本身。通过模拟“演员”与“评论家”的协作关系，该工具帮助智能体在不断试错中提升游戏表现，最终学会如何完成关卡目标。适合对强化学习感兴趣的开发者和研究人员使用，尤其适合希望快速上手 A3C 算法并应用于具体任务的用户。其技术亮点在于简洁的代码结构和对论文方法的严格遵循。","# [PYTORCH] Asynchronous Advantage Actor-Critic (A3C) for playing Super Mario Bros\n\n## Introduction\n\nHere is my python source code for training an agent to play super mario bros. 
By using Asynchronous Advantage Actor-Critic (A3C) algorithm introduced in the paper **Asynchronous Methods for Deep Reinforcement Learning** [paper](https:\u002F\u002Farxiv.org\u002Fabs\u002F1602.01783).\n\u003Cp align=\"center\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_683fcabc6dee.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_e390b4669ec5.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_7c81348418df.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_f1ffcd1c071c.gif\" width=\"200\">\u003Cbr\u002F>\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_66ac284830eb.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_8d5a7585918e.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_738a9c5a2b91.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_db62d55bdf6a.gif\" width=\"200\">\u003Cbr\u002F>\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_f3a6f990ac75.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_3dd1b2b4b9ec.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_7d15d7905c19.gif\" width=\"200\">\n  \u003Cimg 
src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_ce25c6642bbb.gif\" width=\"200\">\u003Cbr\u002F>\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_799b46913b4c.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_2d52128e2036.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_1de19ff8ddec.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_aec94825bc07.gif\" width=\"200\">\u003Cbr\u002F>\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_49d0de8a2463.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_97a4e3741889.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_34f9d66ba82e.gif\" width=\"200\">\u003Cbr\u002F>\n  \u003Ci>Sample results\u003C\u002Fi>\n\u003C\u002Fp>\n\n## Motivation\n\nBefore I implemented this project, there are several repositories reproducing the paper's result quite well, in different common deep learning frameworks such as Tensorflow, Keras and Pytorch. In my opinion, most of them are great. However, they seem to be overly complicated in many parts including image's pre-processing, environtment setup and weight initialization, which distracts user's attention from more important matters. Therefore, I decide to write a cleaner code, which simplifies unimportant parts, while still follows the paper strictly. 
As you could see, with minimal setup and simple network's initialization, as long as you implement the algorithm correctly, an agent will teach itself how to interact with environment and gradually find out the way to reach the final goal.\n\n## Explanation in layman's term\nIf you are already familiar to reinforcement learning in general and A3C in particular, you could skip this part. I write this part for explaining what is A3C algorithm, how and why it works, to people who are interested in or curious about A3C or my implementation, but do not understand the mechanism behind. Therefore, you do not need any prerequiste knowledge for reading this part :relaxed:\n\nIf you search on the internet, there are numerous article introducing or explaining A3C, some even provide sample code. However, I would like to take another approach: Break down the name **Asynchronous Actor-Critic Agents** into smaller parts and explain in an aggregated manner.\n\n### Actor-Critic\nYour agent has 2 parts called **actor** and **critic**, and its goal is to make both parts perfom better over time by exploring and exploiting the environment. Let imagine a small mischievous child (**actor**) is discovering the amazing world around him, while his dad (**critic**) oversees him, to make sure that he does not do anything dangerous. Whenever the kid does anything good, his dad will praise and encourage him to repeat that action in the future. And of course, when the kid does anything harmful, he will get warning from his dad. The more the kid interacts to the world, and takes different actions, the more feedback, both positive and negative, he gets from his dad. The goal of the kid is, to collect as many positive feedback as possible from his dad, while the goal of the dad is to evaluate his son's action better. 
In other word, we have a win-win relationship between the kid and his dad, or equivalently between **actor** and **critic**.\n\n### Advantage Actor-Critic\nTo make the kid learn faster, and more stable, the dad, instead of telling his son how good his action is, will tell him how better or worse his action in compared to other actions (or **a \"virtual\" average action**). An example is worth a thousand words. Let's compare 2 pairs of dad and son. The first dad gives his son 10 candies for grade 10 and 1 candy for grade 1 in school. The second dad, on the other hand, gives his son 5 candies for grade 10, and \"punishes\" his son by not allowing him to watch his favorite TV series for a day when he gets grade 1. How do you think? The second dad seems to be a little bit smarter, right? Indeed, you could rarely prevent bad actions, if you still \"encourage\" them with small reward.\n\n### Asynchronous Advantage Actor-Critic\nIf an agent discovers environment alone, the learning process would be slow. More seriously, the agent could be possibly bias to a particular suboptimal solution, which is undesirable. What happen if you have a bunch of agents which simultaneously discover different part of the environment and update their new obtained knowledge to one another periodically? It is exactly the idea of **Asynchronous Advantage Actor-Critic**. Now the kid and his mates in kindergarten have a trip to a beautiful beach (with their teacher, of course). Their task is to build a great sand castle. Different child will build different parts of the castle, supervised by the teacher. Each of them will have different task, with the same final goal is a strong and eye-catching castle. Certainly, the role of the teacher now is the same as the dad in previous example. 
The only difference is that the former is busier :sweat_smile:\n\n## How to use my code\n\nWith my code, you can:\n* **Train your model** by running **python train.py**\n* **Test your trained model** by running **python test.py**\n\n## Trained models\n\nYou could find some trained models I have trained in [Super Mario Bros A3C trained models](https:\u002F\u002Fdrive.google.com\u002Fopen?id=1itDw9sXPiY7xC4u72RIfO5EdoVs0msLL)\n \n## Requirements\n\n* **python 3.6**\n* **gym**\n* **cv2**\n* **pytorch** \n* **numpy**\n\n## Acknowledgements\nAt the beginning, I could only train my agent to complete 9 stages. Then @davincibj pointed out that 19 stages could be completed and sent me the trained weights. Thank you a lot for the finding!\n","# [PYTORCH] 用于玩超级马里奥兄弟的异步优势演员-评论家（A3C）\n\n## 引言\n\n以下是用于训练智能体玩超级马里奥兄弟游戏的 Python 源代码。该代码采用了论文《深度强化学习中的异步方法》[论文](https:\u002F\u002Farxiv.org\u002Fabs\u002F1602.01783) 中提出的异步优势演员-评论家（A3C）算法。\n\u003Cp align=\"center\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_683fcabc6dee.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_e390b4669ec5.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_7c81348418df.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_f1ffcd1c071c.gif\" width=\"200\">\u003Cbr\u002F>\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_66ac284830eb.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_8d5a7585918e.gif\" width=\"200\">\n  \u003Cimg 
src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_738a9c5a2b91.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_db62d55bdf6a.gif\" width=\"200\">\u003Cbr\u002F>\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_f3a6f990ac75.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_3dd1b2b4b9ec.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_7d15d7905c19.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_ce25c6642bbb.gif\" width=\"200\">\u003Cbr\u002F>\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_799b46913b4c.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_2d52128e2036.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_1de19ff8ddec.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_aec94825bc07.gif\" width=\"200\">\u003Cbr\u002F>\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_49d0de8a2463.gif\" width=\"200\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_97a4e3741889.gif\" width=\"200\">\n  \u003Cimg 
src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_readme_34f9d66ba82e.gif\" width=\"200\">\u003Cbr\u002F>\n  \u003Ci>示例结果\u003C\u002Fi>\n\u003C\u002Fp>\n\n## 动机\n\n在我实现这个项目之前，已经有一些仓库在不同的主流深度学习框架（如 TensorFlow、Keras 和 PyTorch）中很好地复现了该论文的结果。在我看来，这些实现大多非常出色。然而，它们在许多方面显得过于复杂，包括图像预处理、环境搭建和权重初始化等，这会分散用户对更重要问题的关注。因此，我决定编写一段更简洁的代码，在严格遵循原论文的同时，简化那些不重要的部分。正如你所看到的，只需最少的设置和简单的网络初始化，只要正确实现了算法，智能体就能自行学习如何与环境交互，并逐步找到达成最终目标的方法。\n\n## 通俗解释\n如果你已经熟悉强化学习以及 A3C 算法，可以跳过这一部分。我写这部分是为了向那些对 A3C 或我的实现感兴趣或好奇，但不了解其背后机制的人解释什么是 A3C 算法、它如何工作以及为什么有效。因此，阅读这部分内容不需要任何先验知识 :relaxed:\n\n在网上搜索，你会发现大量介绍或解释 A3C 的文章，有些甚至提供了示例代码。然而，我想采用另一种方式：将“异步演员-评论家”这个名字拆解成更小的部分，然后以一种整合的方式进行说明。\n\n### 演员-评论家\n你的智能体包含两个部分，分别称为“演员”和“评论家”，其目标是通过探索和利用环境，使这两部分随着时间的推移表现得越来越好。想象一下，一个调皮的小孩（演员）正在探索周围奇妙的世界，而他的爸爸（评论家）则在一旁监督他，确保他不会做出危险的事情。每当孩子做了好事时，爸爸就会表扬并鼓励他以后继续这样做。当然，如果孩子做了坏事，爸爸就会警告他。随着孩子与世界互动得越多，尝试的动作越多，他从爸爸那里得到的反馈也就越多，既有积极的，也有消极的。孩子的目标是尽可能多地从爸爸那里获得正面反馈，而爸爸的目标则是更好地评估儿子的行为。换句话说，孩子和爸爸之间是一种双赢的关系，这也相当于“演员”和“评论家”之间的关系。\n\n### 优势演员-评论家\n为了让小孩学得更快、更稳定，爸爸不再直接告诉儿子他的行为有多好，而是会告诉他，与其他行为相比，他的行为是更好还是更差（或者说与“虚拟平均行为”相比）。举个例子胜过千言万语。我们来比较两对父子。第一对父亲给孩子在学校得了 10 分就奖励 10 颗糖果，而得了 1 分则只给 1 颗糖果。第二对父亲则相反，孩子得了 10 分才给 5 颗糖果，但如果得了 1 分，就会惩罚他一天不能看最喜欢的电视节目。你觉得哪一对父亲的做法更明智呢？显然，第二对父亲似乎更聪明一些，不是吗？的确，如果你仍然用少量奖励来“鼓励”不良行为，就很难真正阻止它们。\n\n### 异步优势演员-评论家\n如果智能体独自探索环境，学习过程将会很慢。更严重的是，智能体可能会陷入某种次优解，这是不可取的。那么，如果有多个智能体同时探索环境的不同部分，并定期将自己的新知识分享给彼此，会发生什么呢？这正是“异步优势演员-评论家”的核心思想。现在，幼儿园的小朋友们和老师一起去美丽的海滩游玩。他们的任务是建造一座漂亮的沙堡。每个小朋友负责建造沙堡的不同部分，由老师监督。每个人都有不同的任务，但最终目标都是建造一座坚固又引人注目的沙堡。当然，老师在这里的角色与前面例子中的爸爸是一样的。唯一的区别是，现在的老师要忙得多 :sweat_smile:\n\n## 如何使用我的代码\n使用我的代码，你可以：\n* **训练模型**：运行 `python train.py`\n* **测试训练好的模型**：运行 `python test.py`\n\n## 训练好的模型\n你可以在 [Super Mario Bros A3C 训练模型](https:\u002F\u002Fdrive.google.com\u002Fopen?id=1itDw9sXPiY7xC4u72RIfO5EdoVs0msLL) 中找到我训练的一些模型。\n\n## 要求\n* **Python 3.6**\n* **gym**\n* **cv2**\n* **PyTorch**\n* **NumPy**\n\n## 致谢\n最初，我只能训练智能体完成 9 个关卡。后来 @davincibj 指出可以完成 19 个关卡，并将训练好的权重发给了我。非常感谢他的发现！","# 
Super-mario-bros-A3C-pytorch 快速上手指南\n\n## 环境准备\n\n### 系统要求\n- Python 3.6 或更高版本\n- 支持 GPU 的环境（可选，但推荐以加快训练速度）\n\n### 前置依赖\n- gym\n- opencv-python (cv2)\n- pytorch\n- numpy\n\n> 推荐使用国内镜像源安装以提高速度：\n```bash\npip install -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple gym opencv-python torch numpy\n```\n\n## 安装步骤\n\n克隆项目到本地：\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Fvietnh1009\u002FSuper-mario-bros-A3C-pytorch.git\ncd Super-mario-bros-A3C-pytorch\n```\n\n## 基本使用\n\n### 训练模型\n运行以下命令开始训练：\n```bash\npython train.py\n```\n\n### 测试模型\n在训练完成后，使用以下命令测试已训练的模型：\n```bash\npython test.py\n```\n\n> 注意：训练和测试需要确保环境配置正确，并且依赖库已成功安装。","一位游戏AI开发工程师正在尝试训练一个智能体玩《超级马里奥兄弟》，以探索强化学习在经典游戏中的应用。他需要一个高效、易用的框架来实现这一目标。\n\n### 没有 Super-mario-bros-A3C-pytorch 时  \n- 需要从零开始搭建强化学习环境，包括游戏模拟器、状态观测和奖励机制，耗时且容易出错  \n- 实现A3C算法需要深入理解论文细节，代码复杂度高，调试困难  \n- 缺乏清晰的示例和文档，导致学习曲线陡峭  \n- 训练过程不稳定，智能体难以有效学习游戏规则和策略  \n\n### 使用 Super-mario-bros-A3C-pytorch 后  \n- 提供开箱即用的游戏环境和预处理模块，节省大量配置时间  \n- 算法实现结构清晰，代码简洁，便于理解和调试  \n- 包含详细注释和示例，降低入门门槛，提升开发效率  \n- 训练过程更稳定，智能体能快速掌握游戏机制并完成关卡  \n\nSuper-mario-bros-A3C-pytorch 通过简化流程、提高稳定性，显著提升了强化学习在经典游戏中的应用效率与效果。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvietnh1009_Super-mario-bros-A3C-pytorch_683fcabc.gif","vietnh1009","Viet Nguyen","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fvietnh1009_265c45cc.jpg","Senior AI engineer at Sporttotal","SPORTTOTAL.TV","Berlin, Germany",null,"https:\u002F\u002Fgithub.com\u002Fvietnh1009",[84],{"name":85,"color":86,"percentage":87},"Python","#3572A5",100,1108,234,"2026-03-27T18:31:54","MIT","Linux, macOS, Windows","需要 NVIDIA GPU，显存 8GB+，CUDA 11.7+","未说明",{"notes":96,"python":97,"dependencies":98},"建议使用 conda 管理环境，首次运行需下载约 5GB 
模型文件","3.6",[99,100,101,102],"gym","cv2","pytorch","numpy",[13,15,14],[105,106,101,99,107,108,109,110,111],"reinforcement-learning","a3c","python","deep-learning","super-mario-bros","mario","ai","2026-03-27T02:49:30.150509","2026-04-06T10:27:07.084303",[115,120,125,130,135,140],{"id":116,"question_zh":117,"answer_zh":118,"source_url":119},5335,"训练一个稳定的 A3C 策略需要多长时间？","根据维护者的回复，每个关卡的训练时间大约需要 6-12 小时。","https:\u002F\u002Fgithub.com\u002Fvietnh1009\u002FSuper-mario-bros-A3C-pytorch\u002Fissues\u002F7",{"id":121,"question_zh":122,"answer_zh":123,"source_url":124},5336,"运行 test.py 时出现 'Monitor' 对象没有 'pipe' 属性的错误如何解决？","该错误是由于未安装 ffmpeg 导致的。可以通过运行 `sudo apt-get install ffmpeg` 安装 ffmpeg 来解决。","https:\u002F\u002Fgithub.com\u002Fvietnh1009\u002FSuper-mario-bros-A3C-pytorch\u002Fissues\u002F5",{"id":126,"question_zh":127,"answer_zh":128,"source_url":129},5337,"无法找到 gym_super_mario_bros 环境怎么办？","可以访问 [gym-super-mario-bros](https:\u002F\u002Fgithub.com\u002FKautenja\u002Fgym-super-mario-bros) 获取该环境。","https:\u002F\u002Fgithub.com\u002Fvietnh1009\u002FSuper-mario-bros-A3C-pytorch\u002Fissues\u002F4",{"id":131,"question_zh":132,"answer_zh":133,"source_url":134},5338,"如何解决 ImportError: cannot import name 'BinarySpaceToDiscreteSpaceEnv' 的问题？","请将导入语句从 `from nes_py.wrappers import BinarySpaceToDiscreteSpaceEnv` 改为 `from nes_py.wrappers import JoypadSpace`，并相应修改代码。","https:\u002F\u002Fgithub.com\u002Fvietnh1009\u002FSuper-mario-bros-A3C-pytorch\u002Fissues\u002F3",{"id":136,"question_zh":137,"answer_zh":138,"source_url":139},5339,"在测试阶段，AI 无法通过第一个障碍物怎么办？","测试阶段使用了 `argmax` 方法选择动作，而训练阶段使用了 `Categorical` 方法。建议在测试阶段也使用 `Categorical` 方法来提高表现。","https:\u002F\u002Fgithub.com\u002Fvietnh1009\u002FSuper-mario-bros-A3C-pytorch\u002Fissues\u002F26",{"id":141,"question_zh":142,"answer_zh":143,"source_url":144},5340,"如何设置训练参数以让 AI 顺利通过关卡？","可以参考以下参数：--gamma 0.9, --tau 1.0, --beta 0.01, --num_local_steps 50, --num_global_steps 
5e6。但具体参数可能需要根据实际情况调整。","https:\u002F\u002Fgithub.com\u002Fvietnh1009\u002FSuper-mario-bros-A3C-pytorch\u002Fissues\u002F24",[]]