[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-chauncygu--Safe-Reinforcement-Learning-Baselines":3,"tool-chauncygu--Safe-Reinforcement-Learning-Baselines":65},[4,23,32,40,49,57],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":22},2268,"ML-For-Beginners","microsoft\u002FML-For-Beginners","ML-For-Beginners 是由微软推出的一套系统化机器学习入门课程，旨在帮助零基础用户轻松掌握经典机器学习知识。这套课程将学习路径规划为 12 周，包含 26 节精炼课程和 52 道配套测验，内容涵盖从基础概念到实际应用的完整流程，有效解决了初学者面对庞大知识体系时无从下手、缺乏结构化指导的痛点。\n\n无论是希望转型的开发者、需要补充算法背景的研究人员，还是对人工智能充满好奇的普通爱好者，都能从中受益。课程不仅提供了清晰的理论讲解，还强调动手实践，让用户在循序渐进中建立扎实的技能基础。其独特的亮点在于强大的多语言支持，通过自动化机制提供了包括简体中文在内的 50 多种语言版本，极大地降低了全球不同背景用户的学习门槛。此外，项目采用开源协作模式，社区活跃且内容持续更新，确保学习者能获取前沿且准确的技术资讯。如果你正寻找一条清晰、友好且专业的机器学习入门之路，ML-For-Beginners 将是理想的起点。",85092,2,"2026-04-10T11:13:16",[13,14,15,16,17,18,19,20,21],"图像","数据工具","视频","插件","Agent","其他","语言模型","开发框架","音频","ready",{"id":24,"name":25,"github_repo":26,"description_zh":27,"stars":28,"difficulty_score":29,"last_commit_at":30,"category_tags":31,"status":22},5784,"funNLP","fighting41love\u002FfunNLP","funNLP 是一个专为中文自然语言处理（NLP）打造的超级资源库，被誉为\"NLP 民工的乐园”。它并非单一的软件工具，而是一个汇集了海量开源项目、数据集、预训练模型和实用代码的综合性平台。\n\n面对中文 NLP 领域资源分散、入门门槛高以及特定场景数据匮乏的痛点，funNLP 提供了“一站式”解决方案。这里不仅涵盖了分词、命名实体识别、情感分析、文本摘要等基础任务的标准工具，还独特地收录了丰富的垂直领域资源，如法律、医疗、金融行业的专用词库与数据集，甚至包含古诗词生成、歌词创作等趣味应用。其核心亮点在于极高的全面性与实用性，从基础的字典词典到前沿的 BERT、GPT-2 模型代码，再到高质量的标注数据和竞赛方案，应有尽有。\n\n无论是刚刚踏入 NLP 领域的学生、需要快速验证想法的算法工程师，还是从事人工智能研究的学者，都能在这里找到急需的“武器弹药”。对于开发者而言，它能大幅减少寻找数据和复现模型的时间；对于研究者，它提供了丰富的基准测试资源和前沿技术参考。funNLP 以开放共享的精神，极大地降低了中文自然语言处理的开发与研究成本，是中文 AI 社区不可或缺的宝藏仓库。",79857,1,"2026-04-08T20:11:31",[19,14,18],{"id":33,"name":34,"github_repo":35,"description_zh":36,"stars":37,"difficulty_score":29,"last_commit_at":38,"category_tags":39,"status":22},5773,"cs-video-courses","Developer-Y\u002Fcs-video-courses","cs-video-courses 
是一个精心整理的计算机科学视频课程清单，旨在为自学者提供系统化的学习路径。它汇集了全球知名高校（如加州大学伯克利分校、新南威尔士大学等）的完整课程录像，涵盖从编程基础、数据结构与算法，到操作系统、分布式系统、数据库等核心领域，并深入延伸至人工智能、机器学习、量子计算及区块链等前沿方向。\n\n面对网络上零散且质量参差不齐的教学资源，cs-video-courses 解决了学习者难以找到成体系、高难度大学级别课程的痛点。该项目严格筛选内容，仅收录真正的大学层级课程，排除了碎片化的简短教程或商业广告，确保用户能接触到严谨的学术内容。\n\n这份清单特别适合希望夯实计算机基础的开发者、需要补充特定领域知识的研究人员，以及渴望像在校生一样系统学习计算机科学的自学者。其独特的技术亮点在于分类极其详尽，不仅包含传统的软件工程与网络安全，还细分了生成式 AI、大语言模型、计算生物学等新兴学科，并直接链接至官方视频播放列表，让用户能一站式获取高质量的教育资源，免费享受世界顶尖大学的课堂体验。",79792,"2026-04-08T22:03:59",[18,13,14,20],{"id":41,"name":42,"github_repo":43,"description_zh":44,"stars":45,"difficulty_score":46,"last_commit_at":47,"category_tags":48,"status":22},3128,"ragflow","infiniflow\u002Fragflow","RAGFlow 是一款领先的开源检索增强生成（RAG）引擎，旨在为大语言模型构建更精准、可靠的上下文层。它巧妙地将前沿的 RAG 技术与智能体（Agent）能力相结合，不仅支持从各类文档中高效提取知识，还能让模型基于这些知识进行逻辑推理和任务执行。\n\n在大模型应用中，幻觉问题和知识滞后是常见痛点。RAGFlow 通过深度解析复杂文档结构（如表格、图表及混合排版），显著提升了信息检索的准确度，从而有效减少模型“胡编乱造”的现象，确保回答既有据可依又具备时效性。其内置的智能体机制更进一步，使系统不仅能回答问题，还能自主规划步骤解决复杂问题。\n\n这款工具特别适合开发者、企业技术团队以及 AI 研究人员使用。无论是希望快速搭建私有知识库问答系统，还是致力于探索大模型在垂直领域落地的创新者，都能从中受益。RAGFlow 提供了可视化的工作流编排界面和灵活的 API 接口，既降低了非算法背景用户的上手门槛，也满足了专业开发者对系统深度定制的需求。作为基于 Apache 2.0 协议开源的项目，它正成为连接通用大模型与行业专有知识之间的重要桥梁。",77062,3,"2026-04-04T04:44:48",[17,13,20,19,18],{"id":50,"name":51,"github_repo":52,"description_zh":53,"stars":54,"difficulty_score":46,"last_commit_at":55,"category_tags":56,"status":22},519,"PaddleOCR","PaddlePaddle\u002FPaddleOCR","PaddleOCR 是一款基于百度飞桨框架开发的高性能开源光学字符识别工具包。它的核心能力是将图片、PDF 等文档中的文字提取出来，转换成计算机可读取的结构化数据，让机器真正“看懂”图文内容。\n\n面对海量纸质或电子文档，PaddleOCR 解决了人工录入效率低、数字化成本高的问题。尤其在人工智能领域，它扮演着连接图像与大型语言模型（LLM）的桥梁角色，能将视觉信息直接转化为文本输入，助力智能问答、文档分析等应用场景落地。\n\nPaddleOCR 适合开发者、算法研究人员以及有文档自动化需求的普通用户。其技术优势十分明显：不仅支持全球 100 多种语言的识别，还能在 Windows、Linux、macOS 等多个系统上运行，并灵活适配 CPU、GPU、NPU 等各类硬件。作为一个轻量级且社区活跃的开源项目，PaddleOCR 
既能满足快速集成的需求，也能支撑前沿的视觉语言研究，是处理文字识别任务的理想选择。",75644,"2026-04-15T10:48:40",[19,13,20,18],{"id":58,"name":59,"github_repo":60,"description_zh":61,"stars":62,"difficulty_score":29,"last_commit_at":63,"category_tags":64,"status":22},3215,"awesome-machine-learning","josephmisiti\u002Fawesome-machine-learning","awesome-machine-learning 是一份精心整理的机器学习资源清单，汇集了全球优秀的机器学习框架、库和软件工具。面对机器学习领域技术迭代快、资源分散且难以甄选的痛点，这份清单按编程语言（如 Python、C++、Go 等）和应用场景（如计算机视觉、自然语言处理、深度学习等）进行了系统化分类，帮助使用者快速定位高质量项目。\n\n它特别适合开发者、数据科学家及研究人员使用。无论是初学者寻找入门库，还是资深工程师对比不同语言的技术选型，都能从中获得极具价值的参考。此外，清单还延伸提供了免费书籍、在线课程、行业会议、技术博客及线下聚会等丰富资源，构建了从学习到实践的全链路支持体系。\n\n其独特亮点在于严格的维护标准：明确标记已停止维护或长期未更新的项目，确保推荐内容的时效性与可靠性。作为机器学习领域的“导航图”，awesome-machine-learning 以开源协作的方式持续更新，旨在降低技术探索门槛，让每一位从业者都能高效地站在巨人的肩膀上创新。",72149,"2026-04-03T21:50:24",[20,18],{"id":66,"github_repo":67,"name":68,"description_en":69,"description_zh":70,"ai_summary_zh":71,"readme_en":72,"readme_zh":73,"quickstart_zh":74,"use_case_zh":75,"hero_image_url":76,"owner_login":77,"owner_name":78,"owner_avatar_url":79,"owner_bio":80,"owner_company":81,"owner_location":82,"owner_email":81,"owner_twitter":81,"owner_website":83,"owner_url":84,"languages":85,"stars":124,"forks":125,"last_commit_at":126,"license":81,"difficulty_score":127,"env_os":128,"env_gpu":129,"env_ram":129,"env_deps":130,"category_tags":133,"github_topics":134,"view_count":10,"oss_zip_url":81,"oss_zip_packed_at":81,"status":22,"created_at":141,"updated_at":142,"faqs":143,"releases":144},7802,"chauncygu\u002FSafe-Reinforcement-Learning-Baselines","Safe-Reinforcement-Learning-Baselines","The repository is for safe reinforcement learning baselines.","Safe-Reinforcement-Learning-Baselines 是一个专注于安全强化学习（Safe RL）研究的开源资源库，旨在为该领域提供全面的基准算法与测试环境。在传统的强化学习中，智能体往往只追求奖励最大化而忽视潜在风险，可能导致危险行为。此项目通过整合单智能体与多智能体场景下的多种安全算法及评估基准，帮助研究者在满足安全约束的前提下优化决策策略，有效解决了 AI 在复杂环境中“如何安全地学习”这一核心难题。\n\n该项目不仅系统梳理了从早期理论到最新进展的经典论文，还详细列出了如 Safety-Gymnasium、Safe Multi-Agent Mujoco 
等主流仿真测试环境，部分条目甚至标注了代码可用性状态，极大地降低了复现与研究门槛。其独特的亮点在于涵盖了单智能体与多智能体双重维度，并持续更新社区贡献的最新成果，形成了一个动态生长的学术知识库。\n\nSafe-Reinforcement-Learning-Baselines 特别适合人工智能领域的研究人员、算法工程师以及高校师生使用。无论是希望快速了解安全强化学习发展脉络的初学者，还是正在寻找对比基线以验证新算法的","Safe-Reinforcement-Learning-Baselines 是一个专注于安全强化学习（Safe RL）研究的开源资源库，旨在为该领域提供全面的基准算法与测试环境。在传统的强化学习中，智能体往往只追求奖励最大化而忽视潜在风险，可能导致危险行为。此项目通过整合单智能体与多智能体场景下的多种安全算法及评估基准，帮助研究者在满足安全约束的前提下优化决策策略，有效解决了 AI 在复杂环境中“如何安全地学习”这一核心难题。\n\n该项目不仅系统梳理了从早期理论到最新进展的经典论文，还详细列出了如 Safety-Gymnasium、Safe Multi-Agent Mujoco 等主流仿真测试环境，部分条目甚至标注了代码可用性状态，极大地降低了复现与研究门槛。其独特的亮点在于涵盖了单智能体与多智能体双重维度，并持续更新社区贡献的最新成果，形成了一个动态生长的学术知识库。\n\nSafe-Reinforcement-Learning-Baselines 特别适合人工智能领域的研究人员、算法工程师以及高校师生使用。无论是希望快速了解安全强化学习发展脉络的初学者，还是正在寻找对比基线以验证新算法的资深开发者，都能从中获得宝贵的参考资源。欢迎社区成员共同参与维护，通过提交议题或合并请求来丰富这份清单，共同推动安全 AI 技术的发展。","## Safe-Reinforcement-Learning-Baselines\n\n\n\n\n\nThe repository is for Safe Reinforcement Learning (RL) research, in which we investigate various safe RL baselines and safe RL benchmarks, including single agent RL and multi-agent RL. If any authors do not want their paper to be listed here, please feel free to contact \u003Cgshangd[AT]foxmail.com>. (This repository is under actively development. We appreciate any constructive comments and suggestions)\n\n\nYou are more than welcome to update this list! If you find a paper about Safe RL which is not listed here, please\n\n- fork this repository, add it and merge back;\n- or report an issue here;\n- or email \u003Cgshangd[AT]foxmail.com>.\n\n\n\n***\nThe README is organized as follows:\n- [Safe-Reinforcement-Learning-Baselines](#safe-reinforcement-learning-baselines)\n  * [1. Environments Supported](#1-environments-supported)\n    + [1.1. Safe Single Agent RL benchmarks](#11-safe-single-agent-rl-benchmarks)\n    + [1.2. Safe Multi-Agent RL benchmarks](#12-safe-multi-agent-rl-benchmarks)\n  * [2. Safe RL Baselines](#2-safe-rl-baselines)\n    + [2.1. Safe Single Agent RL Baselines](#21-safe-single-agent-rl-baselines)\n    + [2.2. 
Safe Multi-Agent RL Baselines](#22-safe-multi-agent-rl-baselines)\n  * [3. Surveys](#3-surveys)\n  * [4. Theses](#4-theses)\n  * [5. Book](#5-book)\n  * [6. Tutorials](#6-tutorials)\n  * [7. Exercise](#7-exercise)\n- [Publication](#publication)\n\n***\n\n\n\n### 1. Environments Supported\n#### 1.1. Safe Single Agent RL benchmarks\n- [AI Safety Gridworlds](https:\u002F\u002Fgithub.com\u002Fdeepmind\u002Fai-safety-gridworlds)\n- [Safety-Gym](https:\u002F\u002Fgithub.com\u002Fopenai\u002Fsafety-gym)\n- [Safety-Gymnasium](https:\u002F\u002Fgithub.com\u002FPKU-Alignment\u002Fsafety-gymnasium)\n\n#### 1.2. Safe Multi-Agent RL benchmarks\n- [Safe Multi-Agent Mujoco](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Multi-Agent-Mujoco)\n- [Safe Multi-Agent Isaac Gym](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Multi-Agent-Isaac-Gym)\n- [Safe Multi-Agent Robosuite](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Multi-Agent-Robosuite)\n\n\n\n### 2. Safe RL Baselines\n\n#### 2.1. 
Safe Single Agent RL Baselines\n\n- Consideration of risk in reinforcement learning, [Paper](https:\u002F\u002Fciteseerx.ist.psu.edu\u002Fviewdoc\u002Fdownload?doi=10.1.1.45.8264&rep=rep1&type=pdf), Not Find Code, (Accepted by ICML 1994)\n- Multi-criteria Reinforcement Learning,  [Paper](https:\u002F\u002Fciteseerx.ist.psu.edu\u002Fviewdoc\u002Fdownload?doi=10.1.1.232.962&rep=rep1&type=pdf), Not Find Code, (Accepted by ICML 1998)\n- Lyapunov design for safe reinforcement learning, [Paper](https:\u002F\u002Fwww.jmlr.org\u002Fpapers\u002Fvolume3\u002Fperkins02a\u002Fperkins02a.pdf), Not Find Code, (Accepted by ICML 2002)\n- Risk-sensitive reinforcement learning, [Paper](https:\u002F\u002Flink.springer.com\u002Fcontent\u002Fpdf\u002F10.1023\u002FA:1017940631555.pdf), Not Find Code, (Accepted by Machine Learning, 2002)\n- Risk-Sensitive Reinforcement Learning Applied to Control under Constraints, [Paper](https:\u002F\u002Fwww.jair.org\u002Findex.php\u002Fjair\u002Farticle\u002Fview\u002F10415\u002F24966), Not Find Code, (Accepted by Journal of Artificial Intelligence Research, 2005)\n- An actor-critic algorithm for constrained markov decision processes, [Paper](https:\u002F\u002Freader.elsevier.com\u002Freader\u002Fsd\u002Fpii\u002FS0167691104001276?token=D2FDE94E441EB4182DF4CF382458FCA57BDCABECB2E17932BF52CABA7F46F0F67EE5E9A4BE19F9FD3E27D4099CA25C80&originRegion=eu-west-1&originCreation=20220304073259), Not Find Code, (Accepted by Systems & Control Letters, 2005)\n- Reinforcement learning for MDPs with constraints, [Paper](https:\u002F\u002Flink.springer.com\u002Fcontent\u002Fpdf\u002F10.1007\u002F11871842_63.pdf), Not Find Code, (Accepted by European Conference on Machine Learning 2006)\n- Discounted Markov decision processes with utility constraints, [Paper](http:\u002F\u002Fciteseerx.ist.psu.edu\u002Fviewdoc\u002Fdownload?doi=10.1.1.140.1315&rep=rep1&type=pdf), Not Find Code, (Accepted by Computers & Mathematics with Applications, 2006)\n- Constrained reinforcement 
learning from intrinsic and extrinsic rewards, [Paper](http:\u002F\u002Fciteseerx.ist.psu.edu\u002Fviewdoc\u002Fdownload?doi=10.1.1.1059.1383&rep=rep1&type=pdf), Not Find Code, (Accepted by International Conference on Development and Learning 2007)\n- Safe exploration for reinforcement learning, [Paper](https:\u002F\u002Fciteseerx.ist.psu.edu\u002Fviewdoc\u002Fdownload?doi=10.1.1.161.2786&rep=rep1&type=pdf), Not Find Code, (Accepted by ESANN 2008)\n- Percentile optimization for Markov decision processes with parameter uncertainty, [Paper](http:\u002F\u002Fciteseerx.ist.psu.edu\u002Fviewdoc\u002Fdownload?doi=10.1.1.400.5048&rep=rep1&type=pdf), Not Find Code, (Accepted by Operations research, 2010)\n- Probabilistic goal Markov decision processes, [Paper](http:\u002F\u002Fciteseerx.ist.psu.edu\u002Fviewdoc\u002Fdownload?doi=10.1.1.208.4804&rep=rep1&type=pdf), Not Find Code, (Accepted by IJCAI 2011)\n- Safe reinforcement learning in high-risk tasks through policy improvement, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fstamp\u002Fstamp.jsp?tp=&arnumber=5967356), Not Find Code, (Accepted by IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL) 2011) \n- Safe Exploration in Markov Decision Processes, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1205.4810.pdf), Not Find Code, (Accepted by ICML 2012)\n- Policy gradients with variance related risk criteria, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1206.6404.pdf), Not Find Code, (Accepted by ICML 2012)\n- Risk aversion in Markov decision processes via near optimal Chernoff bounds, [Paper](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2012\u002Ffile\u002Fe2f374c3418c50bc30d67d5f7454a5b4-Paper.pdf), Not Find Code, (Accepted by NeurIPS 2012)\n- Safe Exploration of State and Action Spaces in Reinforcement Learning, 
[Paper](https:\u002F\u002Fweb.archive.org\u002Fweb\u002F20180423223542id_\u002Fhttp:\u002F\u002Fwww.jair.org\u002Fmedia\u002F3761\u002Flive-3761-6687-jair.pdf), Not Find Code, (Accepted by Journal of Artificial Intelligence Research, 2012)\n- An Online Actor–Critic Algorithm with Function Approximation for Constrained Markov Decision Processes, [Paper](https:\u002F\u002Flink.springer.com\u002Fcontent\u002Fpdf\u002F10.1007\u002Fs10957-012-9989-5.pdf), Not Find Code, (Accepted by Journal of Optimization Theory and Applications, 2012)\n- Safe policy iteration, [Paper](http:\u002F\u002Fproceedings.mlr.press\u002Fv28\u002Fpirotta13.pdf), Not Find Code, (Accepted by ICML 2013)\n- Reachability-based safe learning with Gaussian processes, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fstamp\u002Fstamp.jsp?tp=&arnumber=7039601), Not Find Code (Accepted by IEEE CDC 2014)\n- Safe Policy Search for Lifelong Reinforcement Learning with Sublinear Regret, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1505.05798.pdf), Not Find Code, (Accepted by ICML 2015)\n- High-Confidence Off-Policy Evaluation, [Paper](https:\u002F\u002Fwww.ics.uci.edu\u002F~dechter\u002Fcourses\u002Fics-295\u002Fwinter-2018\u002Fpapers\u002F2015Thomas2015.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002FsafeRL) (Accepted by AAAI 2015)\n- Safe Exploration for Optimization with Gaussian Processes, [Paper](http:\u002F\u002Fproceedings.mlr.press\u002Fv37\u002Fsui15.pdf), Not Find Code (Accepted by ICML 2015)\n- Safe Exploration in Finite Markov Decision Processes with Gaussian Processes, [Paper](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2016\u002Ffile\u002F9a49a25d845a483fae4be7e341368e36-Paper.pdf), Not Find Code (Accepted by NeurIPS 2016)\n- Safe and efficient off-policy reinforcement learning, 
[Paper](https:\u002F\u002Fwww.researchgate.net\u002Fprofile\u002FAnna-Harutyunyan-3\u002Fpublication\u002F303859091_Safe_and_Efficient_Off-Policy_Reinforcement_Learning\u002Flinks\u002F57b2e8c908aeb2cf17c73ad2\u002FSafe-and-Efficient-Off-Policy-Reinforcement-Learning.pdf), [Code](https:\u002F\u002Fgithub.com\u002FALRhub\u002FRetrace-PyTorch) (Accepted by NeurIPS 2016)\n- Safe, Multi-Agent, Reinforcement Learning for Autonomous Driving, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1610.03295.pdf?ref=https:\u002F\u002Fgithubhelp.com), Not Find Code (only Arxiv, 2016, citation 530+)\n- Safe Learning of Regions of Attraction in Uncertain, Nonlinear Systems with Gaussian Processes, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1603.04915.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002Fsafe_learning) (Accepetd by CDC 2016)\n- Safety-constrained reinforcement learning for MDPs, [Paper](https:\u002F\u002Fwww.researchgate.net\u002Fprofile\u002FNils-Jansen-2\u002Fpublication\u002F283118102_Safety-Constrained_Reinforcement_Learning_for_MDPs\u002Flinks\u002F5630d2af08aef3349c29f90f\u002FSafety-Constrained-Reinforcement-Learning-for-MDPs.pdf), Not Find Code (Accepted by InInternational Conference on Tools and Algorithms for the Construction and Analysis of Systems 2016)\n- Convex synthesis of randomized policies for controlled Markov chains with density safety upper bound constraints, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fstamp\u002Fstamp.jsp?tp=&arnumber=7526658), Not Find Code (Accepted by American Control Conference 2016)\n- Combating Deep Reinforcement Learning's Sisyphean Curse with Intrinsic Fear, [Paper](https:\u002F\u002Fopenreview.net\u002Fpdf?id=r1tHvHKge), Not Find Code (only Openreview, 2016)\n- Combating reinforcement learning's sisyphean curse with intrinsic fear, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1611.01211.pdf), Not Find Code (only 
Arxiv, 2016)\n- Constrained Policy Optimization (CPO), [Paper](http:\u002F\u002Fproceedings.mlr.press\u002Fv70\u002Fachiam17a\u002Fachiam17a.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002Fsafety-starter-agents) (Accepted by ICML 2017)\n- Risk-constrained reinforcement learning with percentile risk criteria, [Paper](https:\u002F\u002Fwww.jmlr.org\u002Fpapers\u002Fvolume18\u002F15-636\u002F15-636.pdf), , Not Find Code (Accepted by The Journal of Machine Learning Research, 2017)\n- Probabilistically Safe Policy Transfer, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1705.05394.pdf),  Not Find Code (Accepted by ICRA 2017) \n- Accelerated primal-dual policy optimization for safe reinforcement learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1802.06480.pdf), Not Find Code (Arxiv, 2017)\n- Stagewise safe bayesian optimization with gaussian processes, [Paper](http:\u002F\u002Fwww.yisongyue.com\u002Fpublications\u002Ficml2018_stageopt.pdf),  Not Find Code (Accepted by ICML 2018)\n- Leave no Trace: Learning to Reset for Safe and Autonomous Reinforcement Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1711.06782.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002FLeaveNoTrace) (Accepted by ICLR 2018)\n- Safe Model-based Reinforcement Learning with Stability Guarantees, [Paper](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2017\u002Ffile\u002F766ebcd59621e305170616ba3d3dac32-Paper.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002Fsafe_learning) (Accepted by NeurIPS 2018)\n- A Lyapunov-based Approach to Safe Reinforcement Learning, [Paper](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2018\u002Ffile\u002F4fe5149039b52765bde64beb9f674940-Paper.pdf), Not Find Code (Accepted 
by NeurIPS 2018)\n- Constrained Cross-Entropy Method for Safe Reinforcement Learning, [Paper](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2018\u002Ffile\u002F34ffeb359a192eb8174b6854643cc046-Paper.pdf), Not Find Code (Accepted by NeurIPS 2018)\n- Safe Reinforcement Learning via Formal Methods, [Paper](http:\u002F\u002Fwww.cs.cmu.edu\u002F~aplatzer\u002Fpub\u002FSafeRL.pdf), Not Find Code (Accepted by AAAI 2018)\n- Safe exploration and optimization of constrained mdps using gaussian processes, [Paper](http:\u002F\u002Fwww.yisongyue.com\u002Fpublications\u002Faaai2018_safe_mdp.pdf), Not Find Code (Accepted by AAAI 2018)\n- Safe reinforcement learning via shielding, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1708.08611.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fsafe-rl\u002Fsafe-rl-shielding) (Accepted by AAAI 2018)\n- Trial without Error: Towards Safe Reinforcement Learning via Human Intervention, [Paper](https:\u002F\u002Fwww.ifaamas.org\u002FProceedings\u002Faamas2018\u002Fpdfs\u002Fp2067.pdf), Not Find Code (Accepted by AAMAS 2018)\n- Learning-based Model Predictive Control for Safe Exploration and Reinforcement Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1906.12189.pdf), Not Find Code (Accepted by CDC 2018)\n- The Lyapunov Neural Network: Adaptive Stability Certification for Safe Learning of Dynamical Systems, [Paper](http:\u002F\u002Fproceedings.mlr.press\u002Fv87\u002Frichards18a\u002Frichards18a.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002Fsafe_learning) (Accepted by CoRL 2018)\n- OptLayer - Practical Constrained Optimization for Deep Reinforcement Learning in the Real World, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1709.07643.pdf), Not Find Code (Accepted by ICRA 2018)\n- Safe learning of quadrotor dynamics using barrier certificates, 
[Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fiel7\u002F8449910\u002F8460178\u002F08460471.pdf), Not Find Code (Accepted by ICRA 2018)\n- Safe reinforcement learning on autonomous vehicles, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1910.00399.pdf), Not Find Code (Accepted by IROS 2018)\n- Trial without error: Towards safe reinforcement learning via human intervention, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1707.05173.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fgsastry\u002Fhuman-rl) (Accepted by AAMAS 2018)\n- Safe reinforcement learning: Learning with supervision using a constraint-admissible set, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fabstract\u002Fdocument\u002F8430770), Not Find Code (Accepted by Annual American Control Conference (ACC) 2018)\n- A General Safety Framework for Learning-Based Control in Uncertain Robotic Systems, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fstamp\u002Fstamp.jsp?tp=&arnumber=8493361), Not Find Code (Accepted by IEEE Transactions on Automatic Control 2018)\n- Safe exploration algorithms for reinforcement learning controllers, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fstamp\u002Fstamp.jsp?arnumber=7842559), Not Find Code (Accepted by IEEE transactions on neural networks and learning systems 2018)\n- Verification and repair of control policies for safe reinforcement learning, [Paper](https:\u002F\u002Flink.springer.com\u002Fcontent\u002Fpdf\u002F10.1007\u002Fs10489-017-0999-8.pdf), Not Find Code (Accepted by Applied Intelligence, 2018)\n- Safe Exploration in Continuous Action Spaces, [Paper](https:\u002F\u002Fwww.researchgate.net\u002Fprofile\u002FGal-Dalal\u002Fpublication\u002F322756278_Safe_Exploration_in_Continuous_Action_Spaces\u002Flinks\u002F5a71e84faca2720bc0d940b3\u002FSafe-Exploration-in-Continuous-Action-Spaces.pdf), [Code](https:\u002F\u002Fgithub.com\u002FAgrawalAmey\u002Fsafe-explorer), (only Arxiv, 2018, citation 200+)\n- Safe exploration of nonlinear dynamical 
systems: A predictive safety filter for reinforcement learning, [Paper](https:\u002F\u002Fwww.researchgate.net\u002Fprofile\u002FKim-Wabersich\u002Fpublication\u002F329641554_Safe_exploration_of_nonlinear_dynamical_systems_A_predictive_safety_filter_for_reinforcement_learning\u002Flinks\u002F5ede2aab299bf1d20bd87981\u002FSafe-exploration-of-nonlinear-dynamical-systems-A-predictive-safety-filter-for-reinforcement-learning.pdf), Not Find Code (Arxiv, 2018, citation 40+)\n- Batch policy learning under constraints, [Paper](http:\u002F\u002Fproceedings.mlr.press\u002Fv97\u002Fle19a\u002Fle19a.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fclvoloshin\u002Fconstrained_batch_policy_learning) (Accepted by ICML 2019)\n- Safe Policy Improvement with Baseline Bootstrapping, [Paper](https:\u002F\u002Fwww.researchgate.net\u002Fprofile\u002FRomain-Laroche\u002Fpublication\u002F334749134_Safe_Policy_Improvement_with_Baseline_Bootstrapping\u002Flinks\u002F5d3f3b634585153e592ceeb4\u002FSafe-Policy-Improvement-with-Baseline-Bootstrapping.pdf), Not Find Code (Accepted by ICML 2019)\n- Convergent Policy Optimization for Safe Reinforcement Learning, [Paper](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2019\u002Ffile\u002Fdb29450c3f5e97f97846693611f98c15-Paper.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002FSafe_reinforcement_learning) (Accepted by NeurIPS 2019)\n- Constrained reinforcement learning has zero duality gap, [Paper](https:\u002F\u002Fwww.researchgate.net\u002Fprofile\u002FLuiz-Chamon\u002Fpublication\u002F336889860_Constrained_Reinforcement_Learning_Has_Zero_Duality_Gap\u002Flinks\u002F5ef4df204585155050726b42\u002FConstrained-Reinforcement-Learning-Has-Zero-Duality-Gap.pdf), Not Find Code (Accepted by NeurIPS 2019)\n- Reinforcement learning with convex constraints, [Paper](https:\u002F\u002Fwww.cs.princeton.edu\u002F~syoosefi\u002Fpapers\u002FNeurIPS2019.pdf), 
[Code](https:\u002F\u002Fgithub.com\u002Fxkianteb\u002FApproPO) (Accepted by NeurIPS 2019)\n- Reward constrained policy optimization, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1805.11074.pdf), Not Find Code (Accepted by ICLR 2019)\n- Supervised policy update for deep reinforcement learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1805.11706.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fquanvuong\u002FSupervised_Policy_Update), (Accepted by ICLR 2019)\n- End-to-end safe reinforcement learning through barrier functions for safety-critical continuous control tasks, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1903.08792.pdf), [Code](https:\u002F\u002Fgithub.com\u002Frcheng805\u002FRL-CBF) (Accepted by AAAI 2019)\n- Lyapunov-based safe policy optimization for continuous control, [Paper](https:\u002F\u002Fopenreview.net\u002Fpdf?id=SJgUYBVLsN), Not Find Code (Accepted by ICML Workshop RL4RealLife 2019)\n- Safe reinforcement learning with model uncertainty estimates, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1810.08700.pdf), Not Find Code (Accepted by ICRA 2019)\n- Safe reinforcement learning with scene decomposition for navigating complex urban environments, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1904.11483.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002FAutomotiveSafeRL), (Accepted by IV 2019)\n- Verifiably safe off-model reinforcement learning, [Paper](https:\u002F\u002Flink.springer.com\u002Fchapter\u002F10.1007\u002F978-3-030-17462-0_28), [Code](https:\u002F\u002Fgithub.com\u002FIBM\u002Fvsrl-framework\u002Fblob\u002F42e0853bffb5efbb66cd97178aff9e10ad18c5a9\u002FREADME.md) (Accepted by  InInternational Conference on Tools and Algorithms for the Construction and Analysis of Systems 2019)\n- Probabilistic policy reuse for safe reinforcement learning, 
[Paper](https:\u002F\u002Fdl.acm.org\u002Fdoi\u002Fpdf\u002F10.1145\u002F3310090?casa_token=OahWDUpVTxAAAAAA:MVJd1GjD6HDpFKMxXfp9pd3KaJbG879P7qvcMS0-VDGFAR0prYuXwzN9LwI4BfkPti085CGGhsz1llY), Not Find Code, (Accepted by ACM Transactions on Autonomous and Adaptive Systems (TAAS), 2019)\n- Projected stochastic primal-dual method for constrained online learning with kernels, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fielaam\u002F78\u002F8691646\u002F8678800-aam.pdf), Not Find Code, (Accepted by IEEE Transactions on Signal Processing, 2019)\n- Resource constrained deep reinforcement learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1812.00600.pdf), Not Find Code, (Accepted by 29th International Conference on Automated Planning and Scheduling  2019)\n- Temporal logic guided safe reinforcement learning using control barrier functions, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1903.09885.pdf), Not Find Code (Arxiv, Citation 25+, 2019)\n- Safe policies for reinforcement learning via primal-dual methods, [Paper](https:\u002F\u002Fwww.researchgate.net\u002Fprofile\u002FLuiz-Chamon\u002Fpublication\u002F337438444_Safe_Policies_for_Reinforcement_Learning_via_Primal-Dual_Methods\u002Flinks\u002F5ef4df1f299bf18816e7f62c\u002FSafe-Policies-for-Reinforcement-Learning-via-Primal-Dual-Methods.pdf), Not Find Code (Arxiv, Citation 25+, 2019)\n- Value constrained model-free continuous control, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1902.04623.pdf), Not Find Code (Arxiv, Citation 35+, 2019)\n- Safe Reinforcement Learning in Constrained Markov Decision Processes (SNO-MDP), [Paper](http:\u002F\u002Fproceedings.mlr.press\u002Fv119\u002Fwachi20a\u002Fwachi20a.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002Fsafe_near_optimal_mdp) (Accepted by ICML 2020)\n- Responsive Safety in Reinforcement Learning by PID Lagrangian Methods, 
[Paper](http:\u002F\u002Fproceedings.mlr.press\u002Fv119\u002Fstooke20a\u002Fstooke20a.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fkeirp\u002Fglamor\u002Ftree\u002F98681a23bae9e8e5e9fbf68a0316ca2a22a27593\u002Fdependencies\u002Frlpyt\u002Frlpyt\u002Fprojects\u002Fsafe) (Accepted by ICML 2020)\n- Constrained markov decision processes via backward value functions, [Paper](http:\u002F\u002Fproceedings.mlr.press\u002Fv119\u002Fsatija20a\u002Fsatija20a.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fhercky\u002Fcmdps_via_bvf\u002Ftree\u002F69b9f51cb6410673d0aa2e5b9c980b33e5a46dda) (Accepted by ICML 2020)\n- Projection-Based Constrained Policy Optimization (PCPO), [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2010.03152.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002FPCPO) (Accepted by ICLR 2020)\n- First order constrained optimization in policy space (FOCOPS),[Paper](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2020\u002Ffile\u002Faf5d5ef24881f3c3049a7b9bfe74d58b-Paper.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fymzhang01\u002Ffocops) (Accepted by NeurIPS 2020)\n- Safe reinforcement learning via curriculum induction, [Paper](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2020\u002Ffile\u002F8df6a65941e4c9da40a4fb899de65c55-Paper.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fzuzuba\u002FCISR_NeurIPS20) (Accepted by NeurIPS 2020)\n- Constrained episodic reinforcement learning in concave-convex and knapsack settings, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2006.05051.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fmiryoosefi\u002FConRL) (Accepted by NeurIPS 2020)\n- Risk-sensitive reinforcement learning: Near-optimal risk-sample tradeoff in regret, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2006.13827.pdf), Not Find Code  (Accepted by NeurIPS 2020)\n- Upper confidence primal-dual reinforcement learning for CMDP with adversarial loss, 
[Paper](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper_files\u002Fpaper\u002F2020\u002Ffile\u002Fae95296e27d7f695f891cd26b4f37078-Paper.pdf), Not Find Code  (Accepted by NeurIPS 2020)\n- IPO: Interior-point Policy Optimization under Constraints, [Paper](https:\u002F\u002Fwww.researchgate.net\u002Fprofile\u002FYongshuai-Liu\u002Fpublication\u002F336735393_IPO_Interior-point_Policy_Optimization_under_Constraints\u002Flinks\u002F5e1670874585159aa4bff037\u002FIPO-Interior-point-Policy-Optimization-under-Constraints.pdf), Not Find Code (Accepted by AAAI 2020)\n- Safe reinforcement learning using robust mpc, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1906.04005.pdf), Not Find Code (IEEE Transactions on Automatic Control, 2020)\n- Safe reinforcement learning via projection on a safe set: How to achieve optimality? [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2004.00915.pdf), Not Find Code (Accepted by IFAC 2020)\n- Reinforcement learning for safety-critical control under model uncertainty, using control lyapunov functions and control barrier functions, [Paper](http:\u002F\u002Fwww.roboticsproceedings.org\u002Frss16\u002Fp088.pdf), Not Find Code (Accepted by RSS 2020)\n- Learning Transferable Domain Priors for Safe Exploration in Reinforcement Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1909.04307.pdf), [Code](https:\u002F\u002Fgithub.com\u002FGKthom\u002FPriors-for-safe-exploration), (Accepted by International Joint Conference on Neural Networks (IJCNN) 2020)\n- Safe reinforcement learning through meta-learned instincts, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2005.03233.pdf), Not Find Code (Accepted by The Conference on Artificial Life 2020)\n- Learning safe policies with cost-sensitive advantage estimation, [Paper](https:\u002F\u002Fopenreview.net\u002Fpdf?id=uVnhiRaW3J), Not Find Code (Openreview 2020)\n- Safe reinforcement learning using probabilistic shields, 
[Paper](https:\u002F\u002Frepository.ubn.ru.nl\u002Fbitstream\u002Fhandle\u002F2066\u002F224966\u002F224966.pdf?sequence=1), Not Find Code (2020)\n- A constrained reinforcement learning based approach for network slicing, [Paper](https:\u002F\u002Ficnp20.cs.ucr.edu\u002Fproceedings\u002Fhdrnets\u002FA%20Constrained%20Reinforcement%20Learning%20Based%20Approach%20for%20Network%20Slicing.pdf),  Not Find Code (Accepted by IEEE 28th International Conference on Network Protocols (ICNP) 2020)\n- Safe reinforcement learning: A control barrier function optimization approach, [Paper](https:\u002F\u002Fonlinelibrary.wiley.com\u002Fdoi\u002Fepdf\u002F10.1002\u002Frnc.5132), Not Find Code (Accepted by the International Journal of Robust and Nonlinear Control)\n- Exploration-exploitation in constrained mdps, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2003.02189.pdf), Not Find Code (Arxiv, 2020)\n- Safe reinforcement learning using advantage-based intervention, [Paper](http:\u002F\u002Fproceedings.mlr.press\u002Fv139\u002Fwagener21a\u002Fwagener21a.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fnolanwagener\u002Fsafe_rl) (Accepted by ICML 2021)\n- Shortest-path constrained reinforcement learning for sparse reward tasks, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2107.06405.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fsrsohn\u002Fshortest-path-rl), (Accepted by ICML 2021)\n- Density constrained reinforcement learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2106.12764.pdf), Not Find Code (Accepted by ICML 2021)\n- CRPO: A New Approach for Safe Reinforcement Learning with Convergence Guarantee, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2011.05869.pdf), Not Find Code (Accepted by ICML 2021)\n- Safe reinforcement learning with linear function approximation, [Paper](https:\u002F\u002Fproceedings.mlr.press\u002Fv139\u002Famani21a\u002Famani21a.pdf), Not Find Code (Accepted by ICML 2021)\n- Safe Reinforcement Learning by Imagining the Near Future 
(SMBPO), [Paper](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2021\u002Ffile\u002F73b277c11266681122132d024f53a75b-Paper.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002FSafe-MBPO) (Accepted by NeurIPS 2021) \n- Towards safe reinforcement learning with a safety editor policy, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2201.12427.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fhnyu\u002Fseditor) (Accepted by NeurIPS 2021)\n- Exponential Bellman Equation and Improved Regret Bounds for Risk-Sensitive Reinforcement Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2111.03947.pdf),  Not Find Code (Accepted by NeurIPS 2021)\n- Risk-Sensitive Reinforcement Learning: Symmetry, Asymmetry, and Risk-Sample Tradeoff, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2111.03947.pdf),  Not Find Code (Accepted by NeurIPS 2021)\n- Safe reinforcement learning with natural language constraints, [Paper](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2021\u002Ffile\u002F72f67e70f6b7cdc4cc893edaddf0c4c6-Paper.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fprinceton-nlp\u002FSRL-NLC), (Accepted by NeurIPS 2021)\n- Learning policies with zero or bounded constraint violation for constrained mdps, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2106.02684.pdf),  Not Find Code (Accepted by NeurIPS 2021)\n-  Conservative safety critics for exploration, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2010.14497.pdf), Not Find Code (Accepted by ICLR 2021)\n-  Wcsac: Worst-case soft actor critic for safety-constrained reinforcement learning, [Paper](https:\u002F\u002Fwww.st.ewi.tudelft.nl\u002Fmtjspaan\u002Fpub\u002FYang21aaai.pdf), Not Find Code (Accepted by AAAI 2021)\n-  Risk-averse trust region optimization for reward-volatility reduction, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1912.03193.pdf), Not Find Code (Accepted by IJCAI 2021)\n- AlwaysSafe: 
Reinforcement Learning Without Safety Constraint Violations During Training, [Paper](https:\u002F\u002Fpure.tudelft.nl\u002Fws\u002Ffiles\u002F96913978\u002Fp1226.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002FAlwaysSafe) (Accepted by AAMAS 2021)\n- Safe Continuous Control with Constrained Model-Based Policy Optimization (CMBPO), [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2104.06922.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fanyboby\u002FConstrained-Model-Based-Policy-Optimization) (Accepted by IROS 2021)\n- Context-aware safe reinforcement learning for non-stationary environments, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2101.00531.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fbaimingc\u002Fcasrl) (Accepted by ICRA 2021)\n- Model-based Constrained Reinforcement Learning using Generalized Control Barrier Function, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fstamp\u002Fstamp.jsp?arnumber=9636468), [Code](https:\u002F\u002Fgithub.com\u002Fmahaitongdae\u002Fsafe_exp_env) (Accepted by IROS 2021)\n- Robot Reinforcement Learning on the Constraint Manifold, [Paper](https:\u002F\u002Fproceedings.mlr.press\u002Fv164\u002Fliu22c\u002Fliu22c.pdf), [Code](https:\u002F\u002Fgithub.com\u002FPuzeLiu\u002Frl_on_manifold) (Accepted by CoRL 2021)\n- Provably efficient safe exploration via primal-dual policy optimization, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2003.00534.pdf), Not Find Code (Accepted by the International Conference on Artificial Intelligence and Statistics 2021)\n- Safe model-based reinforcement learning with robust cross-entropy method, [Paper](https:\u002F\u002Faisecure-workshop.github.io\u002Faml-iclr2021\u002Fpapers\u002F8.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002Fsafe-mbrl) (Accepted by ICLR 2021 Workshop on Security and Safety in Machine 
Learning Systems)\n- MESA: Offline Meta-RL for Safe Adaptation and Fault Tolerance, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2112.03575.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fmichaelzhiluo\u002Fmesa-safe-rl) (Accepted by Workshop on Safe and Robust Control of Uncertain Systems at NeurIPS 2021)\n- Safe Reinforcement Learning of Control-Affine Systems with Vertex Networks, [Paper](http:\u002F\u002Fproceedings.mlr.press\u002Fv144\u002Fzheng21a\u002Fzheng21a.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002Fvertex-net) (Accepted by Conference on Learning for Dynamics and Control 2021)\n- Can You Trust Your Autonomous Car? Interpretable and Verifiably Safe Reinforcement Learning, [Paper](http:\u002F\u002Fdownload.cmutschler.de\u002Fpublications\u002F2021\u002FIV2021.pdf), Not Find Code (Accepted by IV 2021)\n- Provably safe model-based meta reinforcement learning: An abstraction-based approach, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2109.01255.pdf), Not Find Code (Accepted by CDC 2021)\n- Recovery RL: Safe Reinforcement Learning with Learned Recovery Zones, [Paper](https:\u002F\u002Fwww.researchgate.net\u002Fprofile\u002FMinho-Hwang\u002Fpublication\u002F345152769_Recovery_RL_Safe_Reinforcement_Learning_with_Learned_Recovery_Zones\u002Flinks\u002F5fe37ea2299bf140883a35cb\u002FRecovery-RL-Safe-Reinforcement-Learning-with-Learned-Recovery-Zones.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fabalakrishna123\u002Frecovery-rl), (Accepted by IEEE RAL, 2021)\n- Reinforcement learning control of constrained dynamic systems with uniformly ultimate boundedness stability guarantee, [Paper](https:\u002F\u002Fwww.sciencedirect.com\u002Fscience\u002Farticle\u002Fpii\u002FS0005109821002090), Not Find Code (Accepted by Automatica, 2021)\n- A predictive safety filter for learning-based control of constrained nonlinear dynamical systems, 
[Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1812.05506.pdf), Not Find Code (Accepted by Automatica, 2021)\n- A simple reward-free approach to constrained reinforcement learning, [Paper](https:\u002F\u002Fwww.cs.princeton.edu\u002F~syoosefi\u002Fpapers\u002Freward-free2021.pdf),  Not Find Code (Arxiv, 2021)\n- State augmented constrained reinforcement learning: Overcoming the limitations of learning with rewards, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2102.11941.pdf),  Not Find Code (Arxiv, 2021)\n- DESTA: A Framework for Safe Reinforcement Learning with Markov Games of Intervention, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2110.14468.pdf),  Not Find Code (Arxiv, 2021)\n- Safe Exploration in Model-based Reinforcement Learning using Control Barrier Functions, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2104.08171.pdf), Not Find Code (Arxiv, 2021)\n- Constrained Variational Policy Optimization for Safe Reinforcement Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2201.11927.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fliuzuxin\u002Fcvpo-safe-rl) (ICML 2022)\n- Provably efficient model-free constrained rl with linear function approximation, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2206.11889), Not Find Code (NeurIPS 2022)\n- Constrained Policy Optimization via Bayesian World Models, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2201.09802), [Code](https:\u002F\u002Fgithub.com\u002Fyardenas\u002Fla-mbda) (ICLR 2022)\n- Stability-Constrained Markov Decision Processes Using MPC, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2102.01383.pdf), Not Find Code (Accepted by Automatica, 2022)\n- Constrained Reinforcement Learning for Vehicle Motion Planning with Topological Reachability Analysis, [Paper](https:\u002F\u002Fwww.mdpi.com\u002F2218-6581\u002F11\u002F4\u002F81\u002Fpdf), Not Find Code (Accepted by Robotics, 2022)\n- Triple-Q: A Model-Free Algorithm for Constrained Reinforcement Learning with Sublinear 
Regret and Zero Constraint Violation, [Paper](https:\u002F\u002Fproceedings.mlr.press\u002Fv151\u002Fwei22a\u002Fwei22a.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fhonghaow\u002FTriple-q) (Accepted by AISTATS 2022)\n- Safe reinforcement learning using robust action governor, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2102.10643.pdf), Not Find Code (Accepted by In Learning for Dynamics and Control, 2022)\n- A primal-dual approach to constrained markov decision processes, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2101.10895.pdf),  Not Find Code (Arxiv, 2022)\n- SAUTE RL: Almost Surely Safe Reinforcement Learning Using State Augmentation, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2202.06558.pdf), Not Find Code (Arxiv, 2022)\n- Finding Safe Zones of policies Markov Decision Processes, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2202.11593.pdf), Not Find Code (Arxiv, 2022)\n- CUP: A Conservative Update Policy Algorithm for Safe Reinforcement Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2202.07565.pdf), [Code](https:\u002F\u002Fgithub.com\u002FRL-boxes\u002FSafe-RL) (Arxiv, 2022)\n- SAFER: Data-Efficient and Safe Reinforcement Learning via Skill Acquisition, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2202.04849.pdf), Not Find Code (Arxiv, 2022)\n- Penalized Proximal Policy Optimization for Safe Reinforcement Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2205.11814.pdf), Not Find Code (Arxiv, 2022)\n- Mean-Semivariance Policy Optimization via Risk-Averse Reinforcement Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2206.07376.pdf), Not Find Code (Arxiv, 2022)\n- Convergence and sample complexity of natural policy gradient primal-dual methods for constrained MDPs, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2206.02346.pdf), Not Find Code (Arxiv, 2022)\n- Guided Safe Shooting: model based reinforcement learning with safety constraints, 
[Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2206.09743.pdf), Not Find Code (Arxiv, 2022)\n- Safe Reinforcement Learning via Confidence-Based Filters, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2207.01337.pdf), Not Find Code (Arxiv, 2022)\n- TRC: Trust Region Conditional Value at Risk for Safe Reinforcement Learning, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fdocument\u002F9677982), [Code](https:\u002F\u002Fgithub.com\u002Frllab-snu\u002FTrust-Region-CVaR) (Accepted by IEEE RAL, 2022)\n- Efficient Off-Policy Safe Reinforcement Learning Using Trust Region Conditional Value at Risk, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fdocument\u002F9802647), Not Find Code (Accepted by IEEE RAL, 2022)\n- Enhancing Safe Exploration Using Safety State Augmentation, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2206.02675), Not Find Code (Arxiv, 2022)\n- Towards Safe Reinforcement Learning via Constraining Conditional Value-at-Risk, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2206.04436.pdf), Not Find Code (Accepted by IJCAI 2022)\n- Safe reinforcement learning of dynamic high-dimensional robotic tasks: navigation, manipulation, interaction, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2209.13308.pdf), Not Find Code (Arxiv, 2022)\n- Safe Exploration Method for Reinforcement Learning under Existence of Disturbance, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2209.15452.pdf), Not Find Code (Arxiv, 2022)\n- Guiding Safe Exploration with Weakest Preconditions, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2209.14148.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fgavlegoat\u002Fspice) (Arxiv, 2022)\n- Temporal logic guided safe model-based reinforcement learning: A hybrid systems approach, [Paper](https:\u002F\u002Fwww.sciencedirect.com\u002Fscience\u002Farticle\u002Fpii\u002FS1751570X22000905), Not Find Code (Accepted by Nonlinear Analysis: Hybrid Systems, 2022)\n- Provably Safe Reinforcement Learning via Action Projection using 
Reachability Analysis and Polynomial Zonotopes, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2210.10691.pdf),  Not Find Code (Arxiv, 2022)\n- Model-based Safe Deep Reinforcement Learning via a Constrained Proximal Policy Optimization Algorithm, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2210.07573.pdf),  [Code](https:\u002F\u002Fgithub.com\u002Fakjayant\u002Fmbppol) (Arxiv, 2022)\n- Safe Model-Based Reinforcement Learning with an Uncertainty-Aware Reachability Certificate, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2210.07553.pdf), Not Find Code (Arxiv, 2022)\n- UNIFY: a Unified Policy Designing Framework for Solving Constrained Optimization Problems with Machine Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2210.14030.pdf), Not Find Code (Arxiv, 2022)\n- Enforcing Hard Constraints with Soft Barriers: Safe Reinforcement Learning in Unknown Stochastic Environments, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2209.15090.pdf),  Not Find Code (Arxiv, 2022)\n- Safe Reinforcement Learning Using Robust Control Barrier Functions, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fstamp\u002Fstamp.jsp?tp=&arnumber=9928337), Not Find Code (Accepted by IEEE RAL, 2022)\n- Model-free Neural Lyapunov Control for Safe Robot Navigation, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2203.01190.pdf), [Code](https:\u002F\u002Fgithub.com\u002FZikangXiong\u002FMFNLC), [Demo](https:\u002F\u002Fsites.google.com\u002Fview\u002Fmf-nlc) (Accepted by IROS 2022)\n- Safe Reinforcement Learning via Probabilistic Logic Shields, [Paper](https:\u002F\u002Fwww.ijcai.org\u002Fproceedings\u002F2023\u002F0637.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fwenchiyang\u002Fpls) (Accepted by IJCAI 2023, Distinguished Paper Award)\n- Towards robust and safe reinforcement learning with benign off-policy data, [Paper](https:\u002F\u002Fproceedings.mlr.press\u002Fv202\u002Fliu23l\u002Fliu23l.pdf),  Not Find Code (Accepted by ICML 2023)\n- Enforcing hard constraints 
with soft barriers: Safe reinforcement learning in unknown stochastic environments, [Paper](https:\u002F\u002Fproceedings.mlr.press\u002Fv202\u002Fwang23as\u002Fwang23as.pdf),  Not Find Code (Accepted by ICML 2023)\n- Safe Exploration Incurs Nearly No Additional Sample Complexity for Reward-free RL, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2206.14057),  Not Find Code (Accepted by ICLR 2023)\n- A CMDP-within-online framework for Meta-Safe Reinforcement Learning, [Paper](https:\u002F\u002Fopenreview.net\u002Fpdf?id=mbxz9Cjehr),  Not Find Code (Accepted by ICLR 2023)\n- Datasets and Benchmarks for Offline Safe Reinforcement Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.09303), [Code](https:\u002F\u002Fgithub.com\u002Fliuzuxin\u002Fosrl), (Accepted by Journal of Data-centric Machine Learning Research)\n- SCPO: Safe Reinforcement Learning with Safety Critic Policy Optimization, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.00880), [Code](https:\u002F\u002Fgithub.com\u002FSafeRL-Lab\u002FSCPO) (Arxiv, 2023)\n- Shielded Reinforcement Learning for Hybrid Systems, [Paper](https:\u002F\u002Flink.springer.com\u002Fchapter\u002F10.1007\u002F978-3-031-46002-9_3) [(Arxiv)](https:\u002F\u002Farxiv.org\u002Fabs\u002F2308.14424), [Code](https:\u002F\u002Fgithub.com\u002FAsgerHB\u002FShielded-Learning-for-Hybrid-Systems) (AISOLA, 2023)\n- Adaptive primal-dual method for safe reinforcement learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.00355), Not Find Code (Accepted by AAMAS 2024)\n- Probabilistic constraint for safety-critical reinforcement learning, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fiel7\u002F9\u002F4601496\u002F10475493.pdf), Not Find Code (Accepted by TAC)\n- Generalized constraint for probabilistic safe reinforcement learning, [Paper](https:\u002F\u002Fproceedings.mlr.press\u002Fv242\u002Fchen24b\u002Fchen24b.pdf), Not Find Code (Accepted by DCC 2024)\n- Log Barriers for Safe Black-box Optimization with 
Application to Safe Reinforcement Learning, [Paper](https:\u002F\u002Fwww.jmlr.org\u002Fpapers\u002Fvolume25\u002F22-0878\u002F22-0878.pdf), [Code](https:\u002F\u002Fgithub.com\u002FIlnura\u002FLB_SGD) (JMLR, 2024)\n- Provably safe reinforcement learning with step-wise violation constraints, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2302.06064), Not Find Code (Accepted by NeurIPS 2024)\n- Feasibility Consistent Representation Learning for Safe Reinforcement Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fabs\u002F2405.11718), [Code](https:\u002F\u002Fgithub.com\u002Fczp16\u002FFCSRL), (Accepted by ICML 2024)\n- Balance Reward and Safety Optimization for Safe Reinforcement Learning: A Perspective of Gradient Manipulation, [Paper](https:\u002F\u002Fojs.aaai.org\u002Findex.php\u002FAAAI\u002Farticle\u002Fview\u002F30102\u002F31944), Not Find Code (Accepted by AAAI 2024)\n- Safe Reinforcement Learning with Free-form Natural Language Constraints and Pre-Trained Language Models, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.07553v1), Not Find Code (Accepted by AAMAS 2024)\n- Enhancing Efficiency of Safe Reinforcement Learning via Sample Manipulation, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2405.20860), Not Find Code (Arxiv, 2024)\n- Safe and Balanced: A Framework for Constrained Multi-Objective Reinforcement Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2405.16390), Not Find Code (Arxiv, 2024)\n- Confident Natural Policy Gradient for Local Planning in qπ-realizable Constrained MDPs, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2406.18529), Not Find Code (Arxiv, 2024)\n- Safe Exploration Using Bayesian World Models and Log-Barrier Optimization, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2405.05890), [Code](https:\u002F\u002Fanonymous.4open.science\u002Fr\u002Fsafe-opax-F5FF\u002FREADME.md) (Arxiv, 2024)\n- Safe and Balanced: A Framework for Constrained Multi-Objective Reinforcement Learning, 
[Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2405.16390), [Code](https:\u002F\u002Fgithub.com\u002FSafeRL-Lab\u002FCMORL) (Accepted by IEEE TPAMI 2025)\n- Reward-Safety Balance in Offline Safe RL via Diffusion Regularization, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2502.12391?), Not Find Code (Accepted by NeurIPS 2025)\n\n\n\n\n#### 2.2. Safe Multi-Agent RL Baselines\n- Multi-Agent Constrained Policy Optimisation (MACPO), [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2110.02793.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FMulti-Agent-Constrained-Policy-Optimisation) (Arxiv, 2021)\n- MAPPO-Lagrangian, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2110.02793.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FMulti-Agent-Constrained-Policy-Optimisation)  (Arxiv, 2021)\n- Decentralized policy gradient descent ascent for safe multi-agent reinforcement learning, [Paper](https:\u002F\u002Fchentianyi1991.github.io\u002Faaai.pdf), Not Find Code (Accepted by AAAI 2021)\n- Safe multi-agent reinforcement learning via shielding, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2101.11196.pdf), Not Find Code (Accepted by AAMAS 2021)\n- CMIX: Deep Multi-agent Reinforcement Learning with Peak and Average Constraints, [Paper](https:\u002F\u002F2021.ecmlpkdd.org\u002Fwp-content\u002Fuploads\u002F2021\u002F07\u002Fsub_181.pdf), Not Find Code (Accepted by Joint European Conference on Machine Learning and Knowledge Discovery in Databases 2021)\n- Safe multi-agent reinforcement learning through decentralized multiple control barrier functions, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2103.12553.pdf), Not Find Code (Arxiv 2021)\n- CAMA: A New Framework for Safe Multi-Agent Reinforcement Learning Using Constraint Augmentation, [Paper](https:\u002F\u002Fopenreview.net\u002Fpdf?id=jK02XX9ZpJkt), Not Find Code (Openreview 2022)\n- Shield decentralization for safe multi-agent reinforcement learning, 
[Paper](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper_files\u002Fpaper\u002F2022\u002Ffile\u002F57444e14ecd9e2c8f603b4f012ce3811-Paper-Conference.pdf), Not Find Code (NeurIPS 2022)\n- Solving Multi-Agent Safe Optimal Control with Distributed Epigraph Form MARL, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2504.15425), [Code](https:\u002F\u002Fgithub.com\u002FMIT-REALM\u002Fdef-marl\u002F) (RSS 2025)\n\n\n### 3. Surveys\n- A Review of Safe Reinforcement Learning: Methods, Theory and Applications, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2205.10330.pdf) (IEEE TPAMI, 2024)\n- State-wise Safe Reinforcement Learning: A Survey, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2302.03122.pdf) (Accepted by IJCAI 2023)\n- Policy learning with constraints in model-free reinforcement learning: A survey, [Paper](https:\u002F\u002Fweb.archive.org\u002Fweb\u002F20210812230501id_\u002Fhttps:\u002F\u002Fwww.ijcai.org\u002Fproceedings\u002F2021\u002F0614.pdf) (Accepted by IJCAI 2021)\n- Safe learning in robotics: From learning-based control to safe reinforcement learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2108.06266.pdf) (Accepted by Annual Review of Control, Robotics, and Autonomous Systems, 2021)\n- Safe learning and optimization techniques: Towards a survey of the state of the art, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2101.09505.pdf) (Accepted by In International Workshop on the Foundations of Trustworthy AI Integrating Learning, Optimization and Reasoning, 2020)\n- A comprehensive survey on safe reinforcement learning, [Paper](https:\u002F\u002Fwww.jmlr.org\u002Fpapers\u002Fvolume16\u002Fgarcia15a\u002Fgarcia15a.pdf) (Accepted by Journal of Machine Learning Research, 2015)\n\n\n\n\n### 4. 
Theses\n- Safe Reinforcement Learning to Make Decisions in Robotics, [Thesis](https:\u002F\u002Fpeople.eecs.berkeley.edu\u002F~shangding.gu\u002Fpapers\u002FPhD_Dissertation_Shangding_Gu_2024.pdf) (PhD thesis, Shangding Gu, TU Munich, 2024)\n- Safe Exploration in Reinforcement Learning: Theory and Applications in Robotics, [Thesis](https:\u002F\u002Fwww.research-collection.ethz.ch\u002Fbitstream\u002Fhandle\u002F20.500.11850\u002F370833\u002F1\u002Froot.pdf) (PhD thesis, Felix Berkenkamp, ETH Zurich, 2019)\n- Safe reinforcement learning, [Thesis](https:\u002F\u002Fscholarworks.umass.edu\u002Fcgi\u002Fviewcontent.cgi?article=1527&context=dissertations_2) (PhD thesis, Philip S. Thomas, University of Massachusetts Amherst, 2015)\n\n\n\n\n### 5. Book\n- Constrained Markov decision processes: stochastic modeling, [Book](https:\u002F\u002Fwww-sop.inria.fr\u002Fmembers\u002FEitan.Altman\u002FPAPERS\u002Fh.pdf), (Eitan Altman, Routledge, 1999)\n\n### 6. Tutorials\n- Safe Reinforcement Learning: Bridging Theory and Practice, [tutorial](https:\u002F\u002Fdocs.google.com\u002Fpresentation\u002Fd\u002F1slZyKj1G_XvtH8laWMClcQVMLbiQyqKW25cV9gY3ypE\u002Fedit?usp=sharing), (Ming Jin & Shangding Gu, 2024)\n- Safe Reinforcement Learning for Smart Grid Control and Operations, [tutorial](https:\u002F\u002Fdocs.google.com\u002Fpresentation\u002Fd\u002F1o3t3KMfgCL5fo_zHZH2ChMkJTkbJ7sY7lMomBE8iRNE\u002Fedit?usp=sharing), (Ming Jin & Shangding Gu, 2024)\n- Safe Reinforcement Learning, [tutorial](https:\u002F\u002Fdrive.google.com\u002Ffile\u002Fd\u002F1Hpu9HZbXkurTMWvj63m-aLYxay66E2Vz\u002Fview), (Felix Berkenkamp, 2023)\n- Primal-Dual Methods, [tutorial](https:\u002F\u002Fdrive.google.com\u002Ffile\u002Fd\u002F1_NRil0__6375nIqMT6jXw-PB6CkwvvDH\u002Fview), (Gergely Neu, 2023)\n\n### 7. 
Exercise\n- Primal-Dual Reinforcement Learning, [exercise code](https:\u002F\u002Fgithub.com\u002Ftyrion\u002Fprimal-dual-exercise) and [exercise Colab](https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002Ftyrion\u002Fprimal-dual-exercise\u002Fblob\u002Fmaster\u002FPrimal_Dual_Colab.ipynb), (Germano Gabbianelli, 2023)\n\n\n## Publication\nIf you find the repository useful, please cite the [paper](https:\u002F\u002Farxiv.org\u002Fabs\u002F2205.10330):\n```\n@article{gu2024review,\n  title={A Review of Safe Reinforcement Learning: Methods, Theories and Applications},\n  author={Gu, Shangding and Yang, Long and Du, Yali and Chen, Guang and Walter, Florian and Wang, Jun and Knoll, Alois},\n  journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},\n  year={2024},\n  publisher={IEEE}\n}\n```\n\n","## 安全强化学习基准\n\n\n\n\n\n该仓库用于安全强化学习（RL）研究，我们在此探索各种安全RL基准和安全RL评测环境，涵盖单智能体RL和多智能体RL。若任何作者不希望其论文在此列出，请随时联系 \u003Cgshangd[AT]foxmail.com>。（本仓库仍在积极开发中，我们欢迎任何建设性的意见和建议）\n\n\n欢迎您更新此列表！如果您发现一篇未在此列出的安全强化学习相关论文，请：\n\n- 分支本仓库，添加相关信息并合并回主仓库；\n- 或在此处提交问题；\n- 或发送邮件至 \u003Cgshangd[AT]foxmail.com>。\n\n\n\n***\nREADME的组织结构如下：\n- [安全强化学习基准](#安全强化学习基准)\n  * [1. 支持的环境](#1-支持的环境)\n    + [1.1. 安全单智能体RL基准](#11-安全单智能体RL基准)\n    + [1.2. 安全多智能体RL基准](#12-安全多智能体RL基准)\n  * [2. 安全RL基准方法](#2-安全RL基准方法)\n    + [2.1. 安全单智能体RL基准方法](#21-安全单智能体RL基准方法)\n    + [2.2. 安全多智能体RL基准方法](#22-安全多智能体RL基准方法)\n  * [3. 综述](#3-综述)\n  * [4. 学位论文](#4-学位论文)\n  * [5. 书籍](#5-书籍)\n  * [6. 教程](#6-教程)\n  * [7. 练习题](#7-练习题)\n- [出版物](#出版物)\n\n***\n\n\n\n### 1. 支持的环境\n#### 1.1. 安全单智能体RL基准\n- [AI Safety Gridworlds](https:\u002F\u002Fgithub.com\u002Fdeepmind\u002Fai-safety-gridworlds)\n- [Safety-Gym](https:\u002F\u002Fgithub.com\u002Fopenai\u002Fsafety-gym)\n- [Safety-Gymnasium](https:\u002F\u002Fgithub.com\u002FPKU-Alignment\u002Fsafety-gymnasium)\n\n#### 1.2. 
安全多智能体RL基准\n- [Safe Multi-Agent Mujoco](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Multi-Agent-Mujoco)\n- [Safe Multi-Agent Isaac Gym](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Multi-Agent-Isaac-Gym)\n- [Safe Multi-Agent Robosuite](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Multi-Agent-Robosuite)\n\n\n\n### 2. 安全RL基准方法\n\n#### 2.1. 安全单智能体RL基准方法\n\n- Consideration of risk in reinforcement learning, [Paper](https:\u002F\u002Fciteseerx.ist.psu.edu\u002Fviewdoc\u002Fdownload?doi=10.1.1.45.8264&rep=rep1&type=pdf), Not Find Code, (Accepted by ICML 1994)\n- Multi-criteria Reinforcement Learning,  [Paper](https:\u002F\u002Fciteseerx.ist.psu.edu\u002Fviewdoc\u002Fdownload?doi=10.1.1.232.962&rep=rep1&type=pdf), Not Find Code, (Accepted by ICML 1998)\n- Lyapunov design for safe reinforcement learning, [Paper](https:\u002F\u002Fwww.jmlr.org\u002Fpapers\u002Fvolume3\u002Fperkins02a\u002Fperkins02a.pdf), Not Find Code, (Accepted by ICML 2002)\n- Risk-sensitive reinforcement learning, [Paper](https:\u002F\u002Flink.springer.com\u002Fcontent\u002Fpdf\u002F10.1023\u002FA:1017940631555.pdf), Not Find Code, (Accepted by Machine Learning, 2002)\n- Risk-Sensitive Reinforcement Learning Applied to Control under Constraints, [Paper](https:\u002F\u002Fwww.jair.org\u002Findex.php\u002Fjair\u002Farticle\u002Fview\u002F10415\u002F24966), Not Find Code, (Accepted by Journal of Artificial Intelligence Research, 2005)\n- An actor-critic algorithm for constrained markov decision processes, [Paper](https:\u002F\u002Freader.elsevier.com\u002Freader\u002Fsd\u002Fpii\u002FS0167691104001276?token=D2FDE94E441EB4182DF4CF382458FCA57BDCABECB2E17932BF52CABA7F46F0F67EE5E9A4BE19F9FD3E27D4099CA25C80&originRegion=eu-west-1&originCreation=20220304073259), Not Find Code, (Accepted by Systems & Control Letters, 2005)\n- Reinforcement learning for MDPs with constraints, [Paper](https:\u002F\u002Flink.springer.com\u002Fcontent\u002Fpdf\u002F10.1007\u002F11871842_63.pdf), Not Find 
Code, (Accepted by European Conference on Machine Learning 2006)\n- Discounted Markov decision processes with utility constraints, [Paper](http:\u002F\u002Fciteseerx.ist.psu.edu\u002Fviewdoc\u002Fdownload?doi=10.1.1.140.1315&rep=rep1&type=pdf), Not Find Code, (Accepted by Computers & Mathematics with Applications, 2006)\n- Constrained reinforcement learning from intrinsic and extrinsic rewards, [Paper](http:\u002F\u002Fciteseerx.ist.psu.edu\u002Fviewdoc\u002Fdownload?doi=10.1.1.1059.1383&rep=rep1&type=pdf), Not Find Code, (Accepted by International Conference on Development and Learning 2007)\n- Safe exploration for reinforcement learning, [Paper](https:\u002F\u002Fciteseerx.ist.psu.edu\u002Fviewdoc\u002Fdownload?doi=10.1.1.161.2786&rep=rep1&type=pdf), Not Find Code, (Accepted by ESANN 2008)\n- Percentile optimization for Markov decision processes with parameter uncertainty, [Paper](http:\u002F\u002Fciteseerx.ist.psu.edu\u002Fviewdoc\u002Fdownload?doi=10.1.1.400.5048&rep=rep1&type=pdf), Not Find Code, (Accepted by Operations research, 2010)\n- Probabilistic goal Markov decision processes, [Paper](http:\u002F\u002Fciteseerx.ist.psu.edu\u002Fviewdoc\u002Fdownload?doi=10.1.1.208.4804&rep=rep1&type=pdf), Not Find Code, (Accepted by IJCAI 2011)\n- Safe reinforcement learning in high-risk tasks through policy improvement, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fstamp\u002Fstamp.jsp?tp=&arnumber=5967356), Not Find Code, (Accepted by IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL) 2011) \n- Safe Exploration in Markov Decision Processes, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1205.4810.pdf), Not Find Code, (Accepted by ICML 2012)\n- Policy gradients with variance related risk criteria, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1206.6404.pdf), Not Find Code, (Accepted by ICML 2012)\n- Risk aversion in Markov decision processes via near optimal Chernoff bounds, 
[Paper](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2012\u002Ffile\u002Fe2f374c3418c50bc30d67d5f7454a5b4-Paper.pdf), Not Find Code, (Accepted by NeurIPS 2012)\n- Safe Exploration of State and Action Spaces in Reinforcement Learning, [Paper](https:\u002F\u002Fweb.archive.org\u002Fweb\u002F20180423223542id_\u002Fhttp:\u002F\u002Fwww.jair.org\u002Fmedia\u002F3761\u002Flive-3761-6687-jair.pdf), Not Find Code, (Accepted by Journal of Artificial Intelligence Research, 2012)\n- An Online Actor–Critic Algorithm with Function Approximation for Constrained Markov Decision Processes, [Paper](https:\u002F\u002Flink.springer.com\u002Fcontent\u002Fpdf\u002F10.1007\u002Fs10957-012-9989-5.pdf), Not Find Code, (Accepted by Journal of Optimization Theory and Applications, 2012)\n- Safe policy iteration, [Paper](http:\u002F\u002Fproceedings.mlr.press\u002Fv28\u002Fpirotta13.pdf), Not Find Code, (Accepted by ICML 2013)\n- Reachability-based safe learning with Gaussian processes, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fstamp\u002Fstamp.jsp?tp=&arnumber=7039601), Not Find Code (Accepted by IEEE CDC 2014)\n- Safe Policy Search for Lifelong Reinforcement Learning with Sublinear Regret, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1505.05798.pdf), Not Find Code, (Accepted by ICML 2015)\n- High-Confidence Off-Policy Evaluation, [Paper](https:\u002F\u002Fwww.ics.uci.edu\u002F~dechter\u002Fcourses\u002Fics-295\u002Fwinter-2018\u002Fpapers\u002F2015Thomas2015.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002FsafeRL) (Accepted by AAAI 2015)\n- Safe Exploration for Optimization with Gaussian Processes, [Paper](http:\u002F\u002Fproceedings.mlr.press\u002Fv37\u002Fsui15.pdf), Not Find Code (Accepted by ICML 2015)\n- Safe Exploration in Finite Markov Decision Processes with Gaussian Processes, 
[Paper](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2016\u002Ffile\u002F9a49a25d845a483fae4be7e341368e36-Paper.pdf), Not Find Code (Accepted by NeurIPS 2016)\n- Safe and efficient off-policy reinforcement learning, [Paper](https:\u002F\u002Fwww.researchgate.net\u002Fprofile\u002FAnna-Harutyunyan-3\u002Fpublication\u002F303859091_Safe_and_Efficient_Off-Policy_Reinforcement_Learning\u002Flinks\u002F57b2e8c908aeb2cf17c73ad2\u002FSafe-and-Efficient-Off-Policy-Reinforcement-Learning.pdf), [Code](https:\u002F\u002Fgithub.com\u002FALRhub\u002FRetrace-PyTorch) (Accepted by NeurIPS 2016)\n- Safe, Multi-Agent, Reinforcement Learning for Autonomous Driving, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1610.03295.pdf?ref=https:\u002F\u002Fgithubhelp.com), Not Find Code (only Arxiv, 2016, citation 530+)\n- Safe Learning of Regions of Attraction in Uncertain, Nonlinear Systems with Gaussian Processes, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1603.04915.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002Fsafe_learning) (Accepted by CDC 2016)\n- Safety-constrained reinforcement learning for MDPs, [Paper](https:\u002F\u002Fwww.researchgate.net\u002Fprofile\u002FNils-Jansen-2\u002Fpublication\u002F283118102_Safety-Constrained_Reinforcement_Learning_for_MDPs\u002Flinks\u002F5630d2af08aef3349c29f90f\u002FSafety-Constrained-Reinforcement-Learning-for-MDPs.pdf), Not Find Code (Accepted by In International Conference on Tools and Algorithms for the Construction and Analysis of Systems 2016)\n- Convex synthesis of randomized policies for controlled Markov chains with density safety upper bound constraints, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fstamp\u002Fstamp.jsp?tp=&arnumber=7526658), Not Find Code (Accepted by American Control Conference 2016)\n- Combating Deep Reinforcement Learning's Sisyphean Curse with Intrinsic Fear, 
[Paper](https:\u002F\u002Fopenreview.net\u002Fpdf?id=r1tHvHKge), Not Find Code (only Openreview, 2016)\n- Combating reinforcement learning's sisyphean curse with intrinsic fear, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1611.01211.pdf), Not Find Code (only Arxiv, 2016)\n- Constrained Policy Optimization (CPO), [Paper](http:\u002F\u002Fproceedings.mlr.press\u002Fv70\u002Fachiam17a\u002Fachiam17a.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002Fsafety-starter-agents) (Accepted by ICML 2017)\n- Risk-constrained reinforcement learning with percentile risk criteria, [Paper](https:\u002F\u002Fwww.jmlr.org\u002Fpapers\u002Fvolume18\u002F15-636\u002F15-636.pdf), Not Find Code (Accepted by The Journal of Machine Learning Research, 2017)\n- Probabilistically Safe Policy Transfer, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1705.05394.pdf),  Not Find Code (Accepted by ICRA 2017) \n- Accelerated primal-dual policy optimization for safe reinforcement learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1802.06480.pdf), Not Find Code (Arxiv, 2017)\n- Stagewise safe bayesian optimization with gaussian processes, [Paper](http:\u002F\u002Fwww.yisongyue.com\u002Fpublications\u002Ficml2018_stageopt.pdf),  Not Find Code (Accepted by ICML 2018)\n- Leave no Trace: Learning to Reset for Safe and Autonomous Reinforcement Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1711.06782.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002FLeaveNoTrace) (Accepted by ICLR 2018)\n- Safe Model-based Reinforcement Learning with Stability Guarantees, [Paper](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2017\u002Ffile\u002F766ebcd59621e305170616ba3d3dac32-Paper.pdf), 
[Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002Fsafe_learning) (Accepted by NeurIPS 2018)\n- A Lyapunov-based Approach to Safe Reinforcement Learning, [Paper](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2018\u002Ffile\u002F4fe5149039b52765bde64beb9f674940-Paper.pdf), Not Find Code (Accepted by NeurIPS 2018)\n- Constrained Cross-Entropy Method for Safe Reinforcement Learning, [Paper](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2018\u002Ffile\u002F34ffeb359a192eb8174b6854643cc046-Paper.pdf), Not Find Code (Accepted by NeurIPS 2018)\n- Safe Reinforcement Learning via Formal Methods, [Paper](http:\u002F\u002Fwww.cs.cmu.edu\u002F~aplatzer\u002Fpub\u002FSafeRL.pdf), Not Find Code (Accepted by AAAI 2018)\n- Safe exploration and optimization of constrained mdps using gaussian processes, [Paper](http:\u002F\u002Fwww.yisongyue.com\u002Fpublications\u002Faaai2018_safe_mdp.pdf), Not Find Code (Accepted by AAAI 2018)\n- Safe reinforcement learning via shielding, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1708.08611.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fsafe-rl\u002Fsafe-rl-shielding) (Accepted by AAAI 2018)\n- Trial without Error: Towards Safe Reinforcement Learning via Human Intervention, [Paper](https:\u002F\u002Fwww.ifaamas.org\u002FProceedings\u002Faamas2018\u002Fpdfs\u002Fp2067.pdf), Not Find Code (Accepted by AAMAS 2018)\n- Learning-based Model Predictive Control for Safe Exploration and Reinforcement Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1906.12189.pdf), Not Find Code (Accepted by CDC 2018)\n- The Lyapunov Neural Network: Adaptive Stability Certification for Safe Learning of Dynamical Systems, [Paper](http:\u002F\u002Fproceedings.mlr.press\u002Fv87\u002Frichards18a\u002Frichards18a.pdf), 
[Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002Fsafe_learning) (Accepted by CoRL 2018)\n- OptLayer - Practical Constrained Optimization for Deep Reinforcement Learning in the Real World, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1709.07643.pdf), Not Find Code (Accepted by ICRA 2018)\n- Safe learning of quadrotor dynamics using barrier certificates, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fiel7\u002F8449910\u002F8460178\u002F08460471.pdf), Not Find Code (Accepted by ICRA 2018)\n- Safe reinforcement learning on autonomous vehicles, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1910.00399.pdf), Not Find Code (Accepted by IROS 2018)\n- Trial without error: Towards safe reinforcement learning via human intervention, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1707.05173.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fgsastry\u002Fhuman-rl) (Accepted by AAMAS 2018)\n- Safe reinforcement learning: Learning with supervision using a constraint-admissible set, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fabstract\u002Fdocument\u002F8430770), Not Find Code (Accepted by Annual American Control Conference (ACC) 2018)\n- A General Safety Framework for Learning-Based Control in Uncertain Robotic Systems, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fstamp\u002Fstamp.jsp?tp=&arnumber=8493361), Not Find Code (Accepted by IEEE Transactions on Automatic Control 2018)\n- Safe exploration algorithms for reinforcement learning controllers, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fstamp\u002Fstamp.jsp?arnumber=7842559), Not Find Code (Accepted by IEEE transactions on neural networks and learning systems 2018)\n- Verification and repair of control policies for safe reinforcement learning, [Paper](https:\u002F\u002Flink.springer.com\u002Fcontent\u002Fpdf\u002F10.1007\u002Fs10489-017-0999-8.pdf), Not Find Code (Accepted by Applied Intelligence, 2018)\n- Safe 
Exploration in Continuous Action Spaces, [Paper](https:\u002F\u002Fwww.researchgate.net\u002Fprofile\u002FGal-Dalal\u002Fpublication\u002F322756278_Safe_Exploration_in_Continuous_Action_Spaces\u002Flinks\u002F5a71e84faca2720bc0d940b3\u002FSafe-Exploration-in-Continuous-Action-Spaces.pdf), [Code](https:\u002F\u002Fgithub.com\u002FAgrawalAmey\u002Fsafe-explorer), (only Arxiv, 2018, citation 200+)\n- Safe exploration of nonlinear dynamical systems: A predictive safety filter for reinforcement learning, [Paper](https:\u002F\u002Fwww.researchgate.net\u002Fprofile\u002FKim-Wabersich\u002Fpublication\u002F329641554_Safe_exploration_of_nonlinear_dynamical_systems_A_predictive_safety_filter_for_reinforcement_learning\u002Flinks\u002F5ede2aab299bf1d20bd87981\u002FSafe-exploration-of-nonlinear-dynamical-systems-A-predictive-safety-filter-for-reinforcement-learning.pdf), Not Find Code (Arxiv, 2018, citation 40+)\n- Batch policy learning under constraints, [Paper](http:\u002F\u002Fproceedings.mlr.press\u002Fv97\u002Fle19a\u002Fle19a.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fclvoloshin\u002Fconstrained_batch_policy_learning) (Accepted by ICML 2019)\n- Safe Policy Improvement with Baseline Bootstrapping, [Paper](https:\u002F\u002Fwww.researchgate.net\u002Fprofile\u002FRomain-Laroche\u002Fpublication\u002F334749134_Safe_Policy_Improvement_with_Baseline_Bootstrapping\u002Flinks\u002F5d3f3b634585153e592ceeb4\u002FSafe-Policy-Improvement-with-Baseline-Bootstrapping.pdf), Not Find Code (Accepted by ICML 2019)\n- Convergent Policy Optimization for Safe Reinforcement Learning, [Paper](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2019\u002Ffile\u002Fdb29450c3f5e97f97846693611f98c15-Paper.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002FSafe_reinforcement_learning) (Accepted by NeurIPS 2019)\n- Constrained reinforcement learning has zero duality gap, 
[Paper](https:\u002F\u002Fwww.researchgate.net\u002Fprofile\u002FLuiz-Chamon\u002Fpublication\u002F336889860_Constrained_Reinforcement_Learning_Has_Zero_Duality_Gap\u002Flinks\u002F5ef4df204585155050726b42\u002FConstrained-Reinforcement-Learning-Has-Zero-Duality-Gap.pdf), Not Find Code (Accepted by NeurIPS 2019)\n- Reinforcement learning with convex constraints, [Paper](https:\u002F\u002Fwww.cs.princeton.edu\u002F~syoosefi\u002Fpapers\u002FNeurIPS2019.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fxkianteb\u002FApproPO) (Accepted by NeurIPS 2019)\n- Reward constrained policy optimization, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1805.11074.pdf), Not Find Code (Accepted by ICLR 2019)\n- Supervised policy update for deep reinforcement learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1805.11706.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fquanvuong\u002FSupervised_Policy_Update), (Accepted by ICLR 2019)\n- End-to-end safe reinforcement learning through barrier functions for safety-critical continuous control tasks, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1903.08792.pdf), [Code](https:\u002F\u002Fgithub.com\u002Frcheng805\u002FRL-CBF) (Accepted by AAAI 2019)\n- Lyapunov-based safe policy optimization for continuous control, [Paper](https:\u002F\u002Fopenreview.net\u002Fpdf?id=SJgUYBVLsN), Not Find Code (Accepted by ICML Workshop RL4RealLife 2019)\n- Safe reinforcement learning with model uncertainty estimates, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1810.08700.pdf), Not Find Code (Accepted by ICRA 2019)\n- Safe reinforcement learning with scene decomposition for navigating complex urban environments, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1904.11483.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002FAutomotiveSafeRL), (Accepted by IV 2019)\n- Verifiably safe off-model reinforcement learning, 
[Paper](https:\u002F\u002Flink.springer.com\u002Fchapter\u002F10.1007\u002F978-3-030-17462-0_28), [Code](https:\u002F\u002Fgithub.com\u002FIBM\u002Fvsrl-framework\u002Fblob\u002F42e0853bffb5efbb66cd97178aff9e10ad18c5a9\u002FREADME.md) (Accepted by  InInternational Conference on Tools and Algorithms for the Construction and Analysis of Systems 2019)\n- Probabilistic policy reuse for safe reinforcement learning, [Paper](https:\u002F\u002Fdl.acm.org\u002Fdoi\u002Fpdf\u002F10.1145\u002F3310090?casa_token=OahWDUpVTxAAAAAA:MVJd1GjD6HDpFKMxXfp9pd3KaJbG879P7qvcMS0-VDGFAR0prYuXwzN9LwI4BfkPti085CGGhsz1llY), Not Find Code, (Accepted by ACM Transactions on Autonomous and Adaptive Systems (TAAS), 2019)\n- Projected stochastic primal-dual method for constrained online learning with kernels, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fielaam\u002F78\u002F8691646\u002F8678800-aam.pdf), Not Find Code, (Accepted by IEEE Transactions on Signal Processing, 2019)\n- Resource constrained deep reinforcement learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1812.00600.pdf), Not Find Code, (Accepted by 29th International Conference on Automated Planning and Scheduling  2019)\n- Temporal logic guided safe reinforcement learning using control barrier functions, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1903.09885.pdf), Not Find Code (Arxiv, Citation 25+, 2019)\n- Safe policies for reinforcement learning via primal-dual methods, [Paper](https:\u002F\u002Fwww.researchgate.net\u002Fprofile\u002FLuiz-Chamon\u002Fpublication\u002F337438444_Safe_Policies_for_Reinforcement_Learning_via_Primal-Dual_Methods\u002Flinks\u002F5ef4df1f299bf18816e7f62c\u002FSafe-Policies-for-Reinforcement-Learning-via-Primal-Dual-Methods.pdf), Not Find Code (Arxiv, Citation 25+, 2019)\n- Value constrained model-free continuous control, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1902.04623.pdf), Not Find Code (Arxiv, Citation 35+, 2019)\n- Safe Reinforcement Learning in Constrained Markov 
Decision Processes (SNO-MDP), [Paper](http:\u002F\u002Fproceedings.mlr.press\u002Fv119\u002Fwachi20a\u002Fwachi20a.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002Fsafe_near_optimal_mdp) (Accepted by ICML 2020)\n- Responsive Safety in Reinforcement Learning by PID Lagrangian Methods, [Paper](http:\u002F\u002Fproceedings.mlr.press\u002Fv119\u002Fstooke20a\u002Fstooke20a.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fkeirp\u002Fglamor\u002Ftree\u002F98681a23bae9e8e5e9fbf68a0316ca2a22a27593\u002Fdependencies\u002Frlpyt\u002Frlpyt\u002Fprojects\u002Fsafe) (Accepted by ICML 2020)\n- Constrained markov decision processes via backward value functions, [Paper](http:\u002F\u002Fproceedings.mlr.press\u002Fv119\u002Fsatija20a\u002Fsatija20a.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fhercky\u002Fcmdps_via_bvf\u002Ftree\u002F69b9f51cb6410673d0aa2e5b9c980b33e5a46dda) (Accepted by ICML 2020)\n- Projection-Based Constrained Policy Optimization (PCPO), [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2010.03152.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002FPCPO) (Accepted by ICLR 2020)\n- First order constrained optimization in policy space (FOCOPS),[Paper](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2020\u002Ffile\u002Faf5d5ef24881f3c3049a7b9bfe74d58b-Paper.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fymzhang01\u002Ffocops) (Accepted by NeurIPS 2020)\n- Safe reinforcement learning via curriculum induction, [Paper](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2020\u002Ffile\u002F8df6a65941e4c9da40a4fb899de65c55-Paper.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fzuzuba\u002FCISR_NeurIPS20) (Accepted by NeurIPS 2020)\n- Constrained episodic reinforcement learning in concave-convex and knapsack settings, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2006.05051.pdf), 
[Code](https:\u002F\u002Fgithub.com\u002Fmiryoosefi\u002FConRL) (Accepted by NeurIPS 2020)\n- Risk-sensitive reinforcement learning: Near-optimal risk-sample tradeoff in regret, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2006.13827.pdf), Not Find Code  (Accepted by NeurIPS 2020)\n- Upper confidence primal-dual reinforcement learning for CMDP with adversarial loss, [Paper](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper_files\u002Fpaper\u002F2020\u002Ffile\u002Fae95296e27d7f695f891cd26b4f37078-Paper.pdf), Not Find Code  (Accepted by NeurIPS 2020)\n- IPO: Interior-point Policy Optimization under Constraints, [Paper](https:\u002F\u002Fwww.researchgate.net\u002Fprofile\u002FYongshuai-Liu\u002Fpublication\u002F336735393_IPO_Interior-point_Policy_Optimization_under_Constraints\u002Flinks\u002F5e1670874585159aa4bff037\u002FIPO-Interior-point-Policy-Optimization-under-Constraints.pdf), Not Find Code (Accepted by AAAI 2020)\n- Safe reinforcement learning using robust mpc, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1906.04005.pdf), Not Find Code (IEEE Transactions on Automatic Control, 2020)\n- Safe reinforcement learning via projection on a safe set: How to achieve optimality? 
[Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2004.00915.pdf), Not Find Code (Accepted by IFAC 2020)\n- Reinforcement learning for safety-critical control under model uncertainty, using control lyapunov functions and control barrier functions, [Paper](http:\u002F\u002Fwww.roboticsproceedings.org\u002Frss16\u002Fp088.pdf), Not Find Code (Accepted by RSS 2020)\n- Learning Transferable Domain Priors for Safe Exploration in Reinforcement Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1909.04307.pdf), [Code](https:\u002F\u002Fgithub.com\u002FGKthom\u002FPriors-for-safe-exploration), (Accepted by International Joint Conference on Neural Networks (IJCNN) 2020)\n- Safe reinforcement learning through meta-learned instincts, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2005.03233.pdf), Not Find Code (Accepted by The Conference on Artificial Life 2020)\n- Learning safe policies with cost-sensitive advantage estimation, [Paper](https:\u002F\u002Fopenreview.net\u002Fpdf?id=uVnhiRaW3J), Not Find Code (Openreview 2020)\n- Safe reinforcement learning using probabilistic shields, [Paper](https:\u002F\u002Frepository.ubn.ru.nl\u002Fbitstream\u002Fhandle\u002F2066\u002F224966\u002F224966.pdf?sequence=1), Not Find Code (2020)\n- A constrained reinforcement learning based approach for network slicing, [Paper](https:\u002F\u002Ficnp20.cs.ucr.edu\u002Fproceedings\u002Fhdrnets\u002FA%20Constrained%20Reinforcement%20Learning%20Based%20Approach%20for%20Network%20Slicing.pdf),  Not Find Code (Accepted by IEEE 28th International Conference on Network Protocols (ICNP) 2020)\n- Safe reinforcement learning: A control barrier function optimization approach, [Paper](https:\u002F\u002Fonlinelibrary.wiley.com\u002Fdoi\u002Fepdf\u002F10.1002\u002Frnc.5132), Not Find Code (Accepted by the International Journal of Robust and Nonlinear Control)\n- Exploration-exploitation in constrained mdps, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2003.02189.pdf), Not Find Code (Arxiv, 
2020)\n- Safe reinforcement learning using advantage-based intervention, [Paper](http:\u002F\u002Fproceedings.mlr.press\u002Fv139\u002Fwagener21a\u002Fwagener21a.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fnolanwagener\u002Fsafe_rl) (Accepted by ICML 2021)\n- Shortest-path constrained reinforcement learning for sparse reward tasks, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2107.06405.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fsrsohn\u002Fshortest-path-rl), (Accepted by ICML 2021)\n- Density constrained reinforcement learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2106.12764.pdf), Not Find Code (Accepted by ICML 2021)\n- CRPO: A New Approach for Safe Reinforcement Learning with Convergence Guarantee, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2011.05869.pdf), Not Find Code (Accepted by ICML 2021)\n- Safe reinforcement learning with linear function approximation, [Paper](https:\u002F\u002Fproceedings.mlr.press\u002Fv139\u002Famani21a\u002Famani21a.pdf), Not Find Code (Accepted by ICML 2021)\n- Safe Reinforcement Learning by Imagining the Near Future (SMBPO), [Paper](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2021\u002Ffile\u002F73b277c11266681122132d024f53a75b-Paper.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002FSafe-MBPO) (Accepted by NeurIPS 2021) \n- Towards safe reinforcement learning with a safety editor policy, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2201.12427.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fhnyu\u002Fseditor) (Accepted by NeurIPS 2021)\n- Exponential Bellman Equation and Improved Regret Bounds for Risk-Sensitive Reinforcement Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2111.03947.pdf),  Not Find Code (Accepted by NeurIPS 2021)\n- Risk-Sensitive Reinforcement Learning: Symmetry, Asymmetry, and Risk-Sample Tradeoff, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2111.03947.pdf),  
Not Find Code (Accepted by NeurIPS 2021)\n- Safe reinforcement learning with natural language constraints, [Paper](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2021\u002Ffile\u002F72f67e70f6b7cdc4cc893edaddf0c4c6-Paper.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fprinceton-nlp\u002FSRL-NLC), (Accepted by NeurIPS 2021)\n- Learning policies with zero or bounded constraint violation for constrained mdps, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2106.02684.pdf),  Not Find Code (Accepted by NeurIPS 2021)\n-  Conservative safety critics for exploration, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2010.14497.pdf), Not Find Code (Accepted by ICLR 2021)\n-  Wcsac: Worst-case soft actor critic for safety-constrained reinforcement learning, [Paper](https:\u002F\u002Fwww.st.ewi.tudelft.nl\u002Fmtjspaan\u002Fpub\u002FYang21aaai.pdf), Not Find Code (Accepted by AAAI 2021)\n-  Risk-averse trust region optimization for reward-volatility reduction, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1912.03193.pdf), Not Find Code (Accepted by IJCAI 2021)\n- AlwaysSafe: Reinforcement Learning Without Safety Constraint Violations During Training, [Paper](https:\u002F\u002Fpure.tudelft.nl\u002Fws\u002Ffiles\u002F96913978\u002Fp1226.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002FAlwaysSafe) (Accepted by AAMAS 2021)\n- Safe Continuous Control with Constrained Model-Based Policy Optimization (CMBPO), [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2104.06922.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fanyboby\u002FConstrained-Model-Based-Policy-Optimization) (Accepted by IROS 2021)\n- Context-aware safe reinforcement learning for non-stationary environments, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2101.00531.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fbaimingc\u002Fcasrl) (Accepted by ICRA 2021)\n- Model-based Constrained Reinforcement Learning using 
Generalized Control Barrier Function, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fstamp\u002Fstamp.jsp?arnumber=9636468), [Code](https:\u002F\u002Fgithub.com\u002Fmahaitongdae\u002Fsafe_exp_env) (Accepted by IROS 2021)\n- Robot Reinforcement Learning on the Constraint Manifold, [Paper](https:\u002F\u002Fproceedings.mlr.press\u002Fv164\u002Fliu22c\u002Fliu22c.pdf), [Code](https:\u002F\u002Fgithub.com\u002FPuzeLiu\u002Frl_on_manifold) (Accepted by CoRL 2021)\n- Provably efficient safe exploration via primal-dual policy optimization, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2003.00534.pdf), Not Find Code (Accepted by the International Conference on Artificial Intelligence and Statistics 2021)\n- Safe model-based reinforcement learning with robust cross-entropy method, [Paper](https:\u002F\u002Faisecure-workshop.github.io\u002Faml-iclr2021\u002Fpapers\u002F8.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002Fsafe-mbrl) (Accepted by ICLR 2021 Workshop on Security and Safety in Machine Learning Systems)\n- MESA: Offline Meta-RL for Safe Adaptation and Fault Tolerance, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2112.03575.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fmichaelzhiluo\u002Fmesa-safe-rl) (Accepted by Workshop on Safe and Robust Control of Uncertain Systems at NeurIPS 2021)\n- Safe Reinforcement Learning of Control-Affine Systems with Vertex Networks, [Paper](http:\u002F\u002Fproceedings.mlr.press\u002Fv144\u002Fzheng21a\u002Fzheng21a.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline\u002Ftree\u002Fmain\u002FSafe-RL\u002Fvertex-net) (Accepted by Conference on Learning for Dynamics and Control 2021)\n- Can You Trust Your Autonomous Car? 
Interpretable and Verifiably Safe Reinforcement Learning, [Paper](http:\u002F\u002Fdownload.cmutschler.de\u002Fpublications\u002F2021\u002FIV2021.pdf), Not Find Code (Accepted by IV 2021)\n- Provably safe model-based meta reinforcement learning: An abstraction-based approach, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2109.01255.pdf), Not Find Code (Accepted by CDC 2021)\n- Recovery RL: Safe Reinforcement Learning with Learned Recovery Zones, [Paper](https:\u002F\u002Fwww.researchgate.net\u002Fprofile\u002FMinho-Hwang\u002Fpublication\u002F345152769_Recovery_RL_Safe_Reinforcement_Learning_with_Learned_Recovery_Zones\u002Flinks\u002F5fe37ea2299bf140883a35cb\u002FRecovery-RL-Safe-Reinforcement-Learning-with-Learned-Recovery-Zones.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fabalakrishna123\u002Frecovery-rl), (Accepted by IEEE RAL, 2021)\n- Reinforcement learning control of constrained dynamic systems with uniformly ultimate boundedness stability guarantee, [Paper](https:\u002F\u002Fwww.sciencedirect.com\u002Fscience\u002Farticle\u002Fpii\u002FS0005109821002090), Not Find Code (Accepted by Automatica, 2021)\n- A predictive safety filter for learning-based control of constrained nonlinear dynamical systems, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1812.05506.pdf), Not Find Code (Accepted by Automatica, 2021)\n- A simple reward-free approach to constrained reinforcement learning, [Paper](https:\u002F\u002Fwww.cs.princeton.edu\u002F~syoosefi\u002Fpapers\u002Freward-free2021.pdf),  Not Find Code (Arxiv, 2021)\n- State augmented constrained reinforcement learning: Overcoming the limitations of learning with rewards, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2102.11941.pdf),  Not Find Code (Arxiv, 2021)\n- DESTA: A Framework for Safe Reinforcement Learning with Markov Games of Intervention, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2110.14468.pdf),  Not Find Code (Arxiv, 2021)\n- Safe Exploration in Model-based Reinforcement Learning using 
Control Barrier Functions, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2104.08171.pdf), Not Find Code (Arxiv, 2021)\n- Constrained Variational Policy Optimization for Safe Reinforcement Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2201.11927.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fliuzuxin\u002Fcvpo-safe-rl) (ICML 2022)\n- Provably efficient model-free constrained rl with linear function approximation, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2206.11889), Not Find Code (NeurIPS 2022)\n- Constrained Policy Optimization via Bayesian World Models, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2201.09802), [Code](https:\u002F\u002Fgithub.com\u002Fyardenas\u002Fla-mbda) (ICLR 2022)\n- Stability-Constrained Markov Decision Processes Using MPC, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2102.01383.pdf), Not Find Code (Accepted by Automatica, 2022)\n- Constrained Reinforcement Learning for Vehicle Motion Planning with Topological Reachability Analysis, [Paper](https:\u002F\u002Fwww.mdpi.com\u002F2218-6581\u002F11\u002F4\u002F81\u002Fpdf), Not Find Code (Accepted by Robotics, 2022)\n- Triple-Q: A Model-Free Algorithm for Constrained Reinforcement Learning with Sublinear Regret and Zero Constraint Violation, [Paper](https:\u002F\u002Fproceedings.mlr.press\u002Fv151\u002Fwei22a\u002Fwei22a.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fhonghaow\u002FTriple-q) (Accepted by AISTATS 2022)\n- Safe reinforcement learning using robust action governor, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2102.10643.pdf), Not Find Code (Accepted by In Learning for Dynamics and Control, 2022)\n- A primal-dual approach to constrained markov decision processes, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2101.10895.pdf),  Not Find Code (Arxiv, 2022)\n- SAUTE RL: Almost Surely Safe Reinforcement Learning Using State Augmentation, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2202.06558.pdf), Not Find Code (Arxiv, 2022)\n- Finding 
Safe Zones of policies Markov Decision Processes, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2202.11593.pdf), Not Find Code (Arxiv, 2022)\n- CUP: A Conservative Update Policy Algorithm for Safe Reinforcement Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2202.07565.pdf), [Code](https:\u002F\u002Fgithub.com\u002FRL-boxes\u002FSafe-RL) (Arxiv, 2022)\n- SAFER: Data-Efficient and Safe Reinforcement Learning via Skill Acquisition, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2202.04849.pdf), Not Find Code (Arxiv, 2022)\n- Penalized Proximal Policy Optimization for Safe Reinforcement Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2205.11814.pdf), Not Find Code (Arxiv, 2022)\n- Mean-Semivariance Policy Optimization via Risk-Averse Reinforcement Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2206.07376.pdf), Not Find Code (Arxiv, 2022)\n- Convergence and sample complexity of natural policy gradient primal-dual methods for constrained MDPs, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2206.02346.pdf), Not Find Code (Arxiv, 2022)\n- Guided Safe Shooting: model based reinforcement learning with safety constraints, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2206.09743.pdf), Not Find Code (Arxiv, 2022)\n- Safe Reinforcement Learning via Confidence-Based Filters, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2207.01337.pdf), Not Find Code (Arxiv, 2022)\n- TRC: Trust Region Conditional Value at Risk for Safe Reinforcement Learning, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fdocument\u002F9677982), [Code](https:\u002F\u002Fgithub.com\u002Frllab-snu\u002FTrust-Region-CVaR) (Accepted by IEEE RAL, 2022)\n- Efficient Off-Policy Safe Reinforcement Learning Using Trust Region Conditional Value at Risk, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fdocument\u002F9802647), Not Find Code (Accepted by IEEE RAL, 2022)\n- Enhancing Safe Exploration Using Safety State Augmentation, 
[Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2206.02675), Not Find Code (Arxiv, 2022)\n- Towards Safe Reinforcement Learning via Constraining Conditional Value-at-Risk, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2206.04436.pdf), Not Find Code (Accepted by IJCAI 2022)\n- Safe reinforcement learning of dynamic high-dimensional robotic tasks: navigation, manipulation, interaction, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2209.13308.pdf), Not Find Code (Arxiv, 2022)\n- Safe Exploration Method for Reinforcement Learning under Existence of Disturbance, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2209.15452.pdf), Not Find Code (Arxiv, 2022)\n- Guiding Safe Exploration with Weakest Preconditions, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2209.14148.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fgavlegoat\u002Fspice) (Arxiv, 2022)\n- Temporal logic guided safe model-based reinforcement learning: A hybrid systems approach, [Paper](https:\u002F\u002Fwww.sciencedirect.com\u002Fscience\u002Farticle\u002Fpii\u002FS1751570X22000905), Not Find Code (Accepted by Nonlinear Analysis: Hybrid Systems, 2022)\n- Provably Safe Reinforcement Learning via Action Projection using Reachability Analysis and Polynomial Zonotopes, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2210.10691.pdf),  Not Find Code (Arxiv, 2022)\n- Model-based Safe Deep Reinforcement Learning via a Constrained Proximal Policy Optimization Algorithm, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2210.07573.pdf),  [Code](https:\u002F\u002Fgithub.com\u002Fakjayant\u002Fmbppol) (Arxiv, 2022)\n- Safe Model-Based Reinforcement Learning with an Uncertainty-Aware Reachability Certificate, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2210.07553.pdf), Not Find Code (Arxiv, 2022)\n- UNIFY: a Unified Policy Designing Framework for Solving Constrained Optimization Problems with Machine Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2210.14030.pdf), Not Find Code (Arxiv, 
2022)\n- Enforcing Hard Constraints with Soft Barriers: Safe Reinforcement Learning in Unknown Stochastic Environments, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2209.15090.pdf),  Not Find Code (Arxiv, 2022)\n- Safe Reinforcement Learning Using Robust Control Barrier Functions, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fstamp\u002Fstamp.jsp?tp=&arnumber=9928337), Not Find Code (Accepted by IEEE RAL, 2022)\n- Model-free Neural Lyapunov Control for Safe Robot Navigation, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2203.01190.pdf), [Code](https:\u002F\u002Fgithub.com\u002FZikangXiong\u002FMFNLC), [Demo](https:\u002F\u002Fsites.google.com\u002Fview\u002Fmf-nlc) (Accepted by IROS 2022)\n- Safe Reinforcement Learning via Probabilistic Logic Shields, [Paper](https:\u002F\u002Fwww.ijcai.org\u002Fproceedings\u002F2023\u002F0637.pdf), [Code](https:\u002F\u002Fgithub.com\u002Fwenchiyang\u002Fpls) (Accepted by IJCAI 2023, Distinguished Paper Award)\n- Towards robust and safe reinforcement learning with benign off-policy data, [Paper](https:\u002F\u002Fproceedings.mlr.press\u002Fv202\u002Fliu23l\u002Fliu23l.pdf),  Not Find Code (Accepted by ICML 2023)\n- Enforcing hard constraints with soft barriers: Safe reinforcement learning in unknown stochastic environments, [Paper](https:\u002F\u002Fproceedings.mlr.press\u002Fv202\u002Fwang23as\u002Fwang23as.pdf),  Not Find Code (Accepted by ICML 2023)\n- Safe Exploration Incurs Nearly No Additional Sample Complexity for Reward-free RL, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2206.14057),  Not Find Code (Accepted by ICLR 2023)\n- A CMDP-within-online framework for Meta-Safe Reinforcement Learning, [Paper](https:\u002F\u002Fopenreview.net\u002Fpdf?id=mbxz9Cjehr),  Not Find Code (Accepted by ICLR 2023)\n- Datasets and Benchmarks for Offline Safe Reinforcement Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.09303), [Code](https:\u002F\u002Fgithub.com\u002Fliuzuxin\u002Fosrl), (Accepted by 
Journal of Data-centric Machine Learning Research)\n- SCPO: Safe Reinforcement Learning with Safety Critic Policy Optimization, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.00880), [Code](https:\u002F\u002Fgithub.com\u002FSafeRL-Lab\u002FSCPO) (Arxiv, 2023)\n- Shielded Reinforcement Learning for Hybrid Systems, [Paper](https:\u002F\u002Flink.springer.com\u002Fchapter\u002F10.1007\u002F978-3-031-46002-9_3) [(Arxiv)](https:\u002F\u002Farxiv.org\u002Fabs\u002F2308.14424), [Code](https:\u002F\u002Fgithub.com\u002FAsgerHB\u002FShielded-Learning-for-Hybrid-Systems) (AISOLA, 2023)\n- Adaptive primal-dual method for safe reinforcement learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.00355), Not Find Code (Accepted by AAMAS 2024)\n- Probabilistic constraint for safety-critical reinforcement learning, [Paper](https:\u002F\u002Fieeexplore.ieee.org\u002Fiel7\u002F9\u002F4601496\u002F10475493.pdf), Not Find Code (Accepted by TAC)\n- Generalized constraint for probabilistic safe reinforcement learning, [Paper](https:\u002F\u002Fproceedings.mlr.press\u002Fv242\u002Fchen24b\u002Fchen24b.pdf), Not Find Code (Accepted by DCC 2024)\n- Log Barriers for Safe Black-box Optimization with Application to Safe Reinforcement Learning, [Paper](https:\u002F\u002Fwww.jmlr.org\u002Fpapers\u002Fvolume25\u002F22-0878\u002F22-0878.pdf), [Code](https:\u002F\u002Fgithub.com\u002FIlnura\u002FLB_SGD) (JMLR, 2024)\n- Provably safe reinforcement learning with step-wise violation constraints, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2302.06064), Not Find Code (Accepted by NeurIPS 2024)\n- Feasibility Consistent Representation Learning for Safe Reinforcement Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fabs\u002F2405.11718), [Code](https:\u002F\u002Fgithub.com\u002Fczp16\u002FFCSRL), (Accepted by ICML 2024)\n- Balance Reward and Safety Optimization for Safe Reinforcement Learning: A Perspective of Gradient Manipulation, 
[Paper](https:\u002F\u002Fojs.aaai.org\u002Findex.php\u002FAAAI\u002Farticle\u002Fview\u002F30102\u002F31944), Not Find Code (Accepted by AAAI 2024)\n- Safe Reinforcement Learning with Free-form Natural Language Constraints and Pre-Trained Language Models, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.07553v1), Not Find Code (Accepted by AAMAS 2024)\n- Enhancing Efficiency of Safe Reinforcement Learning via Sample Manipulation, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2405.20860), Not Find Code (Arxiv, 2024)\n- Safe and Balanced: A Framework for Constrained Multi-Objective Reinforcement Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2405.16390), Not Find Code (Arxiv, 2024)\n- Confident Natural Policy Gradient for Local Planning in qπ-realizable Constrained MDPs, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2406.18529), Not Find Code (Arxiv, 2024)\n- Safe Exploration Using Bayesian World Models and Log-Barrier Optimization, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2405.05890), [Code](https:\u002F\u002Fanonymous.4open.science\u002Fr\u002Fsafe-opax-F5FF\u002FREADME.md) (Arxiv, 2024)\n- Safe and Balanced: A Framework for Constrained Multi-Objective Reinforcement Learning, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2405.16390), [Code](https:\u002F\u002Fgithub.com\u002FSafeRL-Lab\u002FCMORL) (Accepted by IEEE TPAMI 2025)\n- Reward-Safety Balance in Offline Safe RL via Diffusion Regularization, [Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2502.12391?), Not Find Code (Accepted by NeurIPS 2025)\n\n#### 2.2. 
安全多智能体强化学习基线\n- 多智能体约束策略优化（MACPO），[论文](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2110.02793.pdf)，[代码](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FMulti-Agent-Constrained-Policy-Optimisation)（Arxiv，2021年）\n- MAPPO-Lagrangian，[论文](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2110.02793.pdf)，[代码](https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FMulti-Agent-Constrained-Policy-Optimisation)（Arxiv，2021年）\n- 用于安全多智能体强化学习的去中心化策略梯度下降上升法，[论文](https:\u002F\u002Fchentianyi1991.github.io\u002Faaai.pdf)，未找到代码（被AAAI 2021接收）\n- 基于屏蔽的安全多智能体强化学习，[论文](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2101.11196.pdf)，未找到代码（被AAMAS 2021接收）\n- CMIX：具有峰值和平均约束的深度多智能体强化学习，[论文](https:\u002F\u002F2021.ecmlpkdd.org\u002Fwp-content\u002Fuploads\u002F2021\u002F07\u002Fsub_181.pdf)，未找到代码（被联合欧洲机器学习与数据库知识发现会议2021接收）\n- 通过去中心化多重控制屏障函数实现的安全多智能体强化学习，[论文](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2103.12553.pdf)，未找到代码（Arxiv 2021）\n- CAMA：一种使用约束增强的安全多智能体强化学习新框架，[论文](https:\u002F\u002Fopenreview.net\u002Fpdf?id=jK02XX9ZpJkt)，未找到代码（Openreview 2022）\n- 用于安全多智能体强化学习的屏蔽去中心化，[论文](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper_files\u002Fpaper\u002F2022\u002Ffile\u002F57444e14ecd9e2c8f603b4f012ce3811-Paper-Conference.pdf)，未找到代码（NeurIPS 2022）\n- 使用分布式上图形式多智能体强化学习求解多智能体安全最优控制，[论文](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2504.15425)，[代码](https:\u002F\u002Fgithub.com\u002FMIT-REALM\u002Fdef-marl\u002F)（RSS 2025）\n\n\n\n\n### 3. 
综述\n- 安全强化学习综述：方法、理论与应用，[论文](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2205.10330.pdf)（IEEE TPAMI，2024年）\n- 基于状态的安全强化学习：综述，[论文](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2302.03122.pdf)（被IJCAI 2023接收）\n- 无模型强化学习中的带约束策略学习：综述，[论文](https:\u002F\u002Fweb.archive.org\u002Fweb\u002F20210812230501id_\u002Fhttps:\u002F\u002Fwww.ijcai.org\u002Fproceedings\u002F2021\u002F0614.pdf)（被IJCAI 2021接收）\n- 机器人领域的安全学习：从基于学习的控制到安全强化学习，[论文](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2108.06266.pdf)（被年度控制、机器人与自主系统评论接收，2021年）\n- 安全学习与优化技术：迈向现状综述，[论文](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2101.09505.pdf)（被国际可信人工智能基础研讨会——整合学习、优化与推理——接收，2020年）\n- 安全强化学习综合综述，[论文](https:\u002F\u002Fwww.jmlr.org\u002Fpapers\u002Fvolume16\u002Fgarcia15a\u002Fgarcia15a.pdf)（被机器学习研究期刊接收，2015年）\n\n\n\n\n### 4. 学位论文\n- 用于机器人决策的安全强化学习，[论文](https:\u002F\u002Fpeople.eecs.berkeley.edu\u002F~shangding.gu\u002Fpapers\u002FPhD_Dissertation_Shangding_Gu_2024.pdf)（Shangding Gu，慕尼黑工业大学博士论文，2024年）\n- 强化学习中的安全探索：理论与机器人应用，[论文](https:\u002F\u002Fwww.research-collection.ethz.ch\u002Fbitstream\u002Fhandle\u002F20.500.11850\u002F370833\u002F1\u002Froot.pdf)（Felix Berkenkamp，苏黎世联邦理工学院博士论文，2019年）\n- 安全强化学习，[论文](https:\u002F\u002Fscholarworks.umass.edu\u002Fcgi\u002Fviewcontent.cgi?article=1527&context=dissertations_2)（Philip S. Thomas，马萨诸塞大学阿默斯特分校博士论文，2015年）\n\n\n\n\n### 5. 书籍\n- 约束马尔可夫决策过程：随机建模，[书籍](https:\u002F\u002Fwww-sop.inria.fr\u002Fmembers\u002FEitan.Altman\u002FPAPERS\u002Fh.pdf)（Eitan Altman，Routledge，1999年）\n\n### 6. 
教程\n- 安全强化学习：连接理论与实践，[教程](https:\u002F\u002Fdocs.google.com\u002Fpresentation\u002Fd\u002F1slZyKj1G_XvtH8laWMClcQVMLbiQyqKW25cV9gY3ypE\u002Fedit?usp=sharing)（Ming Jin & Shangding Gu，2024年）\n- 用于智能电网控制与运营的安全强化学习，[教程](https:\u002F\u002Fdocs.google.com\u002Fpresentation\u002Fd\u002F1o3t3KMfgCL5fo_zHZH2ChMkJTkbJ7sY7lMomBE8iRNE\u002Fedit?usp=sharing)（Ming Jin & Shangding Gu，2024年）\n- 安全强化学习，[教程](https:\u002F\u002Fdrive.google.com\u002Ffile\u002Fd\u002F1Hpu9HZbXkurTMWvj63m-aLYxay66E2Vz\u002Fview)（Felix Berkenkamp，2023年）\n- 原始-对偶方法，[教程](https:\u002F\u002Fdrive.google.com\u002Ffile\u002Fd\u002F1_NRil0__6375nIqMT6jXw-PB6CkwvvDH\u002Fview)（Gergely Neu，2023年）\n\n### 7. 练习\n- 原始-对偶强化学习，[练习代码](https:\u002F\u002Fgithub.com\u002Ftyrion\u002Fprimal-dual-exercise)和[练习Colab](https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002Ftyrion\u002Fprimal-dual-exercise\u002Fblob\u002Fmaster\u002FPrimal_Dual_Colab.ipynb)（Germano Gabbianelli，2023年）\n\n\n## 出版物\n如果您觉得本仓库有用，请引用以下[论文](https:\u002F\u002Farxiv.org\u002Fabs\u002F2205.10330)：\n```\n@article{gu2024review,\n  title={A Review of Safe Reinforcement Learning: Methods, Theories and Applications},\n  author={Gu, Shangding and Yang, Long and Du, Yali and Chen, Guang and Walter, Florian and Wang, Jun and Knoll, Alois},\n  journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},\n  year={2024},\n  publisher={IEEE}\n}\n```","# Safe-Reinforcement-Learning-Baselines 快速上手指南\n\n本指南旨在帮助开发者快速了解并使用 **Safe-Reinforcement-Learning-Baselines** 项目。该项目是一个专注于安全强化学习（Safe RL）的研究资源库，汇集了单智能体与多智能体安全 RL 的基准环境、经典算法论文及代码实现。\n\n> **注意**：本项目主要作为一个**文献与代码索引库**存在，部分早期论文可能未提供官方代码。使用时请根据具体算法条目查看其对应的代码链接。\n\n## 1. 
环境准备\n\n在开始之前，请确保您的开发环境满足以下基本要求：\n\n*   **操作系统**：Linux (推荐 Ubuntu 18.04\u002F20.04) 或 macOS。Windows 用户建议使用 WSL2。\n*   **Python 版本**：建议 Python 3.7 - 3.9（具体取决于子项目中使用的深度学习框架版本）。\n*   **前置依赖**：\n    *   Git\n    *   PyTorch 或 TensorFlow (视具体算法实现而定，大多数现代实现基于 PyTorch)\n    *   MuJoCo (部分物理仿真环境需要授权和安装)\n    *   Gym \u002F Gymnasium\n\n**国内加速建议**：\n*   **Git 克隆**：如果直接克隆 GitHub 仓库速度慢，可使用国内镜像源（如 Gitee 镜像，若有）或配置代理。\n*   **Python 包安装**：推荐使用清华源或阿里源安装依赖。\n    ```bash\n    pip config set global.index-url https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n    ```\n\n## 2. 安装步骤\n\n由于该仓库是多个独立算法和环境的集合，没有统一的 `setup.py` 来安装所有功能。请按照以下步骤获取资源并配置特定算法：\n\n### 第一步：克隆仓库\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Fchauncygu\u002FSafe-Reinforcement-Learning-Baseline.git\ncd Safe-Reinforcement-Learning-Baseline\n```\n\n### 第二步：选择并配置具体算法\n浏览仓库目录结构（通常在 `Safe-RL\u002F` 下），找到您需要的算法文件夹（例如 `safety-starter-agents` 对应 CPO 算法，`safe_learning` 对应基于高斯过程的方法等）。\n\n以 **Constrained Policy Optimization (CPO)** 为例，进入对应目录并安装依赖：\n\n```bash\n# 进入具体算法目录 (示例路径，请以实际目录结构为准)\ncd Safe-RL\u002Fsafety-starter-agents\n\n# 创建虚拟环境 (推荐)\npython -m venv venv\nsource venv\u002Fbin\u002Factivate  # Windows 使用: venv\\Scripts\\activate\n\n# 安装该算法特定的依赖\npip install -r requirements.txt\n```\n\n> **提示**：如果某个条目标记为 \"Not Find Code\"，则表示该仓库仅提供了论文链接，您需要自行寻找第三方实现或复现。\n\n### 第三步：安装仿真环境\n根据您想运行的基准测试（Benchmark），安装相应的环境包。\n\n*   **安全单智能体环境**：\n    ```bash\n    # 示例：安装 Safety-Gymnasium (PKU-Alignment 维护版，对国内更友好)\n    pip install safety-gymnasium\n    ```\n*   **安全多智能体环境**：\n    需参考 `Safe Multi-Agent Mujoco` 等子项目的具体 README 进行安装，通常涉及 `multi-agent-mujoco` 等包。\n\n## 3. 基本使用\n\n以下以运行一个典型的安全强化学习算法（如 CPO）在安全环境中训练为例。\n\n### 最简单的使用示例\n\n假设您已进入包含 CPO 代码的目录，并且已安装 `safety-gymnasium`。\n\n1.  
**导入环境与算法**（Python 脚本示例）：\n\n    ```python\n    import gymnasium as gym\n    import safety_gymnasium\n    from your_algo_module import CPOAgent  # 替换为实际的算法导入路径\n\n    # 初始化安全环境 (例如：Safety-Gymnasium 中的 PointGoal1-v0)\n    env = gym.make(\"SafetyPointGoal1-v0\")\n\n    # 初始化智能体\n    agent = CPOAgent(env.observation_space, env.action_space)\n\n    # 训练循环示例\n    for episode in range(100):\n        obs, _ = env.reset()\n        done = False\n        total_reward = 0\n        total_cost = 0\n        \n        while not done:\n            # 获取动作\n            action = agent.select_action(obs)\n            \n            # 执行动作\n            next_obs, reward, cost, terminated, truncated, info = env.step(action)\n            done = terminated or truncated\n            \n            # 存储经验并更新策略 (具体方法名视实现而定)\n            agent.store_transition(obs, action, reward, cost, next_obs, done)\n            agent.update()\n            \n            obs = next_obs\n            total_reward += reward\n            total_cost += cost\n            \n        print(f\"Episode {episode}: Reward: {total_reward:.2f}, Cost: {total_cost:.2f}\")\n\n    env.close()\n    ```\n\n2.  
**运行现有脚本**：\n    大多数子项目都提供了直接的训练脚本。在项目目录下查找类似 `run.py` 或 `main.py` 的文件：\n\n    ```bash\n    # 示例命令，具体参数请参考各子项目的 README\n    python run.py --algo CPO --env SafetyPointGoal1-v0 --epochs 50\n    ```\n\n### 核心资源索引\n在使用过程中，您可以参考仓库 README 中的分类快速定位资源：\n*   **基准环境 (Environments)**: 查看 `AI Safety Gridworlds`, `Safety-Gymnasium`, `Safe Multi-Agent Mujoco` 等。\n*   **基线算法 (Baselines)**: 涵盖从早期的风险敏感 RL 到最新的 CPO, PPO-Lagrangian, TRPO-Lagrangian 等算法实现。\n*   **综述与教程**: 适合入门安全强化学习理论。\n\n---\n*注：本仓库处于活跃开发中，如遇问题或发现新的安全 RL 论文未被收录，欢迎通过 Issue 或邮件联系作者贡献。*","某自动驾驶初创公司的算法团队正在开发城市道路自动泊车系统，需要在保证绝对不碰撞障碍物（安全约束）的前提下，最大化泊车效率。\n\n### 没有 Safe-Reinforcement-Learning-Baselines 时\n- **基准缺失导致重复造轮子**：团队需从零复现经典的约束马尔可夫决策过程（CMDP）算法，耗费数周时间调试基础代码，且难以确保实现与论文理论一致。\n- **缺乏统一评测环境**：内部自建仿真场景单一，无法在 Safety-Gym 或 Safe Multi-Agent Mujoco 等标准基准上验证算法泛化性，导致模型在真实路况下表现不可控。\n- **安全风险难以量化**：由于缺少成熟的风险敏感型强化学习基线对比，团队无法准确评估当前策略在极端情况下的违规概率，上线测试时频繁发生模拟碰撞。\n- **多车协同研发受阻**：面对停车场多车同时调度的复杂场景，缺乏可靠的多智能体安全 RL 参考架构，协同避障逻辑开发陷入瓶颈。\n\n### 使用 Safe-Reinforcement-Learning-Baselines 后\n- **快速集成成熟基线**：直接调用仓库中已整理的单智能体及多智能体安全 RL 基线代码（如 Lyapunov 设计或 Actor-Critic 约束算法），将算法验证周期从数周缩短至几天。\n- **标准化基准测试**：无缝接入支持的 Safety-Gymnasium 和 Safe Multi-Agent Isaac Gym 环境，在行业标准下客观评估模型性能，显著提升了算法的可信度。\n- **明确的安全边界优化**：通过对比不同风险约束下的基线表现，团队迅速定位并优化了策略中的高风险行为，使模拟测试中的碰撞率降低了 90% 以上。\n- **高效解决多车协同**：利用仓库提供的多智能体安全基准方案，快速构建了车辆间的协同避障机制，成功实现了高密度停车场内的无冲突调度。\n\nSafe-Reinforcement-Learning-Baselines 通过提供标准化的安全算法基线与评测环境，帮助研发团队大幅降低了试错成本，确保了 AI 系统在严苛安全约束下的可靠落地。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fchauncygu_Safe-Reinforcement-Learning-Baselines_df461e74.png","chauncygu","Shangding Gu","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fchauncygu_3161757b.jpg","Pursuing the essence of intelligence and bringing it into the real world.",null,"Berkeley, USA","https:\u002F\u002Fpeople.eecs.berkeley.edu\u002F~shangding.gu\u002F","https:\u002F\u002Fgithub.com\u002Fchauncygu",[86,90,94,98,102,106,110,114,117,121],{"name":87,"color":88,"percentage":89},"Jupyter 
Notebook","#DA5B0B",35.9,{"name":91,"color":92,"percentage":93},"C","#555555",35.8,{"name":95,"color":96,"percentage":97},"Julia","#a270ba",15.1,{"name":99,"color":100,"percentage":101},"Python","#3572A5",12.4,{"name":103,"color":104,"percentage":105},"TeX","#3D6117",0.5,{"name":107,"color":108,"percentage":109},"q","#0040cd",0.2,{"name":111,"color":112,"percentage":113},"Shell","#89e051",0.1,{"name":115,"color":116,"percentage":113},"MATLAB","#e16737",{"name":118,"color":119,"percentage":120},"Makefile","#427819",0,{"name":122,"color":123,"percentage":120},"Batchfile","#C1F12E",787,100,"2026-04-13T06:22:59",4,"","未说明",{"notes":131,"python":129,"dependencies":132},"该仓库主要是一个安全强化学习（Safe RL）的论文和基准测试列表，而非一个单一的独立软件包。文中列出的环境（如 Safety-Gym, Mujoco, Isaac Gym）和算法基线（如 CPO, PPO-Lagrangian）各自拥有独立的代码库和运行环境需求。用户需根据具体想要复现的论文或运行的环境，前往其对应的子项目链接查看具体的系统配置、Python 版本及依赖库要求。",[],[18],[135,136,137,138,139,140],"reinforcement-learning","baseline","robotics","safe-reinforcement-learning","safe-robot-learning","safety","2026-03-27T02:49:30.150509","2026-04-16T03:31:45.665797",[],[]]