[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-yzhao062--pyod":3,"tool-yzhao062--pyod":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",150037,2,"2026-04-10T23:33:47",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",108322,"2026-04-10T11:39:34",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[52,13,15,14],"插件",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":32,"last_commit_at":59,"category_tags":60,"status":17},4721,"markitdown","microsoft\u002Fmarkitdown","MarkItDown 是一款由微软 AutoGen 团队打造的轻量级 Python 工具，专为将各类文件高效转换为 Markdown 格式而设计。它支持 PDF、Word、Excel、PPT、图片（含 OCR）、音频（含语音转录）、HTML 乃至 YouTube 链接等多种格式的解析，能够精准提取文档中的标题、列表、表格和链接等关键结构信息。\n\n在人工智能应用日益普及的今天，大语言模型（LLM）虽擅长处理文本，却难以直接读取复杂的二进制办公文档。MarkItDown 恰好解决了这一痛点，它将非结构化或半结构化的文件转化为模型“原生理解”且 Token 效率极高的 Markdown 格式，成为连接本地文件与 AI 分析 pipeline 的理想桥梁。此外，它还提供了 MCP（模型上下文协议）服务器，可无缝集成到 Claude Desktop 等 LLM 应用中。\n\n这款工具特别适合开发者、数据科学家及 AI 研究人员使用，尤其是那些需要构建文档检索增强生成（RAG）系统、进行批量文本分析或希望让 AI 助手直接“阅读”本地文件的用户。虽然生成的内容也具备一定可读性，但其核心优势在于为机器",93400,"2026-04-06T19:52:38",[52,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":66,"readme_en":67,"readme_zh":68,"quickstart_zh":69,"use_case_zh":70,"hero_image_url":71,"owner_login":72,"owner_name":73,"owner_avatar_url":74,"owner_bio":75,"owner_company":76,"owner_location":77,"owner_email":68,"owner_twitter":68,"owner_website":78,"owner_url":79,"languages":80,"stars":89,"forks":90,"last_commit_at":91,"license":92,"difficulty_score":93,"env_os":94,"env_gpu":95,"env_ram":94,"env_deps":96,"category_tags":110,"github_topics":112,"view_count":32,"oss_zip_url":68,"oss_zip_packed_at":68,"status":132,"created_at":133,"updated_at":134,"faqs":135,"releases":166},6499,"yzhao062\u002Fpyod","pyod","A Python Library for Outlier and Anomaly Detection on Tabular, Text, and Image Data","PyOD 是一款功能强大且易于上手的 Python 开源库，专门用于在表格、文本及图像数据中识别异常值与离群点。它有效解决了用户在处理多维数据时，难以从海量信息中快速定位罕见模式、欺诈行为或系统故障等关键问题的挑战。\n\n无论是从事数据科学开发的工程师、进行算法研究的研究人员，还是希望提升数据质量的分析专家，都能通过 PyOD 高效完成异常检测任务。该库自 2017 年发布以来已发展成为行业标杆，其最新的 V2 版本更是带来了显著的技术突破：不仅将支持的检测算法扩展至 45 种，还深度集成了 12 种基于 PyTorch 的现代深度学习模型。\n\nPyOD 的独特亮点在于引入了大语言模型（LLM）辅助的自动模型选择功能，大幅降低了人工调参门槛，让经验有限的用户也能轻松上手。此外，通过 EmbeddingOD 模块，它能灵活结合基础模型编码器，实现对非结构化文本和图像的多模态异常检测。凭借丰富的算法库、优化的性能表现以及完善的文档支持，PyOD 是构建可靠异常检测系统的理想选择。","Python Outlier Detection (PyOD) V2\r\n==================================\r\n\r\n**Deployment & Documentation & Stats & License**\r\n\r\n|badge_pypi| |badge_anaconda| |badge_docs| |badge_stars| |badge_forks| |badge_downloads| |badge_testing| |badge_coverage| |badge_maintainability| |badge_license| |badge_benchmark|\r\n\r\n.. |badge_pypi| image:: https:\u002F\u002Fimg.shields.io\u002Fpypi\u002Fv\u002Fpyod.svg?color=brightgreen\r\n   :target: https:\u002F\u002Fpypi.org\u002Fproject\u002Fpyod\u002F\r\n   :alt: PyPI version\r\n\r\n.. |badge_anaconda| image:: https:\u002F\u002Fanaconda.org\u002Fconda-forge\u002Fpyod\u002Fbadges\u002Fversion.svg\r\n   :target: https:\u002F\u002Fanaconda.org\u002Fconda-forge\u002Fpyod\r\n   :alt: Anaconda version\r\n\r\n.. |badge_docs| image:: https:\u002F\u002Freadthedocs.org\u002Fprojects\u002Fpyod\u002Fbadge\u002F?version=latest\r\n   :target: https:\u002F\u002Fpyod.readthedocs.io\u002Fen\u002Flatest\u002F?badge=latest\r\n   :alt: Documentation status\r\n\r\n.. |badge_stars| image:: https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fyzhao062\u002Fpyod.svg\r\n   :target: https:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod\u002Fstargazers\r\n   :alt: GitHub stars\r\n\r\n.. |badge_forks| image:: https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fforks\u002Fyzhao062\u002Fpyod.svg?color=blue\r\n   :target: https:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod\u002Fnetwork\r\n   :alt: GitHub forks\r\n\r\n.. |badge_downloads| image:: https:\u002F\u002Fpepy.tech\u002Fbadge\u002Fpyod\r\n   :target: https:\u002F\u002Fpepy.tech\u002Fproject\u002Fpyod\r\n   :alt: Downloads\r\n\r\n.. |badge_testing| image:: https:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod\u002Factions\u002Fworkflows\u002Ftesting.yml\u002Fbadge.svg\r\n   :target: https:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod\u002Factions\u002Fworkflows\u002Ftesting.yml\r\n   :alt: Testing\r\n\r\n\r\n.. |badge_coverage| image:: https:\u002F\u002Fcoveralls.io\u002Frepos\u002Fgithub\u002Fyzhao062\u002Fpyod\u002Fbadge.svg\r\n   :target: https:\u002F\u002Fcoveralls.io\u002Fgithub\u002Fyzhao062\u002Fpyod\r\n   :alt: Coverage Status\r\n\r\n.. |badge_maintainability| image:: https:\u002F\u002Fapi.codeclimate.com\u002Fv1\u002Fbadges\u002Fbdc3d8d0454274c753c4\u002Fmaintainability\r\n   :target: https:\u002F\u002Fcodeclimate.com\u002Fgithub\u002Fyzhao062\u002FPyod\u002Fmaintainability\r\n   :alt: Maintainability\r\n\r\n.. |badge_license| image:: https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Flicense\u002Fyzhao062\u002Fpyod.svg\r\n   :target: https:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod\u002Fblob\u002Fmaster\u002FLICENSE\r\n   :alt: License\r\n\r\n.. |badge_benchmark| image:: https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FADBench-benchmark_results-pink\r\n   :target: https:\u002F\u002Fgithub.com\u002FMinqi824\u002FADBench\r\n   :alt: Benchmark\r\n\r\n\r\n-----\r\n\r\n\r\nRead Me First\r\n^^^^^^^^^^^^^\r\n\r\nWelcome to PyOD, a well-developed and easy-to-use Python library for detecting anomalies in multivariate data. Whether you are working with a small-scale project or large datasets, PyOD provides a range of algorithms to fit your needs.\r\n\r\n**PyOD Version 2 is now available** (`Paper \u003Chttps:\u002F\u002Fwww.arxiv.org\u002Fabs\u002F2412.12154>`_) [#Chen2024PyOD]_, featuring:\r\n\r\n* **Expanded Deep Learning Support**: Integrates 12 modern neural models into a single PyTorch-based framework, bringing the total number of outlier detection methods to 45.\r\n* **Enhanced Performance and Ease of Use**: Models are optimized for efficiency and consistent performance across different datasets.\r\n* **LLM-based Model Selection**: Automated model selection guided by a large language model reduces manual tuning and assists users who may have limited experience with outlier detection.\r\n* **Multi-Modal Detection via EmbeddingOD**: Chain foundation model encoders (sentence-transformers, OpenAI, HuggingFace) with any PyOD detector for text and image anomaly detection. See `EmbeddingOD example \u003Chttps:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod\u002Fblob\u002Fmaster\u002Fexamples\u002Fembedding_od_example.py>`_.\r\n\r\n**PyOD Ecosystem & Resources**:\r\n`NLP-ADBench \u003Chttps:\u002F\u002Fgithub.com\u002FUSC-FORTIS\u002FNLP-ADBench>`_ (NLP anomaly detection) | `TODS \u003Chttps:\u002F\u002Fgithub.com\u002Fdatamllab\u002Ftods>`_ (time-series) | `PyGOD \u003Chttps:\u002F\u002Fpygod.org\u002F>`_ (graph) | `ADBench \u003Chttps:\u002F\u002Fgithub.com\u002FMinqi824\u002FADBench>`_ (benchmark) | `AD-LLM \u003Chttps:\u002F\u002Farxiv.org\u002Fabs\u002F2412.11142>`_ (LLM-based AD) [#Yang2024ad]_ | `Resources \u003Chttps:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fanomaly-detection-resources>`_\r\n\r\n----\r\n\r\n\r\nAbout PyOD\r\n^^^^^^^^^^\r\n\r\nPyOD, established in 2017, has become a go-to **Python library** for **detecting anomalous\u002Foutlying objects** in multivariate data. This exciting yet challenging field is commonly referred to as `Outlier Detection \u003Chttps:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FAnomaly_detection>`_ or `Anomaly Detection \u003Chttps:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FAnomaly_detection>`_.\r\n\r\nPyOD includes more than 50 detection algorithms, from classical LOF (SIGMOD 2000) to the cutting-edge ECOD and DIF (TKDE 2022 and 2023). Since 2017, PyOD has been successfully used in numerous academic research projects and commercial products with more than `26 million downloads \u003Chttps:\u002F\u002Fpepy.tech\u002Fproject\u002Fpyod>`_. It is also well acknowledged by the machine learning community with various dedicated posts\u002Ftutorials, including `Analytics Vidhya \u003Chttps:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2019\u002F02\u002Foutlier-detection-python-pyod\u002F>`_, `KDnuggets \u003Chttps:\u002F\u002Fwww.kdnuggets.com\u002F2019\u002F02\u002Foutlier-detection-methods-cheat-sheet.html>`_, and `Towards Data Science \u003Chttps:\u002F\u002Ftowardsdatascience.com\u002Fanomaly-detection-for-dummies-15f148e559c1>`_.\r\n\r\n**PyOD is featured for**:\r\n\r\n* **Unified, User-Friendly Interface** across various algorithms.\r\n* **Wide Range of Models**, from classic techniques to the latest deep learning methods in **PyTorch**.\r\n* **High Performance & Efficiency**, leveraging `numba \u003Chttps:\u002F\u002Fgithub.com\u002Fnumba\u002Fnumba>`_ and `joblib \u003Chttps:\u002F\u002Fgithub.com\u002Fjoblib\u002Fjoblib>`_ for JIT compilation and parallel processing.\r\n* **Fast Training & Prediction**, achieved through the SUOD framework [#Zhao2021SUOD]_.\r\n\r\n**Outlier Detection with 5 Lines of Code**:\r\n\r\n.. code-block:: python\r\n\r\n    # Example: Training an ECOD detector\r\n    from pyod.models.ecod import ECOD\r\n    clf = ECOD()\r\n    clf.fit(X_train)\r\n    y_train_scores = clf.decision_scores_  # Outlier scores for training data\r\n    y_test_scores = clf.decision_function(X_test)  # Outlier scores for test data\r\n\r\n\r\n**Text Anomaly Detection with EmbeddingOD** (``pip install pyod sentence-transformers``):\r\n\r\n.. code-block:: python\r\n\r\n    from pyod.models.embedding import EmbeddingOD\r\n    clf = EmbeddingOD(encoder='all-MiniLM-L6-v2', detector='KNN')\r\n    clf.fit(train_texts)                          # list of strings\r\n    scores = clf.decision_function(test_texts)    # anomaly scores\r\n    labels = clf.predict(test_texts)              # binary labels\r\n\r\n    # Or use a preset:\r\n    clf = EmbeddingOD.for_text(quality='fast')    # MiniLM + KNN\r\n\r\nImage detection requires additional packages (``pip install transformers torch``). See `EmbeddingOD example \u003Chttps:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod\u002Fblob\u002Fmaster\u002Fexamples\u002Fembedding_od_example.py>`_ for details.\r\n\r\n\r\n**Selecting the Right Algorithm:** Start with `ECOD \u003Chttps:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod\u002Fblob\u002Fmaster\u002Fexamples\u002Fecod_example.py>`_ or `Isolation Forest \u003Chttps:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod\u002Fblob\u002Fmaster\u002Fexamples\u002Fiforest_example.py>`_ for tabular data, `EmbeddingOD \u003Chttps:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod\u002Fblob\u002Fmaster\u002Fexamples\u002Fembedding_od_example.py>`_ for text\u002Fimage, or `MetaOD \u003Chttps:\u002F\u002Fgithub.com\u002Fyzhao062\u002FMetaOD>`_ for data-driven selection.\r\n\r\n**Citing PyOD**:\r\n\r\nIf you use PyOD in a scientific publication, we would appreciate citations to the following paper(s):\r\n\r\n`PyOD 2: A Python Library for Outlier Detection with LLM-powered Model Selection \u003Chttps:\u002F\u002Farxiv.org\u002Fabs\u002F2412.12154>`_ is available as a preprint. If you use PyOD in a scientific publication, we would appreciate citations to the following paper::\r\n\r\n    @inproceedings{chen2025pyod,\r\n      title={Pyod 2: A python library for outlier detection with llm-powered model selection},\r\n      author={Chen, Sihan and Qian, Zhuangzhuang and Siu, Wingchun and Hu, Xingcan and Li, Jiaqi and Li, Shawn and Qin, Yuehan and Yang, Tiankai and Xiao, Zhuo and Ye, Wanghao and others},\r\n      booktitle={Companion Proceedings of the ACM on Web Conference 2025},\r\n      pages={2807--2810},\r\n      year={2025}\r\n    }\r\n\r\n\r\n`PyOD paper \u003Chttp:\u002F\u002Fwww.jmlr.org\u002Fpapers\u002Fvolume20\u002F19-011\u002F19-011.pdf>`_ is published in `Journal of Machine Learning Research (JMLR) \u003Chttp:\u002F\u002Fwww.jmlr.org\u002F>`_ (MLOSS track).::\r\n\r\n    @article{zhao2019pyod,\r\n        author  = {Zhao, Yue and Nasrullah, Zain and Li, Zheng},\r\n        title   = {PyOD: A Python Toolbox for Scalable Outlier Detection},\r\n        journal = {Journal of Machine Learning Research},\r\n        year    = {2019},\r\n        volume  = {20},\r\n        number  = {96},\r\n        pages   = {1-7},\r\n        url     = {http:\u002F\u002Fjmlr.org\u002Fpapers\u002Fv20\u002F19-011.html}\r\n    }\r\n\r\nor::\r\n\r\n    Zhao, Y., Nasrullah, Z. and Li, Z., 2019. PyOD: A Python Toolbox for Scalable Outlier Detection. Journal of machine learning research (JMLR), 20(96), pp.1-7.\r\n\r\n\r\nFor a broader perspective on anomaly detection, see our NeurIPS papers on `ADBench \u003Chttps:\u002F\u002Farxiv.org\u002Fabs\u002F2206.09426>`_ [#Han2022ADBench]_ and `ADGym \u003Chttps:\u002F\u002Farxiv.org\u002Fabs\u002F2309.15376>`_.\r\n\r\n\r\n**Table of Contents**:\r\n\r\n* `Installation \u003C#installation>`_\r\n* `API Cheatsheet & Reference \u003C#api-cheatsheet--reference>`_\r\n* `ADBench Benchmark and Datasets \u003C#adbench-benchmark-and-datasets>`_\r\n* `Additional Topics \u003C#additional-topics>`_ (Model Save\u002FLoad, SUOD, Thresholding)\r\n* `Implemented Algorithms \u003C#implemented-algorithms>`_\r\n* `Quick Start for Outlier Detection \u003C#quick-start-for-outlier-detection>`_\r\n* `How to Contribute \u003C#how-to-contribute>`_\r\n* `Inclusion Criteria \u003C#inclusion-criteria>`_\r\n\r\n----\r\n\r\nInstallation\r\n^^^^^^^^^^^^\r\n\r\nPyOD is designed for easy installation using either **pip** or **conda**. We recommend using the latest version of PyOD due to frequent updates and enhancements:\r\n\r\n.. code-block:: bash\r\n\r\n   pip install pyod            # normal install\r\n   pip install --upgrade pyod  # or update if needed\r\n\r\n.. code-block:: bash\r\n\r\n   conda install -c conda-forge pyod\r\n\r\nAlternatively, you can clone and run the setup.py file:\r\n\r\n.. code-block:: bash\r\n\r\n   git clone https:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod.git\r\n   cd pyod\r\n   pip install .\r\n\r\n**Required Dependencies**:\r\n\r\n* Python 3.8 or higher\r\n* joblib\r\n* matplotlib\r\n* numpy>=1.19\r\n* numba>=0.51\r\n* scipy>=1.5.1\r\n* scikit_learn>=0.22.0\r\n\r\n**Optional Dependencies (see details below)**:\r\n\r\n* combo (optional, required for models\u002Fcombination.py and FeatureBagging)\r\n* pytorch (optional, required for AutoEncoder, and other deep learning models)\r\n* suod (optional, required for running SUOD model)\r\n* xgboost (optional, required for XGBOD)\r\n* pythresh (optional, required for thresholding)\r\n* sentence-transformers (optional, required for EmbeddingOD text detection)\r\n* openai (optional, required for EmbeddingOD with OpenAI embeddings)\r\n* transformers and torch (optional, required for EmbeddingOD image detection and HuggingFace encoder)\r\n\r\n----\r\n\r\n\r\nAPI Cheatsheet & Reference\r\n^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n\r\nThe full API Reference is available at `PyOD Documentation \u003Chttps:\u002F\u002Fpyod.readthedocs.io\u002Fen\u002Flatest\u002Fpyod.html>`_. Below is a quick cheatsheet for all detectors:\r\n\r\n* **fit(X)**: Fit the detector. The parameter y is ignored in unsupervised methods.\r\n* **decision_function(X)**: Predict raw anomaly scores for X using the fitted detector.\r\n* **predict(X)**: Determine whether a sample is an outlier or not as binary labels using the fitted detector.\r\n* **predict_proba(X)**: Estimate the probability of a sample being an outlier using the fitted detector.\r\n* **predict_confidence(X)**: Assess the model's confidence on a per-sample basis (applicable in predict and predict_proba) [#Perini2020Quantifying]_.\r\n* **predict_with_rejection(X)**\\ : Allow the detector to reject (i.e., abstain from making) highly uncertain predictions (output = -2) [#Perini2023Rejection]_.\r\n\r\n**Key Attributes of a fitted model**:\r\n\r\n* **decision_scores_**: Outlier scores of the training data. Higher scores typically indicate more abnormal behavior. Outliers usually have higher scores.\r\n* **labels_**: Binary labels of the training data, where 0 indicates inliers and 1 indicates outliers\u002Fanomalies.\r\n\r\n\r\n----\r\n\r\n\r\nADBench Benchmark and Datasets\r\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n\r\nWe just released a 45-page, the most comprehensive `ADBench: Anomaly Detection Benchmark \u003Chttps:\u002F\u002Farxiv.org\u002Fabs\u002F2206.09426>`_ [#Han2022ADBench]_.\r\nThe fully `open-sourced ADBench \u003Chttps:\u002F\u002Fgithub.com\u002FMinqi824\u002FADBench>`_ compares 30 anomaly detection algorithms on 57 benchmark datasets.\r\n\r\nThe organization of **ADBench** is provided below:\r\n\r\n.. image:: https:\u002F\u002Fgithub.com\u002FMinqi824\u002FADBench\u002Fblob\u002Fmain\u002Ffigs\u002FADBench.png?raw=true\r\n   :target: https:\u002F\u002Fgithub.com\u002FMinqi824\u002FADBench\u002Fblob\u002Fmain\u002Ffigs\u002FADBench.png?raw=true\r\n   :alt: benchmark-fig\r\n\r\n\r\nFor a simpler visualization, we make **the comparison of selected models** via\r\n`compare_all_models.py \u003Chttps:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod\u002Fblob\u002Fmaster\u002Fexamples\u002Fcompare_all_models.py>`_\\.\r\n\r\n.. image:: https:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod\u002Fblob\u002Fdevelopment\u002Fexamples\u002FALL.png?raw=true\r\n   :target: https:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod\u002Fblob\u002Fdevelopment\u002Fexamples\u002FALL.png?raw=true\r\n   :alt: Comparison_of_All\r\n\r\n\r\n\r\n----\r\n\r\nAdditional Topics\r\n^^^^^^^^^^^^^^^^^\r\n\r\n* `Model Save & Load \u003Chttps:\u002F\u002Fpyod.readthedocs.io\u002Fen\u002Flatest\u002Fmodel_persistence.html>`_: Use joblib or pickle for saving and loading PyOD models. See `example \u003Chttps:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod\u002Fblob\u002Fmaster\u002Fexamples\u002Fsave_load_model_example.py>`_.\r\n* `Fast Train with SUOD \u003Chttps:\u002F\u002Fpyod.readthedocs.io\u002Fen\u002Flatest\u002Ffast_train.html>`_: Accelerate training and prediction with the SUOD framework [#Zhao2021SUOD]_. See `example \u003Chttps:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod\u002Fblob\u002Fmaster\u002Fexamples\u002Fsuod_example.py>`_.\r\n* `Thresholding Outlier Scores \u003Chttps:\u002F\u002Fpyod.readthedocs.io\u002Fen\u002Flatest\u002Fthresholding.html>`_: Data-driven approaches for setting contamination levels via `PyThresh \u003Chttps:\u002F\u002Fgithub.com\u002FKulikDM\u002Fpythresh>`_.\r\n\r\n----\r\n\r\n\r\n\r\nImplemented Algorithms\r\n^^^^^^^^^^^^^^^^^^^^^^\r\n\r\nPyOD toolkit consists of four major functional groups:\r\n\r\n**(i) Individual Detection Algorithms** :\r\n\r\n===================  ==================  ======================================================================================================  =====  ========================================\r\nType                 Abbr                Algorithm                                                                                               Year   Ref\r\n===================  ==================  ======================================================================================================  =====  ========================================\r\nProbabilistic        ECOD                Unsupervised Outlier Detection Using Empirical Cumulative Distribution Functions                        2022   [#Li2021ECOD]_\r\nProbabilistic        ABOD                Angle-Based Outlier Detection                                                                           2008   [#Kriegel2008Angle]_\r\nProbabilistic        FastABOD            Fast Angle-Based Outlier Detection using approximation                                                  2008   [#Kriegel2008Angle]_\r\nProbabilistic        COPOD               COPOD: Copula-Based Outlier Detection                                                                   2020   [#Li2020COPOD]_\r\nProbabilistic        MAD                 Median Absolute Deviation (MAD)                                                                         1993   [#Iglewicz1993How]_\r\nProbabilistic        SOS                 Stochastic Outlier Selection                                                                            2012   [#Janssens2012Stochastic]_\r\nProbabilistic        QMCD                Quasi-Monte Carlo Discrepancy outlier detection                                                         2001   [#Fang2001Wrap]_\r\nProbabilistic        KDE                 Outlier Detection with Kernel Density Functions                                                         2007   [#Latecki2007Outlier]_\r\nProbabilistic        Sampling            Rapid distance-based outlier detection via sampling                                                     2013   [#Sugiyama2013Rapid]_\r\nProbabilistic        GMM                 Probabilistic Mixture Modeling for Outlier Analysis                                                            [#Aggarwal2015Outlier]_ [Ch.2]\r\nLinear Model         PCA                 Principal Component Analysis (the sum of weighted projected distances to the eigenvector hyperplanes)   2003   [#Shyu2003A]_\r\nLinear Model         KPCA                Kernel Principal Component Analysis                                                                     2007   [#Hoffmann2007Kernel]_\r\nLinear Model         MCD                 Minimum Covariance Determinant (use the mahalanobis distances as the outlier scores)                    1999   [#Hardin2004Outlier]_ [#Rousseeuw1999A]_\r\nLinear Model         CD                  Use Cook's distance for outlier detection                                                               1977   [#Cook1977Detection]_\r\nLinear Model         OCSVM               One-Class Support Vector Machines                                                                       2001   [#Scholkopf2001Estimating]_\r\nLinear Model         LMDD                Deviation-based Outlier Detection (LMDD)                                                                1996   [#Arning1996A]_\r\nProximity-Based      LOF                 Local Outlier Factor                                                                                    2000   [#Breunig2000LOF]_\r\nProximity-Based      COF                 Connectivity-Based Outlier Factor                                                                       2002   [#Tang2002Enhancing]_\r\nProximity-Based      (Incremental) COF   Memory Efficient Connectivity-Based Outlier Factor (slower but reduce storage complexity)               2002   [#Tang2002Enhancing]_\r\nProximity-Based      CBLOF               Clustering-Based Local Outlier Factor                                                                   2003   [#He2003Discovering]_\r\nProximity-Based      LOCI                LOCI: Fast outlier detection using the local correlation integral                                       2003   [#Papadimitriou2003LOCI]_\r\nProximity-Based      HBOS                Histogram-based Outlier Score                                                                           2012   [#Goldstein2012Histogram]_\r\nProximity-Based      HDBSCAN             Density-based clustering based on hierarchical density estimates                                        2013   [#Campello2013Density]_\r\nProximity-Based      kNN                 k Nearest Neighbors (use the distance to the kth nearest neighbor as the outlier score)                 2000   [#Ramaswamy2000Efficient]_\r\nProximity-Based      AvgKNN              Average kNN (use the average distance to k nearest neighbors as the outlier score)                      2002   [#Angiulli2002Fast]_\r\nProximity-Based      MedKNN              Median kNN (use the median distance to k nearest neighbors as the outlier score)                        2002   [#Angiulli2002Fast]_\r\nProximity-Based      SOD                 Subspace Outlier Detection                                                                              2009   [#Kriegel2009Outlier]_\r\nProximity-Based      ROD                 Rotation-based Outlier Detection                                                                        2020   [#Almardeny2020A]_\r\nOutlier Ensembles    IForest             Isolation Forest                                                                                        2008   [#Liu2008Isolation]_\r\nOutlier Ensembles    INNE                Isolation-based Anomaly Detection Using Nearest-Neighbor Ensembles                                      2018   [#Bandaragoda2018Isolation]_\r\nOutlier Ensembles    DIF                 Deep Isolation Forest for Anomaly Detection                                                             2023   [#Xu2023Deep]_\r\nOutlier Ensembles    FB                  Feature Bagging                                                                                         2005   [#Lazarevic2005Feature]_\r\nOutlier Ensembles    LSCP                LSCP: Locally Selective Combination of Parallel Outlier Ensembles                                       2019   [#Zhao2019LSCP]_\r\nOutlier Ensembles    XGBOD               Extreme Boosting Based Outlier Detection **(Supervised)**                                               2018   [#Zhao2018XGBOD]_\r\nOutlier Ensembles    LODA                Lightweight On-line Detector of Anomalies                                                               2016   [#Pevny2016Loda]_\r\nOutlier Ensembles    SUOD                SUOD: Accelerating Large-scale Unsupervised Heterogeneous Outlier Detection **(Acceleration)**          2021   [#Zhao2021SUOD]_\r\nNeural Networks      AutoEncoder         Fully connected AutoEncoder (use reconstruction error as the outlier score)                                    [#Aggarwal2015Outlier]_ [Ch.3]\r\nNeural Networks      VAE                 Variational AutoEncoder (use reconstruction error as the outlier score)                                 2013   [#Kingma2013Auto]_\r\nNeural Networks      Beta-VAE            Variational AutoEncoder (all customized loss term by varying gamma and capacity)                        2018   [#Burgess2018Understanding]_\r\nNeural Networks      SO_GAAL             Single-Objective Generative Adversarial Active Learning                                                 2019   [#Liu2019Generative]_\r\nNeural Networks      MO_GAAL             Multiple-Objective Generative Adversarial Active Learning                                               2019   [#Liu2019Generative]_\r\nNeural Networks      DeepSVDD            Deep One-Class Classification                                                                           2018   [#Ruff2018Deep]_\r\nNeural Networks      AnoGAN              Anomaly Detection with Generative Adversarial Networks                                                  2017   [#Schlegl2017Unsupervised]_\r\nNeural Networks      ALAD                Adversarially learned anomaly detection                                                                 2018   [#Zenati2018Adversarially]_\r\nNeural Networks      AE1SVM              Autoencoder-based One-class Support Vector Machine                                                      2019   [#Nguyen2019scalable]_\r\nNeural Networks      DevNet              Deep Anomaly Detection with Deviation Networks                                                          2019   [#Pang2019Deep]_\r\nGraph-based          R-Graph             Outlier detection by R-graph                                                                            2017   [#You2017Provable]_\r\nGraph-based          LUNAR               LUNAR: Unifying Local Outlier Detection Methods via Graph Neural Networks                               2022   [#Goodge2022Lunar]_\r\nEmbedding-based      EmbeddingOD         Multi-modal anomaly detection via foundation model embeddings (text, image)                             2025   [#Li2024NLPADBench]_\r\n===================  ==================  ======================================================================================================  =====  ========================================\r\n\r\n\r\nEnsemble methods (IForest, INNE, DIF, FB, LSCP, LODA, SUOD, XGBOD) are included in the table above. Score combination functions (average, maximization, AOM, MOA, median, majority vote) are in ``pyod.models.combination``. See `API docs \u003Chttps:\u002F\u002Fpyod.readthedocs.io\u002Fen\u002Flatest\u002Fpyod.html>`_ for details.\r\n\r\n\r\n**(ii) Utility Functions**:\r\n\r\n===================  ============================  =====================================================================================================================================================\r\nType                 Name                          Function\r\n===================  ============================  =====================================================================================================================================================\r\nData                 generate_data                 Synthesized data generation; normal data from multivariate Gaussian, outliers from uniform distribution\r\nData                 generate_data_clusters        Synthesized data generation in clusters for more complex patterns\r\nEvaluation           evaluate_print                Print ROC-AUC and Precision @ Rank n for a detector\r\nEvaluation           precision_n_scores            Calculate Precision @ Rank n\r\nUtility              get_label_n                   Turn raw outlier scores into binary labels by assigning 1 to the top n scores\r\nStat                 wpearsonr                     Calculate the weighted Pearson correlation of two samples\r\nEncoding             resolve_encoder               Resolve an encoder from a string name, BaseEncoder instance, or callable\r\nEncoding             SentenceTransformerEncoder     Encode text via sentence-transformers models (e.g., MiniLM, mpnet)\r\nEncoding             OpenAIEncoder                 Encode text via OpenAI Embeddings API (text-embedding-3-small\u002Flarge)\r\nEncoding             HuggingFaceEncoder            Encode text or images via HuggingFace transformers (BERT, DINOv2, CLIP)\r\n===================  ============================  =====================================================================================================================================================\r\n\r\n----\r\n\r\nQuick Start for Outlier Detection\r\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\r\n\r\nPyOD has been well acknowledged by the machine learning community with a few featured posts and tutorials.\r\n\r\n**Analytics Vidhya**: `An Awesome Tutorial to Learn Outlier Detection in Python using PyOD Library \u003Chttps:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2019\u002F02\u002Foutlier-detection-python-pyod\u002F>`_\r\n\r\n**KDnuggets**: `Intuitive Visualization of Outlier Detection Methods \u003Chttps:\u002F\u002Fwww.kdnuggets.com\u002F2019\u002F02\u002Foutlier-detection-methods-cheat-sheet.html>`_, `An Overview of Outlier Detection Methods from PyOD \u003Chttps:\u002F\u002Fwww.kdnuggets.com\u002F2019\u002F06\u002Foverview-outlier-detection-methods-pyod.html>`_\r\n\r\n**Towards Data Science**: `Anomaly Detection for Dummies \u003Chttps:\u002F\u002Ftowardsdatascience.com\u002Fanomaly-detection-for-dummies-15f148e559c1>`_\r\n\r\n`\"examples\u002Fknn_example.py\" \u003Chttps:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod\u002Fblob\u002Fmaster\u002Fexamples\u002Fknn_example.py>`_\r\ndemonstrates the basic API of using kNN detector. **It is noted that the API across all other algorithms are consistent\u002Fsimilar**.\r\n\r\nMore detailed instructions for running examples can be found in `examples directory \u003Chttps:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod\u002Fblob\u002Fmaster\u002Fexamples>`_.\r\n\r\n\r\n#. Initialize a kNN detector, fit the model, and make the prediction.\r\n\r\n   .. code-block:: python\r\n\r\n\r\n       from pyod.models.knn import KNN   # kNN detector\r\n\r\n       # train kNN detector\r\n       clf_name = 'KNN'\r\n       clf = KNN()\r\n       clf.fit(X_train)\r\n\r\n       # get the prediction label and outlier scores of the training data\r\n       y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)\r\n       y_train_scores = clf.decision_scores_  # raw outlier scores\r\n\r\n       # get the prediction on the test data\r\n       y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)\r\n       y_test_scores = clf.decision_function(X_test)  # outlier scores\r\n\r\n       # it is possible to get the prediction confidence as well\r\n       y_test_pred, y_test_pred_confidence = clf.predict(X_test, return_confidence=True)  # outlier labels (0 or 1) and confidence in the range of [0,1]\r\n\r\n#. Evaluate the prediction by ROC and Precision @ Rank n (p@n).\r\n\r\n   .. code-block:: python\r\n\r\n       from pyod.utils.data import evaluate_print\r\n       \r\n       # evaluate and print the results\r\n       print(\"\\nOn Training Data:\")\r\n       evaluate_print(clf_name, y_train, y_train_scores)\r\n       print(\"\\nOn Test Data:\")\r\n       evaluate_print(clf_name, y_test, y_test_scores)\r\n\r\n\r\n#. See a sample output & visualization.\r\n\r\n\r\n   .. code-block:: python\r\n\r\n\r\n       On Training Data:\r\n       KNN ROC:1.0, precision @ rank n:1.0\r\n\r\n       On Test Data:\r\n       KNN ROC:0.9989, precision @ rank n:0.9\r\n\r\n   .. code-block:: python\r\n\r\n\r\n       visualize(clf_name, X_train, y_train, X_test, y_test, y_train_pred,\r\n           y_test_pred, show_figure=True, save_figure=False)\r\n\r\nVisualization (\\ `knn_figure \u003Chttps:\u002F\u002Fraw.githubusercontent.com\u002Fyzhao062\u002Fpyod\u002Fmaster\u002Fexamples\u002FKNN.png>`_\\ ):\r\n\r\n.. image:: https:\u002F\u002Fraw.githubusercontent.com\u002Fyzhao062\u002Fpyod\u002Fmaster\u002Fexamples\u002FKNN.png\r\n   :target: https:\u002F\u002Fraw.githubusercontent.com\u002Fyzhao062\u002Fpyod\u002Fmaster\u002Fexamples\u002FKNN.png\r\n   :alt: kNN example figure\r\n\r\n----\r\n\r\nReference\r\n^^^^^^^^^\r\n\r\n\r\n.. [#Aggarwal2015Outlier] Aggarwal, C.C., 2015. Outlier analysis. In Data mining (pp. 237-263). Springer, Cham.\r\n\r\n.. [#Aggarwal2015Theoretical] Aggarwal, C.C. and Sathe, S., 2015. Theoretical foundations and algorithms for outlier ensembles.\\ *ACM SIGKDD Explorations Newsletter*\\ , 17(1), pp.24-47.\r\n\r\n.. [#Aggarwal2017Outlier] Aggarwal, C.C. and Sathe, S., 2017. Outlier ensembles: An introduction. Springer.\r\n\r\n.. [#Almardeny2020A] Almardeny, Y., Boujnah, N. and Cleary, F., 2020. A Novel Outlier Detection Method for Multivariate Data. *IEEE Transactions on Knowledge and Data Engineering*.\r\n\r\n.. [#Angiulli2002Fast] Angiulli, F. and Pizzuti, C., 2002, August. Fast outlier detection in high dimensional spaces. In *European Conference on Principles of Data Mining and Knowledge Discovery* pp. 15-27.\r\n\r\n.. [#Arning1996A] Arning, A., Agrawal, R. and Raghavan, P., 1996, August. A Linear Method for Deviation Detection in Large Databases. In *KDD* (Vol. 1141, No. 50, pp. 972-981).\r\n\r\n.. [#Bandaragoda2018Isolation] Bandaragoda, T. R., Ting, K. M., Albrecht, D., Liu, F. T., Zhu, Y., and Wells, J. R., 2018, Isolation-based anomaly detection using nearest-neighbor ensembles. *Computational Intelligence*\\ , 34(4), pp. 968-998.\r\n\r\n.. [#Breunig2000LOF] Breunig, M.M., Kriegel, H.P., Ng, R.T. and Sander, J., 2000, May. LOF: identifying density-based local outliers. *ACM Sigmod Record*\\ , 29(2), pp. 93-104.\r\n\r\n.. [#Burgess2018Understanding] Burgess, Christopher P., et al. \"Understanding disentangling in beta-VAE.\" arXiv preprint arXiv:1804.03599 (2018).\r\n\r\n.. [#Campello2013Density] Campello, R.J.G.B., Moulavi, D. and Sander, J., 2013, April. Density-based clustering based on hierarchical density estimates. In *Pacific-Asia Conference on Knowledge Discovery and Data Mining* (pp. 160-172). Springer.\r\n\r\n.. [#Cook1977Detection] Cook, R.D., 1977. Detection of influential observation in linear regression. Technometrics, 19(1), pp.15-18.\r\n\r\n.. [#Chen2024PyOD] Chen, S., Qian, Z., Siu, W., Hu, X., Li, J., Li, S., Qin, Y., Yang, T., Xiao, Z., Ye, W. and Zhang, Y., 2024. PyOD 2: A Python Library for Outlier Detection with LLM-powered Model Selection. arXiv preprint arXiv:2412.12154.\r\n\r\n.. [#Fang2001Wrap] Fang, K.T. and Ma, C.X., 2001. Wrap-around L2-discrepancy of random sampling, Latin hypercube and uniform designs. Journal of complexity, 17(4), pp.608-624.\r\n\r\n.. [#Goldstein2012Histogram] Goldstein, M. and Dengel, A., 2012. Histogram-based outlier score (hbos): A fast unsupervised anomaly detection algorithm. In *KI-2012: Poster and Demo Track*\\ , pp.59-63.\r\n\r\n.. [#Goodge2022Lunar] Goodge, A., Hooi, B., Ng, S.K. and Ng, W.S., 2022, June. Lunar: Unifying local outlier detection methods via graph neural networks. In Proceedings of the AAAI Conference on Artificial Intelligence.\r\n\r\n.. [#Gopalan2019PIDForest] Gopalan, P., Sharan, V. and Wieder, U., 2019. PIDForest: Anomaly Detection via Partial Identification. In Advances in Neural Information Processing Systems, pp. 15783-15793.\r\n\r\n.. [#Han2022ADBench] Han, S., Hu, X., Huang, H., Jiang, M. and Zhao, Y., 2022. ADBench: Anomaly Detection Benchmark. arXiv preprint arXiv:2206.09426.\r\n\r\n.. [#Hardin2004Outlier] Hardin, J. and Rocke, D.M., 2004. Outlier detection in the multiple cluster setting using the minimum covariance determinant estimator. *Computational Statistics & Data Analysis*\\ , 44(4), pp.625-638.\r\n\r\n.. [#He2003Discovering] He, Z., Xu, X. and Deng, S., 2003. Discovering cluster-based local outliers. *Pattern Recognition Letters*\\ , 24(9-10), pp.1641-1650.\r\n\r\n.. [#Hoffmann2007Kernel] Hoffmann, H., 2007. Kernel PCA for novelty detection. Pattern recognition, 40(3), pp.863-874.\r\n\r\n.. [#Iglewicz1993How] Iglewicz, B. and Hoaglin, D.C., 1993. How to detect and handle outliers (Vol. 16). Asq Press.\r\n\r\n.. [#Janssens2012Stochastic] Janssens, J.H.M., Huszár, F., Postma, E.O. and van den Herik, H.J., 2012. Stochastic outlier selection. Technical report TiCC TR 2012-001, Tilburg University, Tilburg Center for Cognition and Communication, Tilburg, The Netherlands.\r\n\r\n.. [#Kingma2013Auto] Kingma, D.P. and Welling, M., 2013. Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114.\r\n\r\n.. [#Kriegel2008Angle] Kriegel, H.P. and Zimek, A., 2008, August. Angle-based outlier detection in high-dimensional data. In *KDD '08*\\ , pp. 444-452. ACM.\r\n\r\n.. [#Kriegel2009Outlier] Kriegel, H.P., Kröger, P., Schubert, E. and Zimek, A., 2009, April. Outlier detection in axis-parallel subspaces of high dimensional data. In *Pacific-Asia Conference on Knowledge Discovery and Data Mining*\\ , pp. 831-838. Springer, Berlin, Heidelberg.\r\n\r\n.. [#Latecki2007Outlier] Latecki, L.J., Lazarevic, A. and Pokrajac, D., 2007, July. Outlier detection with kernel density functions. In International Workshop on Machine Learning and Data Mining in Pattern Recognition (pp. 61-75). Springer, Berlin, Heidelberg.\r\n\r\n.. [#Lazarevic2005Feature] Lazarevic, A. and Kumar, V., 2005, August. Feature bagging for outlier detection. In *KDD '05*. 2005.\r\n\r\n.. [#Li2024NLPADBench] Li, Y., Li, J., Xiao, Z., Yang, T., Nian, Y., Hu, X. and Zhao, Y., 2025. NLP-ADBench: NLP Anomaly Detection Benchmark. In *Findings of the Association for Computational Linguistics: EMNLP 2025*.\r\n\r\n.. [#Li2019MADGAN] Li, D., Chen, D., Jin, B., Shi, L., Goh, J. and Ng, S.K., 2019, September. MAD-GAN: Multivariate anomaly detection for time series data with generative adversarial networks. In *International Conference on Artificial Neural Networks* (pp. 703-716). Springer, Cham.\r\n\r\n.. [#Li2020COPOD] Li, Z., Zhao, Y., Botta, N., Ionescu, C. and Hu, X. COPOD: Copula-Based Outlier Detection. *IEEE International Conference on Data Mining (ICDM)*, 2020.\r\n\r\n.. [#Li2021ECOD] Li, Z., Zhao, Y., Hu, X., Botta, N., Ionescu, C. and Chen, H. G. ECOD: Unsupervised Outlier Detection Using Empirical Cumulative Distribution Functions. *IEEE Transactions on Knowledge and Data Engineering (TKDE)*, 2022.\r\n\r\n.. [#Liu2008Isolation] Liu, F.T., Ting, K.M. and Zhou, Z.H., 2008, December. Isolation forest. In *International Conference on Data Mining*\\ , pp. 413-422. IEEE.\r\n\r\n.. [#Liu2019Generative] Liu, Y., Li, Z., Zhou, C., Jiang, Y., Sun, J., Wang, M. and He, X., 2019. Generative adversarial active learning for unsupervised outlier detection. *IEEE Transactions on Knowledge and Data Engineering*.\r\n\r\n.. [#Nguyen2019scalable] Nguyen, M.N. and Vien, N.A., 2019. Scalable and interpretable one-class svms with deep learning and random fourier features. In *Machine Learning and Knowledge Discovery in Databases: European Conference*, ECML PKDD, 2018.\r\n\r\n.. [#Pang2019Deep] Pang, Guansong, Chunhua Shen, and Anton Van Den Hengel. \"Deep anomaly detection with deviation networks.\" In *KDD*, pp. 353-362. 2019.\r\n\r\n.. [#Papadimitriou2003LOCI] Papadimitriou, S., Kitagawa, H., Gibbons, P.B. and Faloutsos, C., 2003, March. LOCI: Fast outlier detection using the local correlation integral. In *ICDE '03*, pp. 315-326. IEEE.\r\n\r\n.. [#Pevny2016Loda] Pevný, T., 2016. Loda: Lightweight on-line detector of anomalies. *Machine Learning*, 102(2), pp.275-304.\r\n\r\n.. [#Perini2020Quantifying] Perini, L., Vercruyssen, V., Davis, J. Quantifying the confidence of anomaly detectors in their example-wise predictions. In *Joint European Conference on Machine Learning and Knowledge Discovery in Databases (ECML-PKDD)*, 2020.\r\n\r\n.. [#Perini2023Rejection] Perini, L., Davis, J. Unsupervised anomaly detection with rejection. In *Proceedings of the Thirty-Seven Conference on Neural Information Processing Systems (NeurIPS)*, 2023.\r\n\r\n.. [#Ramaswamy2000Efficient] Ramaswamy, S., Rastogi, R. and Shim, K., 2000, May. Efficient algorithms for mining outliers from large data sets. *ACM Sigmod Record*\\ , 29(2), pp. 427-438.\r\n\r\n.. [#Rousseeuw1999A] Rousseeuw, P.J. and Driessen, K.V., 1999. A fast algorithm for the minimum covariance determinant estimator. *Technometrics*\\ , 41(3), pp.212-223.\r\n\r\n.. [#Ruff2018Deep] Ruff, L., Vandermeulen, R., Goernitz, N., Deecke, L., Siddiqui, S.A., Binder, A., Müller, E. and Kloft, M., 2018, July. Deep one-class classification. In *International conference on machine learning* (pp. 4393-4402). PMLR.\r\n\r\n.. [#Schlegl2017Unsupervised] Schlegl, T., Seeböck, P., Waldstein, S.M., Schmidt-Erfurth, U. and Langs, G., 2017, June. Unsupervised anomaly detection with generative adversarial networks to guide marker discovery. In International conference on information processing in medical imaging (pp. 146-157). Springer, Cham.\r\n\r\n.. [#Scholkopf2001Estimating] Scholkopf, B., Platt, J.C., Shawe-Taylor, J., Smola, A.J. and Williamson, R.C., 2001. Estimating the support of a high-dimensional distribution. *Neural Computation*, 13(7), pp.1443-1471.\r\n\r\n.. [#Shyu2003A] Shyu, M.L., Chen, S.C., Sarinnapakorn, K. and Chang, L., 2003. A novel anomaly detection scheme based on principal component classifier. *MIAMI UNIV CORAL GABLES FL DEPT OF ELECTRICAL AND COMPUTER ENGINEERING*.\r\n\r\n.. [#Sugiyama2013Rapid] Sugiyama, M. and Borgwardt, K., 2013. Rapid distance-based outlier detection via sampling. Advances in neural information processing systems, 26.\r\n\r\n.. [#Tang2002Enhancing] Tang, J., Chen, Z., Fu, A.W.C. and Cheung, D.W., 2002, May. Enhancing effectiveness of outlier detections for low density patterns. In *Pacific-Asia Conference on Knowledge Discovery and Data Mining*, pp. 535-548. Springer, Berlin, Heidelberg.\r\n\r\n.. [#Wang2020adVAE] Wang, X., Du, Y., Lin, S., Cui, P., Shen, Y. and Yang, Y., 2019. adVAE: A self-adversarial variational autoencoder with Gaussian anomaly prior knowledge for anomaly detection. *Knowledge-Based Systems*.\r\n\r\n.. [#Xu2023Deep] Xu, H., Pang, G., Wang, Y., Wang, Y., 2023. Deep isolation forest for anomaly detection. *IEEE Transactions on Knowledge and Data Engineering*.\r\n\r\n.. [#Yang2024ad] Yang, T., Nian, Y., Li, S., Xu, R., Li, Y., Li, J., Xiao, Z., Hu, X., Rossi, R., Ding, K. and Hu, X., 2024. AD-LLM: Benchmarking Large Language Models for Anomaly Detection. arXiv preprint arXiv:2412.11142.\r\n\r\n.. [#You2017Provable] You, C., Robinson, D.P. and Vidal, R., 2017. Provable self-representation based outlier detection in a union of subspaces. In Proceedings of the IEEE conference on computer vision and pattern recognition.\r\n\r\n.. [#Zenati2018Adversarially] Zenati, H., Romain, M., Foo, C.S., Lecouat, B. and Chandrasekhar, V., 2018, November. Adversarially learned anomaly detection. In 2018 IEEE International conference on data mining (ICDM) (pp. 727-736). IEEE.\r\n\r\n.. [#Zhao2018XGBOD] Zhao, Y. and Hryniewicki, M.K. XGBOD: Improving Supervised Outlier Detection with Unsupervised Representation Learning. *IEEE International Joint Conference on Neural Networks*\\ , 2018.\r\n\r\n.. [#Zhao2019LSCP] Zhao, Y., Nasrullah, Z., Hryniewicki, M.K. and Li, Z., 2019, May. LSCP: Locally selective combination in parallel outlier ensembles. In *Proceedings of the 2019 SIAM International Conference on Data Mining (SDM)*, pp. 585-593. Society for Industrial and Applied Mathematics.\r\n\r\n.. [#Zhao2021SUOD] Zhao, Y., Hu, X., Cheng, C., Wang, C., Wan, C., Wang, W., Yang, J., Bai, H., Li, Z., Xiao, C., Wang, Y., Qiao, Z., Sun, J. and Akoglu, L. (2021). SUOD: Accelerating Large-scale Unsupervised Heterogeneous Outlier Detection. *Conference on Machine Learning and Systems (MLSys)*.\r\n",null,"# PyOD 快速上手指南\n\nPyOD (Python Outlier Detection) 是一个功能强大且易于使用的 Python 库，专为检测多元数据中的异常值（离群点）而设计。它集成了从经典算法到最新深度学习模型在内的 45+ 种检测方法，并提供统一的 API 接口。\n\n## 1. 环境准备\n\n在开始之前，请确保您的开发环境满足以下要求：\n\n*   **操作系统**：Linux, macOS, Windows\n*   **Python 版本**：3.8 或更高\n*   **核心依赖**：\n    *   `numpy` >= 1.19\n    *   `scipy` >= 1.5.1\n    *   `scikit-learn` >= 0.22.0\n    *   `numba` >= 0.51\n    *   `joblib`\n    *   `matplotlib`\n\n> **注意**：如果您计划使用深度学习模型（如 AutoEncoder）或文本\u002F图像异常检测功能，还需额外安装 `torch`、`transformers` 或 `sentence-transformers` 等可选依赖。\n\n## 2. 安装步骤\n\n推荐使用 `pip` 进行安装。国内用户建议使用清华或阿里镜像源以加速下载。\n\n### 方式一：使用 pip 安装（推荐）\n\n**使用国内镜像源加速安装：**\n```bash\npip install pyod -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n```\n\n**或使用官方源安装\u002F更新：**\n```bash\npip install pyod\npip install --upgrade pyod\n```\n\n### 方式二：使用 Conda 安装\n如果您使用 Anaconda 或 Miniconda 环境：\n```bash\nconda install -c conda-forge pyod\n```\n\n### 方式三：从源码安装\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod.git\ncd pyod\npip install .\n```\n\n## 3. 基本使用\n\nPyOD 的核心优势在于其统一的 API。只需 5 行代码即可完成模型的训练与预测。\n\n### 场景一：表格数据异常检测（以 ECOD 算法为例）\n\nECOD 是一种无需参数调节且高效的无监督算法，适合作为入门首选。\n\n```python\n# 引入必要的库\nfrom pyod.models.ecod import ECOD\nimport numpy as np\n\n# 假设 X_train 和 X_test 是您的训练集和测试集 (numpy array)\n# X_train shape: (n_samples, n_features)\n\n# 1. 初始化检测器\nclf = ECOD()\n\n# 2. 拟合模型 (训练)\nclf.fit(X_train)\n\n# 3. 获取训练数据的异常分数 (分数越高越可能是异常)\ny_train_scores = clf.decision_scores_ \n\n# 4. 预测测试数据的异常分数\ny_test_scores = clf.decision_function(X_test)\n\n# 5. 获取二分类标签 (0 表示正常，1 表示异常)\ny_test_pred = clf.predict(X_test) \n```\n\n### 场景二：文本数据异常检测（EmbeddingOD）\n\nPyOD V2 新增了基于嵌入的模型，可直接处理文本数据。需先安装额外依赖：\n```bash\npip install pyod sentence-transformers -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n```\n\n使用示例：\n```python\nfrom pyod.models.embedding import EmbeddingOD\n\n# 准备文本数据 (字符串列表)\ntrain_texts = [\"normal text 1\", \"normal text 2\", ...]\ntest_texts = [\"normal text\", \"abnormal spam text\", ...]\n\n# 初始化：自动结合预训练编码器 (MiniLM) 和检测器 (KNN)\nclf = EmbeddingOD.for_text(quality='fast')\n\n# 拟合与预测\nclf.fit(train_texts)\nscores = clf.decision_function(test_texts)  # 异常分数\nlabels = clf.predict(test_texts)            # 0 或 1\n```\n\n### 核心 API 速查\n\n所有检测器均遵循以下标准接口：\n\n| 方法\u002F属性 | 说明 |\n| :--- | :--- |\n| `fit(X)` | 训练模型（无监督学习通常忽略 y 标签）。 |\n| `decision_function(X)` | 返回原始异常分数，分数越高代表越异常。 |\n| `predict(X)` | 返回二分类标签（0=正常，1=异常）。 |\n| `decision_scores_` | **属性**：训练数据的异常分数。 |\n| `labels_` | **属性**：训练数据的二分类标签。 |\n\n> **提示**：对于未知的表格数据集，建议优先尝试 `ECOD` 或 `IsolationForest`；若涉及文本或图像，请使用 `EmbeddingOD`。","某电商风控团队每天需从百万级订单数据中识别刷单、盗号等异常行为，传统规则引擎已难以应对日益复杂的欺诈模式。\n\n### 没有 pyod 时\n- 算法选型困难：面对孤立森林、LOF 等多种检测算法，缺乏统一接口，每次尝试新模型都需重写大量代码。\n- 多模态数据束手无策：仅能处理结构化表格数据，对于包含用户评论文本或商品图片的混合数据，无法有效提取特征进行联合异常判断。\n- 调参成本高昂：依赖人工经验反复调整阈值和参数，耗时数天且容易因主观判断导致漏报或误报。\n- 深度学习门槛高：想引入先进的神经网络模型提升准确率，但缺乏现成的 PyTorch 集成框架，自行实现难度极大。\n\n### 使用 pyod 后\n- 一键切换模型：通过统一的 API 接口，可在 45 种内置算法（含 12 种深度学习方法）间自由切换，快速验证哪种模型最适合当前数据分布。\n- 轻松搞定多模态：利用 EmbeddingOD 模块，直接调用预训练模型将文本和图片转化为向量，无缝衔接检测器，实现全维度风险扫描。\n- 智能自动调优：借助 LLM 驱动的模型选择功能，自动推荐最优算法与参数组合，将原本数天的调优过程缩短至小时级。\n- 性能效率双升：底层优化的计算框架显著提升了大规模数据集的处理速度，同时保持了高准确率，让实时风控成为可能。\n\npyod 将复杂的异常检测技术封装为简单易用的工具，让团队能以最低成本构建高精度的多模态智能风控系统。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fyzhao062_pyod_40884814.png","yzhao062","Yue Zhao","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fyzhao062_b7512e00.jpg","Assistant Professor at USC | AI Auditing |  Making AI agents and systems inspectable, safe, and accountable","University of Southern California","Los Angeles, CA, USA","https:\u002F\u002Fviterbi-web.usc.edu\u002F~yzhao010\u002F","https:\u002F\u002Fgithub.com\u002Fyzhao062",[81,85],{"name":82,"color":83,"percentage":84},"Python","#3572A5",89.7,{"name":86,"color":87,"percentage":88},"Jupyter Notebook","#DA5B0B",10.3,9778,1462,"2026-04-10T13:22:52","BSD-2-Clause",1,"未说明","非必需。仅在使用可选的深度学习模型（如 AutoEncoder）或 EmbeddingOD 进行图像\u002F文本检测时需要 PyTorch，具体显卡型号、显存及 CUDA 版本未在文档中明确指定。",{"notes":97,"python":98,"dependencies":99},"该工具核心功能基于 CPU 运行，利用 numba 和 joblib 进行加速和并行处理。GPU 及相关重型依赖（如 PyTorch, transformers）仅在启用深度学习模型或多模态检测（EmbeddingOD）时为可选依赖。建议使用 pip 或 conda 安装，若需使用文本或图像检测功能，需额外安装对应的编码器库（如 sentence-transformers 或 openai）。","3.8+",[100,101,102,103,104,105,106,107,108,109],"joblib","matplotlib","numpy>=1.19","numba>=0.51","scipy>=1.5.1","scikit_learn>=0.22.0","torch (可选，用于深度学习模型)","sentence-transformers (可选，用于文本异常检测)","transformers (可选，用于图像异常检测)","xgboost (可选，用于 XGBOD)",[35,15,14,111,16],"其他",[113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131],"outlier-detection","anomaly-detection","outlier-ensembles","outliers","anomaly","machine-learning","data-mining","unsupervised-learning","fraud-detection","autoencoder","neural-networks","deep-learning","data-science","novelty-detection","out-of-distribution-detection","foundation-models","image-anomaly-detection","multimodal","nlp-anomaly-detection","ready_partial","2026-03-27T02:49:30.150509","2026-04-11T10:01:53.022774",[136,141,146,151,156,161],{"id":137,"question_zh":138,"answer_zh":139,"source_url":140},29405,"使用 pip 安装 pyod 时出现 'ModuleNotFoundError: No module named nose' 错误怎么办？","这通常是因为您的 Python 脚本文件名与包名冲突。请检查当前目录下是否有名为 `pyod.py` 的文件。如果有，请将其重命名为其他名称（例如 `my_pyod_test.py`），因为当文件名与包名相同时，Python 会导入您自己的脚本而不是安装的库，从而导致此错误。","https:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod\u002Fissues\u002F6",{"id":142,"question_zh":143,"answer_zh":144,"source_url":145},29406,"在使用 KNN 算法的 Mahalanobis 距离度量时，即使提供了参数 V 仍报错 'Must provide either V or VI' 如何解决？","这是因为在使用 `np.cov` 计算协方差矩阵时，默认假设每一行是一个变量。对于机器学习数据集（通常每行是一个样本，每列是一个特征），需要显式设置 `rowvar=False`。请使用以下代码修正：`metric_params={'V': np.cov(X_train, rowvar=False)}`。该问题已在 v0.7.0 版本中得到改进。","https:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod\u002Fissues\u002F23",{"id":147,"question_zh":148,"answer_zh":149,"source_url":150},29407,"在使用 RandomizedSearchCV 对 COF 和 SOD 模型进行超参数调优时，构造函数参数始终为 None 怎么办？","如果您在 conda 环境中开发并修改了源代码，仅仅修改文件可能不会立即生效。请确保在修改代码后，在项目根目录下重新运行 `pip install .` 命令以更新 conda 环境中的安装包。此外，尝试将 `n_jobs` 参数设置为 1 也可能解决某些并行处理导致的参数传递问题。","https:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod\u002Fissues\u002F241",{"id":152,"question_zh":153,"answer_zh":154,"source_url":155},29408,"COPOD 模型的 explain_outlier() 函数是否支持对未参与训练的新数据（unseen data）进行可解释性分析？","虽然默认情况下缩放器是基于训练数据的，但可以通过手动应用训练好的缩放逻辑来处理新数据。一种方法是使用 MinMax Scaler 处理决策边界分数。示例代码如下：\n1. 训练模型：`model = COPOD().fit(X)`\n2. 获取分数并拟合缩放器：`predictions = model.decision_boundaries(X, scores_per_feature=True)`; `scaler = MinMax().fit(predictions)`\n3. 对新数据应用：`probas = scaler.transform(model.decision_boundaries(new_X, scores_per_feature=True)).clip(0, 1)`。\n注意使用 numpy 的 `.clip(0, 1)` 确保概率值在有效范围内。","https:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod\u002Fissues\u002F308",{"id":157,"question_zh":158,"answer_zh":159,"source_url":160},29409,"为什么 PCA、Autoencoder 等算法得出的异常分数与数据点到原点的欧氏距离高度相关（如 0.99）？","这种现象通常是数据依赖性的表现，并不一定代表算法失效。如果您的数据分布本身就没有明显的多簇结构，或者异常点主要表现为远离数据中心（原点），那么基于距离或重构误差的方法自然会与到原点的距离高度相关。这在某些真实数据集（如跨国消费数据）中是正常现象，表明这些数据中的异常主要体现为全局数值的极端化，而非局部簇结构的偏离。","https:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod\u002Fissues\u002F64",{"id":162,"question_zh":163,"answer_zh":164,"source_url":165},29410,"PyOD 是否支持时间序列异常检测？","PyOD 主要专注于多维静态数据的异常检测。对于专门的时间序列异常检测需求，作者团队已经开发了专门的工具包 TODS (Time-series Outlier Detection System)。建议时间序列任务的用户转而使用 TODS 项目。","https:\u002F\u002Fgithub.com\u002Fyzhao062\u002Fpyod\u002Fissues\u002F9",[167,172,177,182,187,192,197,202,207,212,217,222,227,232,237,242,247,252,257,262],{"id":168,"version":169,"summary_zh":170,"released_at":171},198195,"v2.1.0","PyOD v2.1.0：基于基础模型嵌入的多模态异常检测\n\n### 亮点\n\n- **EmbeddingOD**：可将任意嵌入编码器（sentence-transformers、OpenAI、HuggingFace 或自定义可调用对象）与 PyOD 的 50 多种检测器串联，用于文本和图像异常检测。实现了经 NLP-ADBench（EMNLP 2025）验证的两步法。\n\n- **基准测试驱动的预设**：`EmbeddingOD.for_text()` 和 `EmbeddingOD.for_image()` 提供基于 NLP-ADBench 和 AnomalyDINO 结果的质量等级（快速\u002F平衡\u002F最佳）。\n\n- **MultiModalEncoder**（早期融合）：对多种模态（文本 + 图像 + 表格数据）进行编码并拼接嵌入向量。支持通过训练集均值插补处理每样本中的缺失数据。\n\n- **MultiModalOD**（分数融合）：为每种模态分别运行一个检测器，使用训练时的归一化器对分数进行标准化，并通过平均\u002F最大化\u002F中位数等方式进行组合。测试时若存在缺失模态，则以训练集的平均分数进行插补。\n\n- **文档优化**：更新了 README 和 ReadTheDocs，新增 EmbeddingOD 示例，精简资源链接，缩短 API 参考文档（移除了 40 多个模型页面中的继承成员），并在 docs\u002Fexample.rst 中提供了完整的 EmbeddingOD 入门指南。\n\n### 安装\n\n```bash\npip install pyod --upgrade\n\n# 用于文本异常检测：\npip install pyod sentence-transformers\n\n# 用于图像异常检测：\npip install pyod transformers torch\n```\n\n### 快速入门\n\n```python\nfrom pyod.models.embedding import EmbeddingOD\n\n# 三行代码实现文本异常检测\nclf = EmbeddingOD(encoder='all-MiniLM-L6-v2', detector='KNN')\nclf.fit(train_texts)\nlabels = clf.predict(test_texts)\n\n# 或者使用预设\nclf = EmbeddingOD.for_text(quality='fast')\n```\n\n### 多模态示例\n\n```python\nfrom pyod.models.embedding import EmbeddingOD, MultiModalOD\nfrom pyod.models.knn import KNN\n\nclf = MultiModalOD(modalities={\n    'text': EmbeddingOD(encoder='all-MiniLM-L6-v2', detector='KNN'),\n    'tabular': KNN(),\n}, combination='average')\n\nclf.fit({'text': train_texts, 'tabular': X_train})\nscores = clf.decision_function({'text': test_texts, 'tabular': X_test})\n```\n\n### 参考文献\n\n- NLP-ADBench（Li 等人，EMNLP 2025）：两步法优于端到端方法\n- TAD-Bench（Cao 等人，2025）：在 8 种嵌入与 8 种检测器的组合中得到验证\n- AD-LLM（Yang 等人，ACL 2025）：基于大语言模型的异常检测基准\n- AnomalyDINO（WACV 2025）：DINOv2 + KNN 用于图像异常检测","2026-04-06T08:07:16",{"id":173,"version":174,"summary_zh":175,"released_at":176},198196,"v2.0.7","本次发布重点在于修复兼容性问题，并提升核心检测器及打包的一致性。\n\n### 亮点\n\n- 改进了以下模块中 `NearestNeighbors` 的一致性与性能：\n  - `KNN`\n  - `ABOD`\n  - `SOD`\n  - `LUNAR`\n  - （问题 #654）\n\n- 更新了 `VAE` 的默认配置：\n  - 默认输出激活函数改为 `identity`\n  - 新增对 `identity` 激活的支持并添加相关测试\n  - （问题 #651）\n\n- 打包\u002F分发修复：\n  - 修正了 `package_data` 和 `MANIFEST` 配置\n  - 确保自动模型选择器的 JSON 资源被正确包含在分发包中\n  - （问题 #642）\n\n- 深度学习基础行为修复：\n  - 在 `BaseDeepLearningDetector` 中，无监督训练时会显式忽略 `y` 参数\n  - 避免了深度学习检测器中因批次数据为元组或列表而导致的设备错误\n  - （问题 #591）\n\n### 备注\n\n- 版本：`2.0.7`\n- 本次发布属于稳定性与兼容性更新，未进行重大 API 重构。","2026-02-27T09:21:37",{"id":178,"version":179,"summary_zh":180,"released_at":181},198197,"v2.0.6","v\u003C2.0.6>, \u003C09\u002F04\u002F2025> -- 最终添加自动模型选择器 (#616)。\nv\u003C2.0.6>, \u003C12\u002F01\u002F2025> -- 针对新的 scikit-learn API 变更提前采取预防措施 (#649)。\n\n这份迟迟未合并的 PR 同时包含了由大语言模型实现的自动模型选择器，以及针对即将到来的 scikit-learn API 变更的新修复（用户端无任何变化）。","2025-12-02T18:39:27",{"id":183,"version":184,"summary_zh":185,"released_at":186},198198,"v2.0.5","添加轮子","2025-04-29T22:28:31",{"id":188,"version":189,"summary_zh":190,"released_at":191},198199,"v2.0.3","v\u003C2.0.3>, \u003C2024年9月6日> -- 在无监督异常检测中添加拒绝选项（#605）。\nv\u003C2.0.3>, \u003C2024年12月20日> -- 大规模优化文档。","2024-12-22T00:23:34",{"id":193,"version":194,"summary_zh":195,"released_at":196},198200,"v2.0.2","v\u003C2.0.2>, \u003C2024年7月1日> -- 增加AE1SVM。\nv\u003C2.0.2>, \u003C2024年7月4日> -- 从TensorFlow迁移到PyTorch -- 重新实现ALAD。\nv\u003C2.0.2>, \u003C2024年7月4日> -- 从TensorFlow迁移到PyTorch -- 重新实现anogan。\nv\u003C2.0.2>, \u003C2024年7月6日> -- **已完成移除所有TensorFlow和Keras代码。**\nv\u003C2.0.2>, \u003C2024年7月21日> -- 增加DevNet。","2024-09-06T03:26:33",{"id":198,"version":199,"summary_zh":200,"released_at":201},198201,"v2.0.1","v\u003C2.0.0>, \u003C2024年5月21日> -- 从 TensorFlow 迁移到 PyTorch -- 重新实现 SO_GAAL。\nv\u003C2.0.0>, \u003C2024年5月21日> -- 从 TensorFlow 运行到 PyTorch -- 使用更多工具函数实现深度学习基础模块。\nv\u003C2.0.1>, \u003C2024年6月16日> -- 从 TensorFlow 迁移到 PyTorch -- 重新实现 DeepSVDD。\nv\u003C2.0.1>, \u003C2024年6月17日> -- 从 TensorFlow 迁移到 PyTorch -- 重新实现 dl_base。\nv\u003C2.0.1>, \u003C2024年6月21日> -- 从 TensorFlow 迁移到 PyTorch -- 重新实现 MO_GAAL。\nv\u003C2.0.1>, \u003C2024年6月21日> -- 从 TensorFlow 迁移到 PyTorch -- 重新实现 AE 和 VAE。\n\n主要由 @yqin43、@RaymondY、@zhuox5 和 @Yeechin-is-here 按随机顺序推动 :)","2024-06-22T04:46:20",{"id":203,"version":204,"summary_zh":205,"released_at":206},198202,"v1.1.3","v\u003C1.1.3>, \u003C2024年7月2日> -- 修复SUOD变更带来的小问题。","2024-02-09T06:52:05",{"id":208,"version":209,"summary_zh":210,"released_at":211},198203,"v1.1.2","v\u003C1.1.2>, \u003C2023年11月17日> -- 大规模文档优化。\nv\u003C1.1.2>, \u003C2023年11月17日> -- 修复污染问题。\nv\u003C1.1.2>, \u003C2023年11月17日> -- KPCA bug 修复 (#494)。","2023-11-18T08:45:46",{"id":213,"version":214,"summary_zh":215,"released_at":216},198204,"v1.1.1","v\u003C1.1.1>, \u003C2023年7月3日> -- 提升 sklearn 的版本要求，并进行一些热点修复。\nv\u003C1.1.1>, \u003C2023年10月24日> -- 添加深度孤立森林算法 (#506)","2023-10-25T07:38:28",{"id":218,"version":219,"summary_zh":220,"released_at":221},198205,"v1.1.0","v\u003C1.0.9>, \u003C03\u002F19\u002F2023> -- Hot fix for errors in ECOD and COPOD due to the issue of scipy.\r\nv\u003C1.1.0>, \u003C06\u002F19\u002F2023> -- Further integration of PyThresh.","2023-06-25T06:09:29",{"id":223,"version":224,"summary_zh":225,"released_at":226},198206,"v1.0.8","v\u003C1.0.8>, \u003C03\u002F08\u002F2023> -- Improve clone compatibility (#471).\r\nv\u003C1.0.8>, \u003C03\u002F08\u002F2023> -- Add QMCD detector (#452).\r\nv\u003C1.0.8>, \u003C03\u002F08\u002F2023> -- Optimized ECDF and drop Statsmodels dependency (#467).","2023-03-08T22:50:04",{"id":228,"version":229,"summary_zh":230,"released_at":231},198207,"v1.0.7","v\u003C1.0.7>, \u003C12\u002F14\u002F2022> -- Enable automatic thresholding by pythresh (#454).","2022-12-16T21:21:20",{"id":233,"version":234,"summary_zh":235,"released_at":236},198208,"v1.0.6","v\u003C1.0.6>, \u003C09\u002F23\u002F2022> -- Update ADBench benchmark for NeruIPS 2022.\r\nv\u003C1.0.6>, \u003C10\u002F23\u002F2022> -- ADD KPCA.","2022-10-24T01:26:05",{"id":238,"version":239,"summary_zh":240,"released_at":241},198209,"v1.0.5","v\u003C1.0.5>, \u003C07\u002F29\u002F2022> -- Import optimization.\r\nv\u003C1.0.5>, \u003C08\u002F27\u002F2022> -- Code optimization.\r\nv\u003C1.0.5>, \u003C09\u002F14\u002F2022> -- Add ALAD.\r\n\r\nAnoGAN is too slow to run. Consider a removal or refactoring.","2022-09-15T13:09:04",{"id":243,"version":244,"summary_zh":245,"released_at":246},198210,"v1.0.4","v\u003C1.0.4>, \u003C07\u002F29\u002F2022> -- General improvement of code quality and test coverage.\r\nv\u003C1.0.4>, \u003C07\u002F29\u002F2022> -- Add LUNAR (#413).\r\nv\u003C1.0.4>, \u003C07\u002F29\u002F2022> -- Add LUNAR (#415).","2022-07-29T22:36:09",{"id":248,"version":249,"summary_zh":250,"released_at":251},198211,"v1.0.3","v\u003C1.0.3>, \u003C06\u002F27\u002F2022> -- Change default generation to new behaviors (#409).\r\nv\u003C1.0.3>, \u003C07\u002F04\u002F2022> -- Add AnoGAN (#412).","2022-07-05T18:35:18",{"id":253,"version":254,"summary_zh":255,"released_at":256},198212,"v1.0.2","v\u003C1.0.2>, \u003C06\u002F21\u002F2022> -- Add GMM detector (#402).\r\nv\u003C1.0.2>, \u003C06\u002F23\u002F2022> -- Add ADBench Benchmark.","2022-06-23T15:04:53",{"id":258,"version":259,"summary_zh":260,"released_at":261},198213,"v1.0.1","v\u003C1.0.1>, \u003C04\u002F27\u002F2022> -- Add INNE (#396).\r\nv\u003C1.0.1>, \u003C05\u002F13\u002F2022> -- Urgent fix for iForest (#406).\r\n\r\nUrgent fix for \r\n\r\nFile \"lib\u002Fpython3.10\u002Fsite-packages\u002Fpyod\u002Fmodels\u002Fiforest.py\", line 13, in \u003Cmodule>\r\n    from sklearn.utils.fixes import _joblib_parallel_args\r\nImportError: cannot import name '_joblib_parallel_args' from 'sklearn.utils.fixes' (\u002Flib\u002Fpython3.10\u002Fsite-packages\u002Fsklearn\u002Futils\u002Ffixes.py)\r\n","2022-05-13T14:57:40",{"id":263,"version":264,"summary_zh":265,"released_at":266},198214,"v1.0.0","v\u003C1.0.0>, \u003C04\u002F04\u002F2022> -- Add KDE detector (#382).\r\nv\u003C1.0.0>, \u003C04\u002F06\u002F2022> -- Disable the bias term in DeepSVDD (#385).\r\nv\u003C1.0.0>, \u003C04\u002F21\u002F2022> -- Fix a set of issues of autoencoders (#313, #390, #391).\r\nv\u003C1.0.0>, \u003C04\u002F23\u002F2022> -- Add sampling based detector (#384).","2022-04-23T22:47:22"]