[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-scikit-multiflow--scikit-multiflow":3,"tool-scikit-multiflow--scikit-multiflow":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 
代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",153609,2,"2026-04-13T11:34:59",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",108322,"2026-04-10T11:39:34",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 
协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[52,13,15,14],"插件",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":32,"last_commit_at":59,"category_tags":60,"status":17},4721,"markitdown","microsoft\u002Fmarkitdown","MarkItDown 是一款由微软 AutoGen 团队打造的轻量级 Python 工具，专为将各类文件高效转换为 Markdown 格式而设计。它支持 PDF、Word、Excel、PPT、图片（含 OCR）、音频（含语音转录）、HTML 乃至 YouTube 链接等多种格式的解析，能够精准提取文档中的标题、列表、表格和链接等关键结构信息。\n\n在人工智能应用日益普及的今天，大语言模型（LLM）虽擅长处理文本，却难以直接读取复杂的二进制办公文档。MarkItDown 恰好解决了这一痛点，它将非结构化或半结构化的文件转化为模型“原生理解”且 Token 效率极高的 Markdown 格式，成为连接本地文件与 AI 分析 pipeline 的理想桥梁。此外，它还提供了 MCP（模型上下文协议）服务器，可无缝集成到 Claude Desktop 等 LLM 应用中。\n\n这款工具特别适合开发者、数据科学家及 AI 研究人员使用，尤其是那些需要构建文档检索增强生成（RAG）系统、进行批量文本分析或希望让 AI 助手直接“阅读”本地文件的用户。虽然生成的内容也具备一定可读性，但其核心优势在于为机器",93400,"2026-04-06T19:52:38",[52,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":66,"readme_en":67,"readme_zh":68,"quickstart_zh":69,"use_case_zh":70,"hero_image_url":71,"owner_login":64,"owner_name":64,"owner_avatar_url":72,"owner_bio":73,"owner_company":73,"owner_location":73,"owner_email":73,"owner_twitter":73,"owner_website":73,"owner_url":74,"languages":75,"stars":99,"forks":100,"last_commit_at":101,"license":102,"difficulty_score":103,"env_os":104,"env_gpu":105,"env_ram":105,"env_deps":106,"category_tags":115,"github_topics":116,"view_count":32,"oss_zip_url":73,"oss_zip_packed_at":73,"status":17,"created_at":123,"updated_at":124,"faqs":125,"releases":155},7128,"scikit-multiflow\u002Fscikit-multiflow","scikit-multiflow","A machine learning package for streaming data in Python. 
The other ancestor of River.","scikit-multiflow 是一个专为 Python 设计的机器学习库，专注于处理源源不断的“流式数据”。与传统机器学习需要一次性加载全部数据不同，它能让模型随着数据的到来实时增量更新，特别适合应对海量数据且对响应速度要求极高的场景。\n\n该工具主要解决了动态环境中数据分布不断变化（即“概念漂移”）的难题。通过内置的自适应学习算法，它能敏锐察觉数据规律的改变并自动调整策略，确保模型在长期运行中依然保持高精度。同时，它在内存占用和计算效率上进行了深度优化，能够从容应对无边界的数据流，避免资源耗尽。\n\nscikit-multiflow 非常适合数据科学家、研究人员以及需要构建实时预测系统的开发者使用。其接口设计友好，易于上手，既方便快速搭建实验，也支持对现有方法进行灵活扩展。值得一提的是，作为知名流式学习项目 River 的前身之一，scikit-multiflow 奠定了坚实的架构基础。虽然目前开发重心已转移至 River，但 scikit-multiflow 的代码依然公开可用，是理解和学习流式机器学习原理的优质资源。","\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fscikit-multiflow_scikit-multiflow_readme_7eed38b2942c.png\" height=\"100\"\u002F>\n\n[![Build status](https:\u002F\u002Ftravis-ci.org\u002Fscikit-multiflow\u002Fscikit-multiflow.svg?branch=master)](https:\u002F\u002Ftravis-ci.org\u002Fscikit-multiflow\u002Fscikit-multiflow)\n[![Build Status](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fscikit-multiflow_scikit-multiflow_readme_3323fe13e747.png)](https:\u002F\u002Fdev.azure.com\u002Fscikit-multiflow\u002Fscikit-multiflow\u002F_build\u002Flatest?definitionId=1&branchName=master)\n[![codecov](https:\u002F\u002Fcodecov.io\u002Fgh\u002Fscikit-multiflow\u002Fscikit-multiflow\u002Fbranch\u002Fmaster\u002Fgraph\u002Fbadge.svg)](https:\u002F\u002Fcodecov.io\u002Fgh\u002Fscikit-multiflow\u002Fscikit-multiflow)\n![Python version](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002Fpython-3.5%20%7C%203.6%20%7C%203.7%20%7C%203.8-blue.svg)\n[![Anaconda-Server Badge](https:\u002F\u002Fanaconda.org\u002Fconda-forge\u002Fscikit-multiflow\u002Fbadges\u002Fplatforms.svg)](https:\u002F\u002Fanaconda.org\u002Fconda-forge\u002Fscikit-multiflow)\n[![PyPI version](https:\u002F\u002Fbadge.fury.io\u002Fpy\u002Fscikit-multiflow.svg)](https:\u002F\u002Fbadge.fury.io\u002Fpy\u002Fscikit-multiflow)\n[![Anaconda-Server 
Badge](https:\u002F\u002Fanaconda.org\u002Fconda-forge\u002Fscikit-multiflow\u002Fbadges\u002Fversion.svg)](https:\u002F\u002Fanaconda.org\u002Fconda-forge\u002Fscikit-multiflow)\n[![DockerHub](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002Fdocker-available-blue.svg?logo=docker)](https:\u002F\u002Fhub.docker.com\u002Fr\u002Fskmultiflow\u002Fscikit-multiflow)\n[![License](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FLicense-BSD%203--Clause-blue.svg)](https:\u002F\u002Fopensource.org\u002Flicenses\u002FBSD-3-Clause)\n[![Gitter](https:\u002F\u002Fbadges.gitter.im\u002Fscikit-multiflow\u002Fcommunity.svg)](https:\u002F\u002Fgitter.im\u002Fscikit-multiflow\u002Fcommunity?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)\n\n`scikit-multiflow` is a machine learning package for streaming data in Python.\n\n---\n\n[creme](https:\u002F\u002FMaxHalford.github.io\u002F) and [scikit-multiflow](https:\u002F\u002Fscikit-multiflow.github.io\u002F) are merging into a new project called [River](https:\u002F\u002Fgithub.com\u002Fonline-ml\u002Friver\u002F).\n\nWe feel that both projects share the same vision. We believe that pooling our resources instead of duplicating work will benefit both sides. We are also confident that this will benefit both communities. There will be more people working on the new project, which will allow us to distribute work more efficiently. We will thus be able to work on more features and improve the overall quality of the project.\n\nBoth projects will stop active development. The code for both projects will remain publicly available, although development will only focus on minor maintenance during a transition period. The architecture of the new package is very similar to that of creme. It will focus on single-instance incremental models.\n\nWe encourage users to use River instead of creme. We understand that this transition will require an extra effort in the short term from current users. 
However, we believe that the result will be better for everyone in the long run.\n\nYou will still be able to install and use `creme` as well as `scikit-multiflow`. Both projects will remain on PyPI, conda-forge and GitHub.\n\n---\n\n### Quick links\n* [Webpage](https:\u002F\u002Fscikit-multiflow.github.io\u002F)\n* [Documentation](https:\u002F\u002Fscikit-multiflow.readthedocs.io\u002Fen\u002Fstable\u002F)\n* [Community](https:\u002F\u002Fscikit-multiflow.github.io\u002Fcommunity\u002F)\n\n# Features\n\n### Incremental Learning\nStream learning models are created incrementally and are updated continuously. They are suitable\nfor big data applications where real-time response is vital.\n\n### Adaptive learning\nChanges in data distribution harm learning. Adaptive methods are specifically designed to be\nrobust to concept drift changes in dynamic environments.\n\n### Resource-wise efficient\nStreaming techniques efficiently handle resources such as memory and processing time given the\nunbounded nature of data streams. \n\n### Easy to use\nscikit-multiflow is designed for users with any experience level. Experiments are easy to design,\nsetup, and run. Existing methods are easy to modify and extend.\n\n### Stream learning tools\nIn its current state, scikit-multiflow contains data generators, multi-output\u002Fmulti-target stream\nlearning methods, change detection methods, evaluation methods, and more.\n\n### Open source\nDistributed under the \n[BSD 3-Clause](https:\u002F\u002Fgithub.com\u002Fscikit-multiflow\u002Fscikit-multiflow\u002Fblob\u002Fmaster\u002FLICENSE), \n`scikit-multiflow` is developed and maintained by an active, diverse and growing [community](\u002Fcommunity).\n\n# Use cases\nThe following tasks are supported in `scikit-multiflow`:\n\n### Supervised learning\nWhen working with labeled data. 
Depending on the target type, the task can be either classification\n(discrete values) or regression (continuous values)\n\n### Single\u002Fmulti output\nSingle-output methods predict a single target-label (binary or multi-class) for classification or\na single target-value for regression. Multi-output methods simultaneously predict multiple\nvariables given an input.\n\n### Concept drift detection\nChanges in data distribution can harm learning. Drift detection methods are designed to raise an\nalarm in the presence of drift and are used alongside learning methods to improve their robustness\nagainst this phenomenon in evolving data streams.\n\n### Unsupervised learning\nWhen working with unlabeled data. For example, anomaly detection where the goal is the\nidentification of rare events or samples which differ significantly from the majority of the data.\n\n---\n\n#### Jupyter Notebooks\nIn order to display plots from `scikit-multiflow` within a [Jupyter Notebook](https:\u002F\u002Fjupyter.org\u002F) we need to define\nthe proper matplotlib backend to use. This is done by including the following magic command at the\nbeginning of the Notebook:\n\n```python\n%matplotlib notebook\n```\n\n[JupyterLab](http:\u002F\u002Fjupyterlab.readthedocs.io\u002Fen\u002Fstable\u002F) is the next-generation user interface\nfor Jupyter, currently in beta, it can display interactive plots with some caveats. 
If you use\nJupyterLab then the current solution is to use the \n[jupyter-matplotlib](https:\u002F\u002Fgithub.com\u002Fmatplotlib\u002Fjupyter-matplotlib) extension:\n\n```python\n%matplotlib widget\n```\n\n## Citing `scikit-multiflow`\n\nIf `scikit-multiflow` has been useful for your research and you would like to cite it in a academic\npublication, please use the following Bibtex entry:\n\n```bibtex\n@article{skmultiflow,\n  author  = {Jacob Montiel and Jesse Read and Albert Bifet and Talel Abdessalem},\n  title   = {Scikit-Multiflow: A Multi-output Streaming Framework },\n  journal = {Journal of Machine Learning Research},\n  year    = {2018},\n  volume  = {19},\n  number  = {72},\n  pages   = {1-5},\n  url     = {http:\u002F\u002Fjmlr.org\u002Fpapers\u002Fv19\u002F18-251.html}\n}\n```\n","\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fscikit-multiflow_scikit-multiflow_readme_7eed38b2942c.png\" height=\"100\"\u002F>\n\n[![构建状态](https:\u002F\u002Ftravis-ci.org\u002Fscikit-multiflow\u002Fscikit-multiflow.svg?branch=master)](https:\u002F\u002Ftravis-ci.org\u002Fscikit-multiflow\u002Fscikit-multiflow)\n[![构建状态](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fscikit-multiflow_scikit-multiflow_readme_3323fe13e747.png)](https:\u002F\u002Fdev.azure.com\u002Fscikit-multiflow\u002Fscikit-multiflow\u002F_build\u002Flatest?definitionId=1&branchName=master)\n[![codecov](https:\u002F\u002Fcodecov.io\u002Fgh\u002Fscikit-multiflow\u002Fscikit-multiflow\u002Fbranch\u002Fmaster\u002Fgraph\u002Fbadge.svg)](https:\u002F\u002Fcodecov.io\u002Fgh\u002Fscikit-multiflow\u002Fscikit-multiflow)\n![Python版本](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002Fpython-3.5%20%7C%203.6%20%7C%203.7%20%7C%203.8-blue.svg)\n[![Anaconda服务器徽章](https:\u002F\u002Fanaconda.org\u002Fconda-forge\u002Fscikit-multiflow\u002Fbadges\u002Fplatforms.svg)](https:\u002F\u002Fanaconda.org\u002Fconda-forge\u002Fscikit-multiflow)\n[![PyPI版本](https:\u002F\u002Fbadge.fury.io\u002Fpy\u002Fsci
kit-multiflow.svg)](https:\u002F\u002Fbadge.fury.io\u002Fpy\u002Fscikit-multiflow)\n[![Anaconda服务器徽章](https:\u002F\u002Fanaconda.org\u002Fconda-forge\u002Fscikit-multiflow\u002Fbadges\u002Fversion.svg)](https:\u002F\u002Fanaconda.org\u002Fconda-forge\u002Fscikit-multiflow)\n[![DockerHub](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002Fdocker-available-blue.svg?logo=docker)](https:\u002F\u002Fhub.docker.com\u002Fr\u002Fskmultiflow\u002Fscikit-multiflow)\n[![许可证](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FLicense-BSD%203--Clause-blue.svg)](https:\u002F\u002Fopensource.org\u002Flicenses\u002FBSD-3-Clause)\n[![Gitter](https:\u002F\u002Fbadges.gitter.im\u002Fscikit-multiflow\u002Fcommunity.svg)](https:\u002F\u002Fgitter.im\u002Fscikit-multiflow\u002Fcommunity?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)\n\n`scikit-multiflow` 是一个用于 Python 中流数据的机器学习软件包。\n\n---\n\n`creme` (https:\u002F\u002FMaxHalford.github.io\u002F) 和 `scikit-multiflow` (https:\u002F\u002Fscikit-multiflow.github.io\u002F) 正在合并为一个名为 `River` (https:\u002F\u002Fgithub.com\u002Fonline-ml\u002Friver\u002F) 的新项目。\n\n我们认为这两个项目拥有相同的愿景。我们相信，通过整合资源而非重复工作，将使双方受益。同时，我们也坚信这将惠及两个社区。新的项目将有更多的人参与开发，从而能够更高效地分配任务。这样一来，我们将能够开发更多功能，并提升项目的整体质量。\n\n两个项目都将停止积极开发。尽管在过渡期内仅进行少量维护，但两者的代码仍将继续公开可用。新软件包的架构与 creme 非常相似，它将专注于单实例增量式模型。\n\n我们鼓励用户使用 River 代替 creme。我们理解，对于现有用户来说，这一过渡在短期内需要付出额外的努力。然而，从长远来看，我们认为这对所有人都会更有利。\n\n您仍然可以安装和使用 `creme` 以及 `scikit-multiflow`。这两个项目将继续保留在 PyPI、conda-forge 和 GitHub 上。\n\n---\n\n### 快速链接\n* [官网](https:\u002F\u002Fscikit-multiflow.github.io\u002F)\n* [文档](https:\u002F\u002Fscikit-multiflow.readthedocs.io\u002Fen\u002Fstable\u002F)\n* [社区](https:\u002F\u002Fscikit-multiflow.github.io\u002Fcommunity\u002F)\n\n# 特性\n\n### 增量学习\n流式学习模型是逐步创建并持续更新的。它们非常适合对实时响应要求极高的大数据应用。\n\n### 自适应学习\n数据分布的变化会对学习造成损害。自适应方法专门设计用于在动态环境中对概念漂移变化保持鲁棒性。\n\n### 资源高效\n鉴于数据流的无限性，流式技术能够高效地管理内存和处理时间等资源。\n\n### 易于使用\nscikit-multiflow 专为任何经验水平的用户设计。实验的设计、设置和运行都非常简单。现有的方法也易于修改和扩展。\n\n### 流式学习工具\n目前，scikit-multiflow 
包含数据生成器、多输出\u002F多目标流式学习方法、变化检测方法、评估方法等。\n\n### 开源\n`scikit-multiflow` 采用 [BSD 3-Clause](https:\u002F\u002Fgithub.com\u002Fscikit-multiflow\u002Fscikit-multiflow\u002Fblob\u002Fmaster\u002FLICENSE) 许可证进行分发，由一个活跃、多元且不断壮大的 [社区](\u002Fcommunity) 开发和维护。\n\n# 使用场景\n以下任务在 `scikit-multiflow` 中得到支持：\n\n### 监督学习\n当处理带标签的数据时。根据目标类型的不同，可以是分类（离散值）或回归（连续值）。\n\n### 单\u002F多输出\n单输出方法为分类预测单一目标标签（二分类或多分类），或为回归预测单一目标值。而多输出方法则可以在给定输入的情况下同时预测多个变量。\n\n### 概念漂移检测\n数据分布的变化可能会损害学习效果。漂移检测方法旨在检测到漂移时发出警报，并与学习方法结合使用，以提高其在不断变化的数据流中的鲁棒性。\n\n### 无监督学习\n当处理未标记的数据时。例如异常检测，其目标是识别出与大多数数据显著不同的罕见事件或样本。\n\n---\n\n#### Jupyter 笔记本\n为了在 [Jupyter Notebook]() 中显示 `scikit-multiflow` 的图表，我们需要定义正确的 matplotlib 后端来使用。这可以通过在笔记本的开头加入以下魔法命令来实现：\n\n```python\n%matplotlib notebook\n```\n\n[JupyterLab](http:\u002F\u002Fjupyterlab.readthedocs.io\u002Fen\u002Fstable\u002F) 是 Jupyter 的下一代用户界面，目前处于 beta 阶段，它可以显示交互式图表，但也有一些限制。如果您使用 JupyterLab，则当前的解决方案是使用 \n[jupyter-matplotlib](https:\u002F\u002Fgithub.com\u002Fmatplotlib\u002Fjupyter-matplotlib) 扩展：\n\n```python\n%matplotlib widget\n```\n\n## 引用 `scikit-multiflow`\n\n如果 `scikit-multiflow` 对您的研究有所帮助，并且您希望在学术出版物中引用它，请使用以下 Bibtex 条目：\n\n```bibtex\n@article{skmultiflow,\n  author  = {Jacob Montiel and Jesse Read and Albert Bifet and Talel Abdessalem},\n  title   = {Scikit-Multiflow: A Multi-output Streaming Framework },\n  journal = {Journal of Machine Learning Research},\n  year    = {2018},\n  volume  = {19},\n  number  = {72},\n  pages   = {1-5},\n  url     = {http:\u002F\u002Fjmlr.org\u002Fpapers\u002Fv19\u002F18-251.html}\n}\n```","# scikit-multiflow 快速上手指南\n\n`scikit-multiflow` 是一个专为 Python 设计的流数据机器学习包，支持增量学习、自适应概念漂移检测及资源高效处理。\n\n> **重要提示**：`scikit-multiflow` 与 `creme` 项目已合并为新项目 **[River](https:\u002F\u002Fgithub.com\u002Fonline-ml\u002Friver\u002F)**。虽然 `scikit-multiflow` 仍可安装使用，但官方建议新用户直接使用 River 以获得更活跃的支持和新特性。本指南仅针对仍需使用 `scikit-multiflow` 的场景。\n\n## 环境准备\n\n*   **操作系统**：Linux, macOS, Windows\n*   **Python 版本**：3.5, 3.6, 3.7, 或 3.8\n*   **前置依赖**：\n    *   `numpy`\n    
*   `scipy`\n    *   `pandas`\n    *   `matplotlib` (用于绘图)\n    *   `scikit-learn`\n\n## 安装步骤\n\n推荐使用 `pip` 进行安装。国内用户可指定清华或阿里镜像源以加速下载。\n\n### 方式一：使用 pip 安装（推荐）\n\n```bash\npip install scikit-multiflow -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n```\n\n### 方式二：使用 Conda 安装\n\n如果你使用 Anaconda 或 Miniconda：\n\n```bash\nconda install -c conda-forge scikit-multiflow\n```\n\n### 方式三：Docker 安装\n\n```bash\ndocker pull skmultiflow\u002Fscikit-multiflow\n```\n\n## 基本使用\n\n以下示例演示如何创建一个简单的流数据分类任务：生成模拟数据流，初始化一个增量学习模型（如 Hoeffding Tree），并进行在线训练与评估。\n\n### 1. 在 Jupyter Notebook 中配置绘图\n若需在 Jupyter Notebook 中显示动态图表，请在单元格开头添加：\n\n```python\n%matplotlib notebook\n```\n*(注：若使用 JupyterLab，请使用 `%matplotlib widget` 并确保安装了 `jupyter-matplotlib` 扩展)*\n\n### 2. 代码示例\n\n```python\nfrom skmultiflow.data import AGRAWALGenerator\nfrom skmultiflow.trees import HoeffdingTreeClassifier\nfrom skmultiflow.evaluation import EvaluatePrequential\n\n# 1. 创建数据流生成器 (模拟带概念漂移的数据流)\nstream = AGRAWALGenerator(classification_function=0, random_state=112)\nstream.prepare_for_use()\n\n# 2. 初始化分类器 (Hoeffding Tree 是经典的流数据决策树)\nclassifier = HoeffdingTreeClassifier()\n\n# 3. 设置评估方法 (预序贯评估 Prequential Evaluation)\nevaluator = EvaluatePrequential(\n    show_plot=True,\n    pretrain_size=200,\n    max_samples=10000,\n    n_wait=200,\n    batch_size=1,\n    output_file=None,\n    metrics=['accuracy', 'kappa']\n)\n\n# 4. 
运行评估\nevaluator.evaluate(stream=stream, model=classifier, model_names=['Hoeffding Tree'])\n```\n\n### 说明\n*   **AGRAWALGenerator**: 内置的数据流生成器，用于测试算法。实际生产中可替换为自定义的数据加载器。\n*   **HoeffdingTreeClassifier**: 一种适用于无限数据流的决策树算法，能够单样本增量更新。\n*   **EvaluatePrequential**: 流数据专用的评估框架，采用“先测试后训练”机制，实时反映模型性能。","某金融风控团队需要实时监测海量信用卡交易流，以便在毫秒级内识别并拦截欺诈行为。\n\n### 没有 scikit-multiflow 时\n- **模型更新滞后**：传统批量训练模式需等待数据积累到一定量才能重新训练，导致无法及时捕捉新型欺诈手段，响应延迟高达数小时。\n- **内存资源崩溃**：面对无边界的数据流，试图将历史全量数据载入内存进行训练，频繁引发服务器内存溢出（OOM）错误。\n- **概念漂移失效**：欺诈模式随时间动态变化（即概念漂移），静态模型无法自适应调整，导致误报率随时间推移急剧上升。\n- **开发验证复杂**：缺乏专用的流式评估框架，工程师需手动编写复杂的代码来模拟数据流和计算实时指标，调试成本极高。\n\n### 使用 scikit-multiflow 后\n- **增量实时更新**：利用其增量学习算法，每笔新交易到来即可单样本更新模型，实现对新欺诈模式的秒级响应与拦截。\n- **资源高效可控**：流式处理机制仅维护必要的统计信息，内存占用恒定且极低，轻松支撑全天候不间断的高吞吐数据流。\n- **自适应抗漂移**：内置的自适应方法能自动检测数据分布变化并动态调整模型权重，确保持续稳定的高准确率。\n- **实验开箱即用**：提供标准化的流式评估器和生成器，几行代码即可搭建完整的实时实验流水线，大幅缩短研发周期。\n\nscikit-multiflow 通过将批处理思维转变为流式增量思维，让机器学习模型在动态变化的大数据环境中实现了真正的实时智能与资源轻量化。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fscikit-multiflow_scikit-multiflow_7eed38b2.png","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fscikit-multiflow_49b51c38.png",null,"https:\u002F\u002Fgithub.com\u002Fscikit-multiflow",[76,80,84,88,92,95],{"name":77,"color":78,"percentage":79},"Python","#3572A5",98.7,{"name":81,"color":82,"percentage":83},"C++","#f34b7d",0.6,{"name":85,"color":86,"percentage":87},"Jupyter Notebook","#DA5B0B",0.3,{"name":89,"color":90,"percentage":91},"Dockerfile","#384d54",0.2,{"name":93,"color":94,"percentage":91},"Shell","#89e051",{"name":96,"color":97,"percentage":98},"Makefile","#427819",0,795,189,"2026-04-01T15:17:03","BSD-3-Clause",1,"Linux, macOS, Windows","未说明",{"notes":107,"python":108,"dependencies":109},"该项目已停止活跃开发，并与 creme 合并为新项目 River。官方建议新用户直接使用 River。若在 Jupyter Notebook 中绘制图表，需根据环境配置 '%matplotlib notebook' 或安装 jupyter-matplotlib 扩展并使用 '%matplotlib widget'。可通过 PyPI、conda-forge 或 Docker 安装。","3.5 | 3.6 | 3.7 | 
3.8",[110,111,112,113,114],"numpy","scipy","pandas","scikit-learn","matplotlib",[14,16],[117,113,118,119,120,121,122],"scikit","stream","streaming-data","machine-learning","meka","moa","2026-03-27T02:49:30.150509","2026-04-13T22:44:09.903475",[126,131,135,140,145,150],{"id":127,"question_zh":128,"answer_zh":129,"source_url":130},32044,"为什么在使用 Filestream 读取某些 CSV 文件时会报 'NoneType' object is not subscriptable 错误？","该错误通常发生在尝试从 CSV 文件创建数据流时，特别是当文件包含浮点数或特定格式的数据时。虽然部分数据集（如 moving_squares）可以正常工作，但自定义生成并保存的 CSV 文件可能会触发此错误。建议检查 CSV 文件的格式和数据类型，确保没有缺失值或格式不一致的问题。如果问题持续，建议在用户群组中提供具体的复现代码以获取更详细的帮助。","https:\u002F\u002Fgithub.com\u002Fscikit-multiflow\u002Fscikit-multiflow\u002Fissues\u002F87",{"id":132,"question_zh":133,"answer_zh":134,"source_url":130},32045,"是否支持预序贯交叉验证评估（Prequential Cross-Validation Evaluation）？","目前 scikit-multiflow 原生不支持预序贯（Prequential）交叉验证评估。如果您需要此功能，可能需要自行实现相关逻辑或关注未来的版本更新。",{"id":136,"question_zh":137,"answer_zh":138,"source_url":139},32046,"如何处理具有延迟响应的数据流样本（即预测后无法立即获得真实标签）？","目前的简单示例中，模型在做出预测后会立即进行评估。对于存在延迟响应的场景（即在延迟期间产生新样本并需要预测），框架尚未直接支持延迟评估机制。这是一个已知的需求，开发团队正在讨论适当的设计方案，计划在未来版本中提出解决方案。","https:\u002F\u002Fgithub.com\u002Fscikit-multiflow\u002Fscikit-multiflow\u002Fissues\u002F35",{"id":141,"question_zh":142,"answer_zh":143,"source_url":144},32047,"为什么 Hoeffding Tree 在不同 Python 版本下的叶子节点分布结果不一致？","这是由于 Python 字典（dict）在不同版本中的行为差异导致的。在 Python 3.6+ 中，字典保持插入顺序，而在早期版本中行为不同，导致 `_observed_class_distribution` 的最终值顺序不一致。解决方案是确保字典键始终保持排序状态（例如按升序排列），而不是依赖 `OrderedDict`，因为 `OrderedDict` 仅记录插入顺序，无法满足按键值排序的需求。如果键是连续的整数，Python 3.5 可能会自动排序，但这不可靠，显式排序是关键。","https:\u002F\u002Fgithub.com\u002Fscikit-multiflow\u002Fscikit-multiflow\u002Fissues\u002F91",{"id":146,"question_zh":147,"answer_zh":148,"source_url":149},32048,"如何将运行时间和内存使用量作为评估指标进行监控和可视化？","运行时间和内存使用量已被纳入评估体系。由于时间会单调递增，而内存使用情况因算法而异，建议的处理方式是：\n1. 在图表中以文本形式显示当前的训练时间、测试时间和总时间，以及当前内存占用，而不是绘制随时间变化的曲线。\n2. 
详细的时间（训练\u002F测试\u002F总计）和内存数据会记录在生成的 CSV 摘要文件中，供后续详细分析使用。\n这种设计既保持了图表的简洁性，又保留了完整的数据日志。","https:\u002F\u002Fgithub.com\u002Fscikit-multiflow\u002Fscikit-multiflow\u002Fissues\u002F18",{"id":151,"question_zh":152,"answer_zh":153,"source_url":154},32049,"scikit-multiflow 是否计划实现 Streaming Half-Space-Trees (HS-Trees) 用于异常检测？","社区已提出将 Streaming Half-Space-Trees (HS-Trees) 添加到 scikit-multiflow 的建议，这是一种快速的单类异常检测器，特别适用于演化数据流且异常数据稀少的场景。该算法已在 MOA 中实现。虽然这是一个很有价值的扩展方向，但具体实施进度需关注项目的路线图或贡献指南。","https:\u002F\u002Fgithub.com\u002Fscikit-multiflow\u002Fscikit-multiflow\u002Fissues\u002F109",[156,161,166,171,176,181,186,191,196,201],{"id":157,"version":158,"summary_zh":159,"released_at":160},239282,"0.5.3","修复了一个导致 conda-forge Windows 构建流水线报错的 bug。\n\n无功能变更。本版本的功能与 [v0.5.0](https:\u002F\u002Fscikit-multiflow.readthedocs.io\u002Fen\u002Fstable\u002Fwhats_new.html#version-0-5-0) 相同。","2020-06-17T04:59:41",{"id":162,"version":163,"summary_zh":164,"released_at":165},239283,"0.5.0","有关详细信息，请参阅相应的[变更日志条目](https:\u002F\u002Fscikit-multiflow.readthedocs.io\u002Fen\u002Fstable\u002Fwhats_new.html#version-0-5-0)。","2020-06-14T02:57:10",{"id":167,"version":168,"summary_zh":169,"released_at":170},239284,"0.4.1","有关详细信息，请参阅相应的[变更日志条目](https:\u002F\u002Fscikit-multiflow.readthedocs.io\u002Fen\u002Fstable\u002Fwhats_new.html#version-0-4-1)。","2019-09-16T00:27:29",{"id":172,"version":173,"summary_zh":174,"released_at":175},239285,"0.4.0","有关详细信息，请参阅相应的[变更日志条目](https:\u002F\u002Fscikit-multiflow.readthedocs.io\u002Fen\u002Fstable\u002Fwhats_new.html#version-0-4-0)。","2019-09-10T03:05:28",{"id":177,"version":178,"summary_zh":179,"released_at":180},239286,"0.3.0","有关详细信息，请参阅相应的[变更日志条目](https:\u002F\u002Fscikit-multiflow.readthedocs.io\u002Fen\u002Fstable\u002Fwhats_new.html#version-0-3-0)。","2019-05-23T07:12:04",{"id":182,"version":183,"summary_zh":184,"released_at":185},239287,"0.2.0","# 新特性\n* 
加性专家集成（[AddExp](https:\u002F\u002Fscikit-multiflow.github.io\u002Fscikit-multiflow\u002F_autosummary\u002Fskmultiflow.meta.additive_expert_ensemble.html#module-skmultiflow.meta.additive_expert_ensemble)）[[论文](https:\u002F\u002Fdl.acm.org\u002Fcitation.cfm?id=1102408)]\n* 动态加权多数集成（[DWE](https:\u002F\u002Fscikit-multiflow.github.io\u002Fscikit-multiflow\u002F_autosummary\u002Fskmultiflow.meta.dynamic_weighted_majority.html#module-skmultiflow.meta.dynamic_weighted_majority)）[[论文](http:\u002F\u002Fwww.jmlr.org\u002Fpapers\u002Fvolume8\u002Fkolter07a\u002Fkolter07a.pdf)]\n\n# 其他\n* 从支持的 Python 版本列表中移除 `Python 3.4`，因为它已达到生命周期结束日期\n* 其他错误修复","2019-04-18T02:04:59",{"id":187,"version":188,"summary_zh":189,"released_at":190},239288,"0.1.4","\r\n### 新方法\r\n\r\n* 准确率加权集成分类器（[论文](https:\u002F\u002Fdl.acm.org\u002Fcitation.cfm?id=956778)）\r\n\r\n### 其他\r\n\r\n* 漂移检测方法的改进和错误修复\r\n* Docker 镜像的示例和文档得到优化\r\n* 其他错误修复\r\n\r\n","2019-04-02T03:36:00",{"id":192,"version":193,"summary_zh":194,"released_at":195},239289,"0.1.3","### 新方法\n* Learn++.NSE 分类器（[论文](https:\u002F\u002Fieeexplore.ieee.org\u002Fdocument\u002F5975223)）\n* 朴素贝叶斯（增量式）\n* 霍夫丁任意时间树或极速决策树（[论文](https:\u002F\u002Fdl.acm.org\u002Fcitation.cfm?id=3219819.3220005)）\n\n### 其他\n* 少量性能优化\n* 各种错误修复\n\n","2019-03-07T10:13:59",{"id":197,"version":198,"summary_zh":199,"released_at":200},239290,"0.1.2","### 新方法\n- Learn++ 分类器（[论文](https:\u002F\u002Fieeexplore.ieee.org\u002Fdocument\u002F983933)）\n\n### 杂项\n- 在将所有预测结果保存到文件时，提升评估器的性能\n- 其他错误修复","2019-02-20T09:57:49",{"id":202,"version":203,"summary_zh":204,"released_at":205},239291,"0.1.0","`scikit-multiflow` 的初始（小型）发布。\n\n自此，`scikit-multiflow` 也将通过 [PyPI](https:\u002F\u002Fpypi.org\u002Fproject\u002Fscikit-multiflow\u002F) 提供。","2019-01-23T18:56:26"]