[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-rasbt--pattern_classification":3,"tool-rasbt--pattern_classification":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",149489,2,"2026-04-10T11:32:46",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",108322,"2026-04-10T11:39:34",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[52,13,15,14],"插件",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":32,"last_commit_at":59,"category_tags":60,"status":17},4721,"markitdown","microsoft\u002Fmarkitdown","MarkItDown 是一款由微软 AutoGen 团队打造的轻量级 Python 工具，专为将各类文件高效转换为 Markdown 格式而设计。它支持 PDF、Word、Excel、PPT、图片（含 OCR）、音频（含语音转录）、HTML 乃至 YouTube 链接等多种格式的解析，能够精准提取文档中的标题、列表、表格和链接等关键结构信息。\n\n在人工智能应用日益普及的今天，大语言模型（LLM）虽擅长处理文本，却难以直接读取复杂的二进制办公文档。MarkItDown 恰好解决了这一痛点，它将非结构化或半结构化的文件转化为模型“原生理解”且 Token 效率极高的 Markdown 格式，成为连接本地文件与 AI 分析 pipeline 的理想桥梁。此外，它还提供了 MCP（模型上下文协议）服务器，可无缝集成到 Claude Desktop 等 LLM 应用中。\n\n这款工具特别适合开发者、数据科学家及 AI 研究人员使用，尤其是那些需要构建文档检索增强生成（RAG）系统、进行批量文本分析或希望让 AI 助手直接“阅读”本地文件的用户。虽然生成的内容也具备一定可读性，但其核心优势在于为机器",93400,"2026-04-06T19:52:38",[52,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":66,"readme_en":67,"readme_zh":68,"quickstart_zh":69,"use_case_zh":70,"hero_image_url":71,"owner_login":72,"owner_name":73,"owner_avatar_url":74,"owner_bio":75,"owner_company":76,"owner_location":76,"owner_email":76,"owner_twitter":72,"owner_website":77,"owner_url":78,"languages":79,"stars":103,"forks":104,"last_commit_at":105,"license":106,"difficulty_score":107,"env_os":108,"env_gpu":108,"env_ram":108,"env_deps":109,"category_tags":118,"github_topics":119,"view_count":32,"oss_zip_url":76,"oss_zip_packed_at":76,"status":17,"created_at":123,"updated_at":124,"faqs":125,"releases":141},6355,"rasbt\u002Fpattern_classification","pattern_classification","A collection of tutorials and examples for solving and understanding machine learning and pattern classification tasks","pattern_classification 是一个专注于机器学习与模式分类的开源教程合集，旨在帮助学习者系统掌握从数据预处理到模型评估的全流程技能。它通过丰富的代码示例和详细文档，解决了初学者在面对复杂算法时难以理解理论落地、缺乏实战参考的痛点。\n\n该项目内容涵盖特征提取、数据标准化、降维技术（如 PCA 和 LDA）、经典分类算法（贝叶斯、逻辑回归、神经网络）以及集成学习等核心主题。其独特亮点在于提供了大量基于 Python 科学计算栈（特别是 scikit-learn）的 IPython Notebook 交互式教程，并附带清晰的监督学习流程图，让抽象的数学原理变得直观易懂。\n\n无论是刚入门的数据科学学生、需要快速查阅算法实现的开发者，还是希望巩固基础的研究人员，都能从中获益。如果你正在寻找一份结构清晰、注重实践且免费的学习资源来开启或深化机器学习之旅，pattern_classification 将是非常理想的选择。","\n\n\n\n\n![logo](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frasbt_pattern_classification_readme_b4a5a002390f.png)\n\n\u003Chr>\n**Tutorials, examples, collections, and everything else that falls into the categories: pattern classification, machine learning, and data mining.**\n\u003Cbr>\n\u003Cbr>\n\n\n\u003Cbr>\n\u003Cbr>\n\n# Sections\n\n\n- [Introduction to Machine Learning and Pattern Classification](#introduction-to-machine-learning-and-pattern-classification)\n- [Pre-Processing](#pre-processing)\n- [Model Evaluation](#model-evaluation)\n- [Parameter Estimation](#parameter-estimation)\n- [Machine Learning Algorithms](#machine-learning-algorithms)\n\t- [Bayes Classification](#bayes-classification)\n\t- [Logistic Regression](#logistic-regression)\n\t- [Neural Networks](#neural-networks)\n\t- [Ensemble Methods](#ensemble-methods)\n- [Clustering](#clustering)\n- [Collecting Data](#collecting-data)\n- [Data Visualization](#data-visualization)\n- [Statistical Pattern Classification Examples](#statistical-pattern-classification-examples)\n- [Books](#books)\n- [Talks](#talks)\n- [Applications](#applications)\n- [Resources](#resources)\n\n\u003Cbr>\n\n\n\n\u003Cbr>\n\n\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frasbt_pattern_classification_readme_28b4860796a1.png\" style=\"width: 700px; height:600px;\">\n\n[[Download a PDF version](https:\u002F\u002Fgithub.com\u002Frasbt\u002Fpattern_classification\u002Fraw\u002Fmaster\u002FPDFs\u002Fsupervised_learning_flowchart.pdf)] of this flowchart.\n\n\u003Cbr>\n\u003Cbr>\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n### Introduction to Machine Learning and Pattern Classification\n[[back to top](#sections)]\n\n- Predictive modeling, supervised machine learning, and pattern classification - the big picture [[Markdown](.\u002Fmachine_learning\u002Fsupervised_intro\u002Fintroduction_to_supervised_machine_learning.md)]\n\n- Entry Point: Data - Using Python's sci-packages to prepare data for Machine Learning tasks and other data analyses [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fmachine_learning\u002Fscikit-learn\u002Fpython_data_entry_point.ipynb)]\n\n- An Introduction to simple linear supervised classification using `scikit-learn` [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fmachine_learning\u002Fscikit-learn\u002Fscikit_linear_classification.ipynb)]\n\n\n\n\u003Cbr>\n\u003Cbr>\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n### Pre-processing\n\n[[back to top](#sections)]\n\n- **Feature Extraction**\n\t- Tips and Tricks for Encoding Categorical Features in Classification Tasks [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fpreprocessing\u002Ffeature_encoding.ipynb)]\n- **Scaling and Normalization**\n\t- About Feature Scaling: Standardization and Min-Max-Scaling (Normalization) [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fpreprocessing\u002Fabout_standardization_normalization.ipynb)]\n\n\n- **Feature Selection**\n\t- Sequential Feature Selection Algorithms [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fdimensionality_reduction\u002Ffeature_selection\u002Fsequential_selection_algorithms.ipynb)]\n\n- **Dimensionality Reduction**\n\t- Principal Component Analysis (PCA) [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fdimensionality_reduction\u002Fprojection\u002Fprincipal_component_analysis.ipynb)]\n\t- The effect of scaling and mean centering of variables prior to a PCA [[PDF](https:\u002F\u002Fgithub.com\u002Frasbt\u002Fpattern_classification\u002Fraw\u002Fmaster\u002Fdimensionality_reduction\u002Fprojection\u002Fscale_center_pca\u002Fscale_center_pca.pdf)] [[HTML](http:\u002F\u002Fhtmlpreview.github.io\u002F?https:\u002F\u002Fraw.githubusercontent.com\u002Frasbt\u002Fpattern_classification\u002Fmaster\u002Fdimensionality_reduction\u002Fprojection\u002Fscale_center_pca\u002Fscale_center_pca.html)]\n\t- PCA based on the covariance vs. correlation matrix  [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fdimensionality_reduction\u002Fprojection\u002Fpca_cov_cor.ipynb)]\n  - Linear Discriminant Analysis (LDA) [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fdimensionality_reduction\u002Fprojection\u002Flinear_discriminant_analysis.ipynb)]\n\t- Kernel tricks and nonlinear dimensionality reduction via PCA [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fdimensionality_reduction\u002Fprojection\u002Fkernel_pca.ipynb)]\n\n- **Representing Text**\n\t- Tf-idf Walkthrough for scikit-learn [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fmachine_learning\u002Fscikit-learn\u002Ftfidf_scikit-learn.ipynb)]\n\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n### Model Evaluation\n[[back to top](#sections)]\n\n- An Overview of General Performance Metrics of Binary Classifier Systems [[PDF](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1410.5330.pdf)]\n- **Cross-validation**\n\t- Streamline your cross-validation workflow - scikit-learn's Pipeline in action [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fmachine_learning\u002Fscikit-learn\u002Fscikit-pipeline.ipynb)]\n- Model evaluation, model selection, and algorithm selection in machine learning - Part I [[Markdown](evaluation\u002Fmodel-evaluation\u002Fmodel-evaluation-selection-part1.md)]\n- Model evaluation, model selection, and algorithm selection in machine learning - Part II [[Markdown](evaluation\u002Fmodel-evaluation\u002Fmodel-evaluation-selection-part2.md)]\n\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n### Parameter Estimation\n[[back to top](#sections)]\n\n- **Parametric Techniques**\n    - Introduction to the Maximum Likelihood Estimate (MLE) [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fparameter_estimation_techniques\u002Fmaximum_likelihood_estimate.ipynb)]\n    - How to calculate Maximum Likelihood Estimates (MLE) for different distributions [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fparameter_estimation_techniques\u002Fmax_likelihood_est_distributions.ipynb)]\n\n- **Non-Parametric Techniques**\n\t- Kernel density estimation via the Parzen-window technique [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fparameter_estimation_techniques\u002Fparzen_window_technique.ipynb)]\n\t- The K-Nearest Neighbor (KNN) technique\n\n\n- **Regression Analysis**\n\t- Linear Regression\n\t\t- Least-Squares fit [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fdata_fitting\u002Fregression\u002Flinregr_least_squares_fit.ipynb)]\n\n   - Non-Linear Regression\n\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n\n\n\n### Machine Learning Algorithms\n[[back to top](#sections)]\n\n\n#### Bayes Classification\n\n- Naive Bayes and Text Classification I - Introduction and Theory [[PDF](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1410.5329.pdf)]\n#### Logistic Regression\n\n- Out-of-core Learning and Model Persistence using scikit-learn\n[[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fmachine_learning\u002Fscikit-learn\u002Foutofcore_modelpersistence.ipynb)]\n\n#### Neural Networks\n\n- Artificial Neurons and Single-Layer Neural Networks - How Machine Learning Algorithms Work Part 1 [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fmachine_learning\u002Fsinglelayer_neural_networks\u002Fsinglelayer_neural_networks.ipynb)]\n\n- Activation Function Cheatsheet [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fmachine_learning\u002Fneural_networks\u002Fipynb\u002Factivation_functions.ipynb)]\n\n#### Ensemble Methods\n\n- Implementing a Weighted Majority Rule Ensemble Classifier in scikit-learn  [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fmachine_learning\u002Fscikit-learn\u002Fensemble_classifier.ipynb)]\n\n#### Decision Trees\n\n- Cheatsheet for Decision Tree Classification [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fmachine_learning\u002Fdecision_trees\u002Fdecision-tree-cheatsheet.ipynb)]\n\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n### Clustering\n[[back to top](#sections)]\n\n- **Protoype-based clustering**\n- **Hierarchical clustering**\n\t- Complete-Linkage Clustering and Heatmaps in Python [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fclustering\u002Fhierarchical\u002Fclust_complete_linkage.ipynb)]\n- **Density-based clustering**\n- **Graph-based clustering**\n- **Probabilistic-based clustering**\n\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n\n\n## Collecting Data\n[[back to top](#sections)]\n\n- Collecting Fantasy Soccer Data with Python and Beautiful Soup [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fdata_collecting\u002Fparse_dreamteamfc_data.ipynb)]\n\n- Download Your Twitter Timeline and Turn into a Word Cloud Using Python [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fdata_collecting\u002Ftwitter_wordcloud.ipynb)]\n\n- Reading MNIST into NumPy arrays [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fdata_collecting\u002Freading_mnist.ipynb)]\n\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n## Data Visualization\n[[back to top](#sections)]\n\n- Exploratory Analysis of the Star Wars API  [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fdata_viz\u002Fswapi_viz.ipynb)]\n\n![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frasbt_pattern_classification_readme_c07d15f9e496.png)\n\n- Matplotlib examples -Exploratory data analysis of the Iris dataset [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fdata_viz\u002Fmatplotlib_viz_gallery.ipynb)]\n\n![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frasbt_pattern_classification_readme_137119cad066.png)\n\n- Artificial Intelligence publications per country\n\n[[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fdata_viz\u002Fai_publication_chloropleth\u002Fai_publication_chloropleth.ipynb)] [[PDF](.\u002Fdata_viz\u002Fai_publication_chloropleth\u002Fimages\u002Fai_publication_chloropleth_small.pdf)]\n\n![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frasbt_pattern_classification_readme_c0e54e5d8646.png)\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n\n### Statistical Pattern Classification Examples\n[[back to top](#sections)]\n\n- **Supervised Learning**\n\n    - Parametric Techniques\n    \t- Univariate Normal Density\n    \t\t- Ex1: 2-classes, equal variances, equal priors [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fstat_pattern_class\u002Fsupervised\u002Fparametric\u002F1_stat_superv_parametric.ipynb)]\n\t\t\t- Ex2: 2-classes, different variances, equal priors [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fstat_pattern_class\u002Fsupervised\u002Fparametric\u002F2_stat_superv_parametric.ipynb)]\n\t\t\t- Ex3: 2-classes, equal variances, different priors [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fstat_pattern_class\u002Fsupervised\u002Fparametric\u002F3_stat_superv_parametric.ipynb)]\n\t\t\t- Ex4: 2-classes, different variances, different priors, loss function [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fstat_pattern_class\u002Fsupervised\u002Fparametric\u002F4_stat_superv_parametric.ipynb)]\n\t\t\t- Ex5: 2-classes, different variances, equal priors, loss function, cauchy distr. [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fstat_pattern_class\u002Fsupervised\u002Fparametric\u002F5_stat_superv_parametric.ipynb)]\n\n\n\n    \t- Multivariate Normal Density\n\t\t\t- Ex5: 2-classes, different variances, equal priors, loss function [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fstat_pattern_class\u002Fsupervised\u002Fparametric\u002F5_stat_superv_parametric.ipynb)]\n\t\t\t- Ex7: 2-classes, equal variances, equal priors [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fstat_pattern_class\u002Fsupervised\u002Fparametric\u002F7_stat_superv_parametric.ipynb)]\n\n    - Non-Parametric Techniques\n\n\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n## Books\n[[back to top](#sections)]\n\n#### Python Machine Learning\n\n\u003Ca href='http:\u002F\u002Fsebastianraschka.com\u002Fpublications.html'>![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frasbt_pattern_classification_readme_8d589a4e9ae8.png)\u003C\u002Fa>\n\n- [Amazon link](http:\u002F\u002Fwww.amazon.com\u002FPython-Machine-Learning-Sebastian-Raschka\u002Fdp\u002F1783555130\u002Fref=sr_1_2?ie=UTF8&qid=1437754343&sr=8-2&keywords=python+machine+learning+essentials)\n- [Publisher Link](https:\u002F\u002Fwww.packtpub.com\u002Fbig-data-and-business-intelligence\u002Fpython-machine-learning)\n- [GitHub Code Repository](https:\u002F\u002Fgithub.com\u002Frasbt\u002Fpython-machine-learning-book)\n\n\n\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n## Talks\n[[back to top](#sections)]\n\n#### An Introduction to Supervised Machine Learning and Pattern Classification: The Big Picture\n\n\n\u003Ca href='http:\u002F\u002Fwww.slideshare.net\u002FSebastianRaschka\u002Fnextgen-talk-022015'>![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frasbt_pattern_classification_readme_a4c738bab6fb.png)\u003C\u002Fa>\n\n[[View on SlideShare](http:\u002F\u002Fwww.slideshare.net\u002FSebastianRaschka\u002Fnextgen-talk-022015)]\n\n[[Download PDF](https:\u002F\u002Fgithub.com\u002Frasbt\u002Fpattern_classification\u002Fraw\u002Fmaster\u002FPDFs\u002Fnextgentalk022015.pdf)]\n\n\n\u003Cbr>\n\u003Cbr>\n\n\n\n\n\n#### MusicMood - Machine Learning in Automatic Music Mood Prediction Based on Song Lyrics\n\n\u003Ca href='http:\u002F\u002Fwww.slideshare.net\u002FSebastianRaschka\u002Fmusicmood-20140912'> ![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frasbt_pattern_classification_readme_f3c78a73e9de.png)\u003C\u002Fa>\n\n[[View on SlideShare](http:\u002F\u002Fwww.slideshare.net\u002FSebastianRaschka\u002Fmusicmood-20140912)]\n\n\n[[Download PDF](https:\u002F\u002Fgithub.com\u002Frasbt\u002Fpattern_classification\u002Fraw\u002Fmaster\u002FPDFs\u002Fmusicmood20140912.pdf)]\n\n\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n## Applications\n[[back to top](#sections)]\n\n#### MusicMood - Machine Learning in Automatic Music Mood Prediction Based on Song Lyrics\n\nThis project is about building a music recommendation system for users who want to listen to happy songs. Such a system can not only be used to brighten up one's mood on a rainy weekend; especially in hospitals, other medical clinics, or public locations such as restaurants, the MusicMood classifier could be used to spread positive mood among people.\n\n![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frasbt_pattern_classification_readme_ad42649337f8.png)\n\n[[musicmood GitHub Repository](https:\u002F\u002Fgithub.com\u002Frasbt\u002Fmusicmood)]\n\n\u003Cbr>\n\n#### mlxtend - A library of extension and helper modules for Python's data analysis and machine learning libraries.\n\n![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frasbt_pattern_classification_readme_365e2c451ff9.png)\n\n[[mlxtend GitHub Repository](https:\u002F\u002Fgithub.com\u002Frasbt\u002Fmlxtend)]\n\n\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n## Resources\n[[back to top](#sections)]\n\n\n\n- Copy-and-paste ready LaTex equations [[Markdown](.\u002Fresources\u002Flatex_equations.md)]\n\n- Open-source datasets [[Markdown](.\u002Fresources\u002Fdataset_collections.md)]\n\n- Free Machine Learning eBooks [[Markdown](.\u002Fresources\u002Fmachine_learning_ebooks.md)]\n\n- Terms in data science defined in less than 50 words [[Markdown](.\u002Fresources\u002Fdata_glossary.md)]\n\n- Useful libraries for data science in Python [[Markdown](.\u002Fresources\u002Fpython_data_libraries.md)]\n\n- General Tips and Advices [[Markdown](.\u002Fresources\u002Fgeneral_tips_and_advices.md)]\n\n- A matrix cheatsheat for Python, R, Julia, and MATLAB  [[HTML](http:\u002F\u002Fsebastianraschka.com\u002FArticles\u002F2014_matrix_cheatsheet_table.html)]\n","![logo](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frasbt_pattern_classification_readme_b4a5a002390f.png)\n\n\u003Chr>\n**教程、示例、资料合集，以及所有属于模式分类、机器学习和数据挖掘范畴的内容。**\n\u003Cbr>\n\u003Cbr>\n\n\n\u003Cbr>\n\u003Cbr>\n\n# 章节\n\n\n- [机器学习与模式分类简介](#introduction-to-machine-learning-and-pattern-classification)\n- [预处理](#pre-processing)\n- [模型评估](#model-evaluation)\n- [参数估计](#parameter-estimation)\n- [机器学习算法](#machine-learning-algorithms)\n\t- [贝叶斯分类](#bayes-classification)\n\t- [逻辑回归](#logistic-regression)\n\t- [神经网络](#neural-networks)\n\t- [集成方法](#ensemble-methods)\n- [聚类](#clustering)\n- [数据收集](#collecting-data)\n- [数据可视化](#data-visualization)\n- [统计模式分类示例](#statistical-pattern-classification-examples)\n- [书籍](#books)\n- [讲座](#talks)\n- [应用](#applications)\n- [资源](#resources)\n\n\u003Cbr>\n\n\n\n\u003Cbr>\n\n\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frasbt_pattern_classification_readme_28b4860796a1.png\" style=\"width: 700px; height:600px;\">\n\n[[下载PDF版本](https:\u002F\u002Fgithub.com\u002Frasbt\u002Fpattern_classification\u002Fraw\u002Fmaster\u002FPDFs\u002Fsupervised_learning_flowchart.pdf)] 此流程图。\n\n\u003Cbr>\n\u003Cbr>\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n### 机器学习与模式分类简介\n[[返回顶部](#sections)]\n\n- 预测建模、监督式机器学习和模式分类——宏观视角 [[Markdown](.\u002Fmachine_learning\u002Fsupervised_intro\u002Fintroduction_to_supervised_machine_learning.md)]\n\n- 入门：数据——使用Python的科学计算包为机器学习任务及其他数据分析准备数据 [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fmachine_learning\u002Fscikit-learn\u002Fpython_data_entry_point.ipynb)]\n\n- 使用`scikit-learn`进行简单线性监督分类入门 [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fmachine_learning\u002Fscikit-learn\u002Fscikit_linear_classification.ipynb)]\n\n\n\n\u003Cbr>\n\u003Cbr>\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n### 预处理\n\n[[返回顶部](#sections)]\n\n- **特征提取**\n\t- 分类任务中对类别型特征进行编码的技巧与窍门 [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fpreprocessing\u002Ffeature_encoding.ipynb)]\n- **缩放与归一化**\n\t- 关于特征缩放：标准化与最小-最大缩放（归一化） [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fpreprocessing\u002Fabout_standardization_normalization.ipynb)]\n\n\n- **特征选择**\n\t- 顺序特征选择算法 [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fdimensionality_reduction\u002Ffeature_selection\u002Fsequential_selection_algorithms.ipynb)]\n\n- **降维**\n\t- 主成分分析（PCA） [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fdimensionality_reduction\u002Fprojection\u002Fprincipal_component_analysis.ipynb)]\n\t- 变量在进行PCA之前进行缩放和均值中心化的影响 [[PDF](https:\u002F\u002Fgithub.com\u002Frasbt\u002Fpattern_classification\u002Fraw\u002Fmaster\u002Fdimensionality_reduction\u002Fprojection\u002Fscale_center_pca\u002Fscale_center_pca.pdf)] [[HTML](http:\u002F\u002Fhtmlpreview.github.io\u002F?https:\u002F\u002Fraw.githubusercontent.com\u002Frasbt\u002Fpattern_classification\u002Fmaster\u002Fdimensionality_reduction\u002Fprojection\u002Fscale_center_pca\u002Fscale_center_pca.html)]\n\t- 基于协方差矩阵与相关矩阵的PCA [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fdimensionality_reduction\u002Fprojection\u002Fpca_cov_cor.ipynb)]\n\t- 线性判别分析（LDA） [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fdimensionality_reduction\u002Fprojection\u002Flinear_discriminant_analysis.ipynb)]\n\t- 核技巧及基于PCA的非线性降维 [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fdimensionality_reduction\u002Fprojection\u002Fkernel_pca.ipynb)]\n\n- **文本表示**\n\t- scikit-learn中的TF-IDF详解 [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fmachine_learning\u002Fscikit-learn\u002Ftfidf_scikit-learn.ipynb)]\n\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n### 模型评估\n[[返回顶部](#sections)]\n\n- 二分类器系统通用性能指标概述 [[PDF](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1410.5330.pdf)]\n- **交叉验证**\n\t- 简化你的交叉验证工作流——scikit-learn的Pipeline实战 [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fmachine_learning\u002Fscikit-learn\u002Fscikit-pipeline.ipynb)]\n- 机器学习中的模型评估、模型选择与算法选择——第一部分 [[Markdown](evaluation\u002Fmodel-evaluation\u002Fmodel-evaluation-selection-part1.md)]\n- 机器学习中的模型评估、模型选择与算法选择——第二部分 [[Markdown](evaluation\u002Fmodel-evaluation\u002Fmodel-evaluation-selection-part2.md)]\n\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n### 参数估计\n[[返回顶部](#sections)]\n\n- **参数化技术**\n    - 最大似然估计（MLE）简介 [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fparameter_estimation_techniques\u002Fmaximum_likelihood_estimate.ipynb)]\n    - 如何计算不同分布的最大似然估计（MLE） [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fparameter_estimation_techniques\u002Fmax_likelihood_est_distributions.ipynb)]\n\n- **非参数化技术**\n\t- 基于Parzen窗技术的核密度估计 [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fparameter_estimation_techniques\u002Fparzen_window_technique.ipynb)]\n\t- K近邻（KNN）技术\n\n\n- **回归分析**\n\t- 线性回归\n\t\t- 最小二乘法拟合 [[IPython nb](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fdata_fitting\u002Fregression\u002Flinregr_least_squares_fit.ipynb)]\n\n   - 非线性回归\n\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n### 机器学习算法\n[[返回顶部](#sections)]\n\n\n#### 贝叶斯分类\n\n- 朴素贝叶斯与文本分类 I - 简介与理论 [[PDF](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1410.5329.pdf)]\n#### 逻辑回归\n\n- 使用 scikit-learn 进行外存学习和模型持久化\n[[IPython 笔记本](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fmachine_learning\u002Fscikit-learn\u002Foutofcore_modelpersistence.ipynb)]\n\n#### 神经网络\n\n- 人工神经元与单层神经网络 - 机器学习算法的工作原理 第一部分 [[IPython 笔记本](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fmachine_learning\u002Fsinglelayer_neural_networks\u002Fsinglelayer_neural_networks.ipynb)]\n\n- 激活函数速查表 [[IPython 笔记本](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fmachine_learning\u002Fneural_networks\u002Fipynb\u002Factivation_functions.ipynb)]\n\n#### 集成方法\n\n- 在 scikit-learn 中实现加权多数规则集成分类器 [[IPython 笔记本](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fmachine_learning\u002Fscikit-learn\u002Fensemble_classifier.ipynb)]\n\n#### 决策树\n\n- 决策树分类速查表 [[IPython 笔记本](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fmachine_learning\u002Fdecision_trees\u002Fdecision-tree-cheatsheet.ipynb)]\n\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n### 聚类\n[[返回顶部](#sections)]\n\n- **基于原型的聚类**\n- **层次聚类**\n\t- 完全链接聚类与 Python 中的热图 [[IPython 笔记本](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fclustering\u002Fhierarchical\u002Fclust_complete_linkage.ipynb)]\n- **基于密度的聚类**\n- **基于图的聚类**\n- **基于概率的聚类**\n\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n\n\n## 数据收集\n[[返回顶部](#sections)]\n\n- 使用 Python 和 Beautiful Soup 收集梦幻足球数据 [[IPython 笔记本](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fdata_collecting\u002Fparse_dreamteamfc_data.ipynb)]\n\n- 下载你的 Twitter 时间线并用 Python 转换为词云 [[IPython 笔记本](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fdata_collecting\u002Ftwitter_wordcloud.ipynb)]\n\n- 将 MNIST 数据读取到 NumPy 数组中 [[IPython 笔记本](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fdata_collecting\u002Freading_mnist.ipynb)]\n\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n## 数据可视化\n[[返回顶部](#sections)]\n\n- 星球大战 API 的探索性分析 [[IPython 笔记本](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fdata_viz\u002Fswapi_viz.ipynb)]\n\n![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frasbt_pattern_classification_readme_c07d15f9e496.png)\n\n- Matplotlib 示例 - Iris 数据集的探索性数据分析 [[IPython 笔记本](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fdata_viz\u002Fmatplotlib_viz_gallery.ipynb)]\n\n![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frasbt_pattern_classification_readme_137119cad066.png)\n\n- 各国人工智能出版物分布\n\n[[IPython 笔记本](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fdata_viz\u002Fai_publication_chloropleth\u002Fai_publication_chloropleth.ipynb)] [[PDF](.\u002Fdata_viz\u002Fai_publication_chloropleth\u002Fimages\u002Fai_publication_chloropleth_small.pdf)]\n\n![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frasbt_pattern_classification_readme_c0e54e5d8646.png)\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n\n### 统计模式分类示例\n[[返回顶部](#sections)]\n\n- **监督学习**\n\n    - 参数化方法\n    \t- 单变量正态密度\n    \t\t- 例1：2 类，方差相等，先验概率相等 [[IPython 笔记本](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fstat_pattern_class\u002Fsupervised\u002Fparametric\u002F1_stat_superv_parametric.ipynb)]\n\t\t\t- 例2：2 类，方差不等，先验概率相等 [[IPython 笔记本](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fstat_pattern_class\u002Fsupervised\u002Fparametric\u002F2_stat_superv_parametric.ipynb)]\n\t\t\t- 例3：2 类，方差相等，先验概率不等 [[IPython 笔记本](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fstat_pattern_class\u002Fsupervised\u002Fparametric\u002F3_stat_superv_parametric.ipynb)]\n\t\t\t- 例4：2 类，方差不等，先验概率不等，使用损失函数 [[IPython 笔记本](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fstat_pattern_class\u002Fsupervised\u002Fparametric\u002F4_stat_superv_parametric.ipynb)]\n\t\t\t- 例5：2 类，方差不等，先验概率相等，使用损失函数，采用柯西分布 [[IPython 笔记本](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fstat_pattern_class\u002Fsupervised\u002Fparametric\u002F5_stat_superv_parametric.ipynb)]\n\n\n\n    \t- 多变量正态密度\n\t\t\t- 例5：2 类，方差不等，先验概率相等，使用损失函数 [[IPython 笔记本](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fstat_pattern_class\u002Fsupervised\u002Fparametric\u002F5_stat_superv_parametric.ipynb)]\n\t\t\t- 例7：2 类，方差相等，先验概率相等 [[IPython 笔记本](http:\u002F\u002Fnbviewer.ipython.org\u002Fgithub\u002Frasbt\u002Fpattern_classification\u002Fblob\u002Fmaster\u002Fstat_pattern_class\u002Fsupervised\u002Fparametric\u002F7_stat_superv_parametric.ipynb)]\n\n    - 非参数化方法\n\n\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n## 书籍\n[[返回顶部](#sections)]\n\n#### Python 机器学习\n\n\u003Ca href='http:\u002F\u002Fsebastianraschka.com\u002Fpublications.html'>![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frasbt_pattern_classification_readme_8d589a4e9ae8.png)\u003C\u002Fa>\n\n- [亚马逊链接](http:\u002F\u002Fwww.amazon.com\u002FPython-Machine-Learning-Sebastian-Raschka\u002Fdp\u002F1783555130\u002Fref=sr_1_2?ie=UTF8&qid=1437754343&sr=8-2&keywords=python+machine+learning+essentials)\n- [出版社链接](https:\u002F\u002Fwww.packtpub.com\u002Fbig-data-and-business-intelligence\u002Fpython-machine-learning)\n- [GitHub 代码仓库](https:\u002F\u002Fgithub.com\u002Frasbt\u002Fpython-machine-learning-book)\n\n\n\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n## 讲座\n[[返回顶部](#sections)]\n\n#### 监督机器学习与模式分类导论：全局视角\n\n\n\u003Ca href='http:\u002F\u002Fwww.slideshare.net\u002FSebastianRaschka\u002Fnextgen-talk-022015'>![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frasbt_pattern_classification_readme_a4c738bab6fb.png)\u003C\u002Fa>\n\n[[在 SlideShare 上查看](http:\u002F\u002Fwww.slideshare.net\u002FSebastianRaschka\u002Fnextgen-talk-022015)]\n\n[[下载 PDF](https:\u002F\u002Fgithub.com\u002Frasbt\u002Fpattern_classification\u002Fraw\u002Fmaster\u002FPDFs\u002Fnextgentalk022015.pdf)]\n\n\n\u003Cbr>\n\u003Cbr>\n\n\n\n\n\n#### MusicMood - 基于歌词的自动音乐情绪预测中的机器学习\n\n\u003Ca href='http:\u002F\u002Fwww.slideshare.net\u002FSebastianRaschka\u002Fmusicmood-20140912'> ![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frasbt_pattern_classification_readme_f3c78a73e9de.png)\u003C\u002Fa>\n\n[[在 SlideShare 上查看](http:\u002F\u002Fwww.slideshare.net\u002FSebastianRaschka\u002Fmusicmood-20140912)]\n\n\n[[下载 PDF](https:\u002F\u002Fgithub.com\u002Frasbt\u002Fpattern_classification\u002Fraw\u002Fmaster\u002FPDFs\u002Fmusicmood20140912.pdf)]\n\n\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n## 应用\n[[返回顶部](#sections)]\n\n#### MusicMood - 基于歌词的自动音乐情绪预测中的机器学习\n\n该项目旨在为希望聆听欢快歌曲的用户构建一个音乐推荐系统。这样的系统不仅可以在阴雨绵绵的周末用来愉悦心情；特别是在医院、其他医疗机构或餐厅等公共场所，MusicMood 分类器还可以用来在人群中传播积极的情绪。\n\n![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frasbt_pattern_classification_readme_ad42649337f8.png)\n\n[[musicmood GitHub 仓库](https:\u002F\u002Fgithub.com\u002Frasbt\u002Fmusicmood)]\n\n\u003Cbr>\n\n#### mlxtend - 用于 Python 数据分析和机器学习库的扩展及辅助模块库。\n\n![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frasbt_pattern_classification_readme_365e2c451ff9.png)\n\n[[mlxtend GitHub 仓库](https:\u002F\u002Fgithub.com\u002Frasbt\u002Fmlxtend)]\n\n\n\u003Cbr>\n\u003Chr>\n\u003Cbr>\n\n## 资源\n[[返回顶部](#sections)]\n\n\n\n- 可直接复制粘贴的 LaTeX 公式 [[Markdown](.\u002Fresources\u002Flatex_equations.md)]\n\n- 开源数据集 [[Markdown](.\u002Fresources\u002Fdataset_collections.md)]\n\n- 免费的机器学习电子书 [[Markdown](.\u002Fresources\u002Fmachine_learning_ebooks.md)]\n\n- 用不到 50 字解释的数据科学术语 [[Markdown](.\u002Fresources\u002Fdata_glossary.md)]\n\n- Python 中常用的数据科学库 [[Markdown](.\u002Fresources\u002Fpython_data_libraries.md)]\n\n- 通用技巧与建议 [[Markdown](.\u002Fresources\u002Fgeneral_tips_and_advices.md)]\n\n- Python、R、Julia 和 MATLAB 的矩阵速查表 [[HTML](http:\u002F\u002Fsebastianraschka.com\u002FArticles\u002F2014_matrix_cheatsheet_table.html)]","# pattern_classification 快速上手指南\n\n`pattern_classification` 是一个由 Sebastian Raschka 维护的开源知识库，汇集了模式分类、机器学习和数据挖掘领域的教程、示例代码及理论讲解。该项目主要基于 Python 和 `scikit-learn`，通过 Jupyter Notebook 形式提供从数据预处理到模型评估的完整学习路径。\n\n## 环境准备\n\n本项目主要依赖 Python 科学计算生态栈。建议在使用前确保系统已安装以下基础环境：\n\n*   **操作系统**：Windows, macOS, 或 Linux\n*   **Python 版本**：推荐 Python 3.8+\n*   **核心依赖库**：\n    *   `numpy`\n    *   `scipy`\n    *   `scikit-learn`\n    *   `matplotlib`\n    *   `pandas`\n    *   `jupyter` (用于运行 `.ipynb` 笔记文件)\n\n> **提示**：对于国内开发者，建议在安装依赖时使用清华或阿里云镜像源以加速下载。\n\n## 安装步骤\n\n### 1. 克隆项目仓库\n\n首先，将项目代码克隆到本地：\n\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Frasbt\u002Fpattern_classification.git\ncd pattern_classification\n```\n\n### 2. 创建虚拟环境并安装依赖\n\n推荐使用 `conda` 或 `venv` 创建隔离环境。以下是使用 `pip` 配合国内镜像源安装所需库的命令：\n\n```bash\n# 创建虚拟环境 (可选)\npython -m venv ml_env\nsource ml_env\u002Fbin\u002Factivate  # Windows 用户请使用: ml_env\\Scripts\\activate\n\n# 安装核心依赖 (使用清华镜像源)\npip install -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple numpy scipy scikit-learn matplotlib pandas jupyter\n```\n\n*注：由于该项目主要是教程集合，根目录下可能没有统一的 `requirements.txt`，上述命令安装了运行大多数示例所需的核心包。特定章节若有额外需求，请参考对应 Notebook 开头的导入语句。*\n\n## 基本使用\n\n该项目不包含单一的命令行工具，而是由一系列独立的 Jupyter Notebook 组成。你可以根据学习目标直接打开对应的笔记文件进行交互式学习。\n\n### 启动 Jupyter Notebook\n\n在项目根目录下运行以下命令启动服务：\n\n```bash\njupyter notebook\n```\n\n浏览器会自动打开界面，你可以导航至相应的文件夹（如 `machine_learning\u002F`, `preprocessing\u002F` 等）选择笔记。\n\n### 最简单的使用示例：线性分类\n\n以下是一个基于 `scikit-learn` 的简单线性分类示例，对应项目中 `machine_learning\u002Fscikit-learn\u002Fscikit_linear_classification.ipynb` 的核心逻辑。你可以在新的 Python 文件或 Notebook 单元格中直接运行：\n\n```python\nimport numpy as np\nfrom sklearn import datasets\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\n\n# 1. 加载数据 (鸢尾花数据集)\niris = datasets.load_iris()\nX = iris.data[:, [2, 3]]  # 仅取花瓣长度和宽度\ny = iris.target\n\n# 2. 划分训练集和测试集\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)\n\n# 3. 特征标准化 (Pre-processing)\nsc = StandardScaler()\nsc.fit(X_train)\nX_train_std = sc.transform(X_train)\nX_test_std = sc.transform(X_test)\n\n# 4. 训练逻辑回归模型 (Machine Learning Algorithm)\nclf = LogisticRegression(multi_class='ovr', solver='lbfgs')\nclf.fit(X_train_std, y_train)\n\n# 5. 模型评估 (Model Evaluation)\naccuracy = clf.score(X_test_std, y_test)\nprint(f\"模型准确率: {accuracy:.2f}\")\n\n# 6. 预测新样本\nsample = np.array([[5.0, 1.5]])\nsample_std = sc.transform(sample)\nprediction = clf.predict(sample_std)\nprint(f\"预测类别: {iris.target_names[prediction][0]}\")\n```\n\n### 探索更多主题\n\n你可以按照项目目录结构深入学习以下模块：\n\n*   **数据预处理** (`preprocessing\u002F`)：学习特征编码、标准化、PCA 降维等。\n*   **模型评估** (`evaluation\u002F`)：掌握交叉验证、性能指标计算。\n*   **算法实战** (`machine_learning\u002F`)：涵盖贝叶斯分类、神经网络、集成方法等。\n*   **数据可视化** (`data_viz\u002F`)：使用 Matplotlib 进行探索性数据分析。\n\n所有示例均以代码驱动，建议边阅读代码注释边运行单元格，以获得最佳学习效果。","某电商数据团队正试图构建一个用户流失预测模型，但面对杂乱的原始数据和复杂的算法选型感到无从下手。\n\n### 没有 pattern_classification 时\n- 团队成员在处理分类特征编码和特征缩放时频繁出错，缺乏统一的标准化流程参考，导致模型输入质量参差不齐。\n- 面对 PCA、LDA 等多种降维算法，不清楚何时该用协方差矩阵或相关系数矩阵，盲目尝试浪费了数天时间进行无效实验。\n- 在模型评估阶段，由于缺乏系统的参数估计和交叉验证示例，难以判断模型是过拟合还是欠拟合，调优过程如同“盲人摸象”。\n- 新手成员需要从零散的网络博客拼凑知识，学习曲线陡峭，无法快速掌握从数据预处理到集成学习的完整闭环。\n\n### 使用 pattern_classification 后\n- 直接复用其提供的分类特征编码与标准化教程，团队迅速建立了规范的数据预处理流水线，显著提升了数据清洗效率。\n- 参照其中关于 PCA 基于协方差与相关矩阵对比的详细案例，精准选择了适合当前数据分布的降维策略，避免了不必要的试错成本。\n- 利用现成的模型评估与参数估计笔记本代码，快速实施了严谨的交叉验证流程，准确锁定了最优超参数组合。\n- 依托其涵盖贝叶斯分类、神经网络到集成方法的系统化教程，团队成员在短时间内统一了技术认知，大幅缩短了从入门到实战的周期。\n\npattern_classification 通过提供全流程的实战教程与代码范例，将机器学习从理论黑盒转化为可落地的标准化工程实践。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frasbt_pattern_classification_b4a5a002.png","rasbt","Sebastian Raschka","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Frasbt_4eb76c31.jpg","AI Research Engineer working on LLMs.",null,"https:\u002F\u002Fsebastianraschka.com","https:\u002F\u002Fgithub.com\u002Frasbt",[80,84,88,92,96,99],{"name":81,"color":82,"percentage":83},"Jupyter Notebook","#DA5B0B",96.1,{"name":85,"color":86,"percentage":87},"HTML","#e34c26",2.9,{"name":89,"color":90,"percentage":91},"TeX","#3D6117",0.8,{"name":93,"color":94,"percentage":95},"Roff","#ecdebe",0.1,{"name":97,"color":98,"percentage":95},"CSS","#663399",{"name":100,"color":101,"percentage":102},"Python","#3572A5",0,4212,1278,"2026-04-10T06:16:43","GPL-3.0",1,"未说明",{"notes":110,"python":111,"dependencies":112},"该项目主要是一个包含教程、示例代码（IPython Notebook 格式）和文档的集合，而非单一的独立软件工具。内容涵盖模式分类、机器学习和数据挖掘的基础知识与实践。运行示例代码通常需要安装标准的 Python 数据科学栈（如 scikit-learn, NumPy, Matplotlib）。部分旧链接指向 IPython Notebook Viewer，现代环境建议使用 Jupyter Lab 或 Jupyter Notebook 运行本地文件。无特殊 GPU 或大内存需求，具体取决于运行的特定算法示例。","未说明 (基于 IPython Notebook 和 scikit-learn 推断，通常兼容 Python 2.7 或 3.x)",[113,114,115,116,117],"scikit-learn","NumPy","IPython\u002FJupyter Notebook","Matplotlib","Beautiful Soup",[14],[120,121,122],"machine-learning","pattern-classification","machine-learning-algorithms","2026-03-27T02:49:30.150509","2026-04-11T03:22:14.924457",[126,131,136],{"id":127,"question_zh":128,"answer_zh":129,"source_url":130},28770,"皮尔逊相关系数（Pearson Correlation Coefficient）的公式是否有误？","是的，之前的公式存在排版错误。正确的皮尔逊相关系数公式应为：分子是协方差的和，分母是两个变量标准差平方和的乘积再开根号。具体 LaTeX 表达式为：$r = \\frac{\\sum_{i=1}^{n} (x_i - \\overline{x})(y_i - \\overline{y})}{\\sqrt{{\\sum_{i=1}^{n} (x_i - \\overline{x})^2}{\\sum_{i=1}^{n} (y_i - \\overline{y})^2}}}$。维护者已根据反馈更新了文档。","https:\u002F\u002Fgithub.com\u002Frasbt\u002Fpattern_classification\u002Fissues\u002F50",{"id":132,"question_zh":133,"answer_zh":134,"source_url":135},28771,"在绘制层次聚类（Hierarchical Agglomerative Clustering）的热图加行树状图时，树状图显示上下颠倒怎么办？","可以通过两种方法解决：1. 在绘制树状图时，将 `count_sort` 参数设置为 `'descending'`；2. 使用树状图的 leave indices 对 DataFrame 进行逆序排序。此外，建议在隐藏树状图标签前，务必检查标签是否与数据行匹配，以避免对应错误。","https:\u002F\u002Fgithub.com\u002Frasbt\u002Fpattern_classification\u002Fissues\u002F41",{"id":137,"question_zh":138,"answer_zh":139,"source_url":140},28772,"如何识别哪些绘图库依赖外部 Web 服务（如 plot.ly）？","依赖外部 Web 服务的库（例如 plot.ly）通常需要互联网连接才能使用，否则仅作为 API 存在。在项目文档中，这类库会被明确标记出来，提示用户它们不是完全本地的解决方案。如果需要使用离线功能，应避免选择此类库或寻找其本地替代方案。","https:\u002F\u002Fgithub.com\u002Frasbt\u002Fpattern_classification\u002Fissues\u002F40",[]]