[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-cfgranda--ps4ds":3,"tool-cfgranda--ps4ds":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",160784,2,"2026-04-19T11:32:54",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",109154,"2026-04-18T11:18:24",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[52,13,15,14],"插件",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":32,"last_commit_at":59,"category_tags":60,"status":17},4721,"markitdown","microsoft\u002Fmarkitdown","MarkItDown 是一款由微软 AutoGen 团队打造的轻量级 Python 工具，专为将各类文件高效转换为 Markdown 格式而设计。它支持 PDF、Word、Excel、PPT、图片（含 OCR）、音频（含语音转录）、HTML 乃至 YouTube 链接等多种格式的解析，能够精准提取文档中的标题、列表、表格和链接等关键结构信息。\n\n在人工智能应用日益普及的今天，大语言模型（LLM）虽擅长处理文本，却难以直接读取复杂的二进制办公文档。MarkItDown 恰好解决了这一痛点，它将非结构化或半结构化的文件转化为模型“原生理解”且 Token 效率极高的 Markdown 格式，成为连接本地文件与 AI 分析 pipeline 的理想桥梁。此外，它还提供了 MCP（模型上下文协议）服务器，可无缝集成到 Claude Desktop 等 LLM 应用中。\n\n这款工具特别适合开发者、数据科学家及 AI 研究人员使用，尤其是那些需要构建文档检索增强生成（RAG）系统、进行批量文本分析或希望让 AI 助手直接“阅读”本地文件的用户。虽然生成的内容也具备一定可读性，但其核心优势在于为机器",93400,"2026-04-06T19:52:38",[52,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":66,"readme_en":67,"readme_zh":68,"quickstart_zh":69,"use_case_zh":70,"hero_image_url":71,"owner_login":72,"owner_name":73,"owner_avatar_url":74,"owner_bio":73,"owner_company":73,"owner_location":73,"owner_email":73,"owner_twitter":73,"owner_website":73,"owner_url":75,"languages":76,"stars":88,"forks":89,"last_commit_at":90,"license":73,"difficulty_score":91,"env_os":92,"env_gpu":92,"env_ram":92,"env_deps":93,"category_tags":96,"github_topics":98,"view_count":32,"oss_zip_url":73,"oss_zip_packed_at":73,"status":17,"created_at":116,"updated_at":117,"faqs":118,"releases":119},9820,"cfgranda\u002Fps4ds","ps4ds","Probability and Statistics for Data Science: A self-contained introduction to probability and statistics for data science, including a free pdf, 103 Python notebooks using 23 real-world datasets, 118 videos with slides, and solutions to 200 exercises","ps4ds 是一套专为数据科学设计的概率与统计自学资源，旨在帮助学习者从零开始掌握核心数学基础。它通过“理论 + 实战”的模式，有效解决了传统统计学教材枯燥难懂、缺乏真实场景应用的痛点。\n\n这套资源不仅包含免费的电子书预印本和配套教学视频，更提供了 103 个基于 Python 的交互式笔记本（Jupyter Notebooks）。其独特亮点在于完全依托真实世界的数据集进行教学，涵盖美国国会投票记录、奥运会篮球赛事、NBA 球员罚球数据、全球 GDP 及气温变化等丰富案例。用户可以在代码实践中直观理解经验概率、蒙特卡洛方法、最大似然估计、核密度估计等抽象概念，并配有 200 道习题的详细解答以巩固学习成果。\n\nps4ds 非常适合希望转行数据科学的开发者、需要夯实数理基础的研究人员，以及计算机相关专业的学生。对于想要避开纯理论推导，倾向于通过编程和数据分析来深入理解统计原理的学习者而言，这是一套不可多得的系统化入门指南。","# [Code repository](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds) for [Probability and Statistics for Data Science](https:\u002F\u002Fwww.ps4ds.net\u002F) \n\nCode for the book [Probability and Statistics for Data Science](https:\u002F\u002Fa.co\u002Fd\u002FcAss9mO). A free preprint, videos, code, slides and solutions to exercises are available at https:\u002F\u002Fwww.ps4ds.net \n\n## Probability\n\n  - [Voting by members of the United States House of Representatives](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fprobability\u002FUS_congress_votes.ipynb)\n    Empirical probability, conditional probability, independence, conditional independence\n  - [Random coin flips](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fprobability\u002Frandom_coin_flips_table.ipynb) Sampling\n  - [3X3 basketball Olympics tournament](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fprobability\u002Folympics_3x3_basketball.ipynb) Monte Carlo method\n  - [Boxing championship](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fprobability\u002Fboxing_championship_exercise.ipynb) Monte Carlo method\n  - [Videogame](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fprobability\u002Fvideogame_exercise.ipynb) Monte Carlo method\n\n## Discrete Variables\n\n  - [Die rolls (real data)](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_variables\u002Fdie_histogram.ipynb) Empirical probability mass function\n  - [Fair die rolls](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_variables\u002Ffair_die.ipynb) Empirical probability mass function\n  - [Durant's free throw streaks](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_variables\u002Ffree_throw_streaks_Durant.ipynb) Nonparametric and parametric models, geometric distribution, maximum likelihood\n  - [Maximum likelihood estimation for simulated free throws](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_variables\u002Fsimulated_free_throws_maximum_likelihood.ipynb) Parametric model, geometric distribution, maximum likelihood\n  - [Phone calls](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_variables\u002Fcall_center_parametric_vs_nonparametric_models.ipynb) Nonparametric and parametric models, Poisson distribution, maximum likelihood\n  - [Distribution of the empirical-probability estimator](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_variables\u002Fempirical_probability_distribution.ipynb) Empirical probability, binomial distribution\n\n## Continuous Variables\n\n  - [Height](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fcontinuous_variables\u002Fheight_distribution.ipynb) Cumulative distribution function, quantiles, probability density function, histogram, kernel density estimation, box plot, Gaussian distribution, maximum likelihood estimation, parametric and nonparametric models\n  - [Gross domestic product](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fcontinuous_variables\u002Fgdp.ipynb) Cumulative distribution function, quantiles, probability density function, histogram, kernel density estimation, box plot\n  - [Temperatures in Oxford](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fcontinuous_variables\u002Foxford_temperatures.ipynb) Box plot, quartiles\n  - [Interarrival times of phone calls](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fcontinuous_variables\u002Fcall_center_interarrival_times.ipynb) Kernel density estimation, nonparametric and parametric models, exponential distribution, maximum likelihood\n  - [Simulating an exponential](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fcontinuous_variables\u002Fsimulating_an_exponential.ipynb) Inverse transform sampling\n\n## Multiple Discrete Variables\n\n  - [Movie ratings](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_discrete_variables\u002Fmovie_ratings.ipynb) Joint probability mass function, marginal distribution, conditional distribution\n  - [Precipitation in Oregon (and Hawaii and Rhode Island)](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_discrete_variables\u002Fprecipitation.ipynb) Joint probability mass function, marginal distribution, conditional distribution, independence, conditional independence\n  - [Curse of dimensionality](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_discrete_variables\u002Fcurse_of_dimensionality.ipynb) \n  - [Precipitation time series](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_discrete_variables\u002Fmarkov_chain_precipitation.ipynb) Markov chains, stationarity\n  - [Car rental](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_discrete_variables\u002Fcar_rental.ipynb) Time-homogeneous Markov chains, stationary distribution\n  - [Political affiliation](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_discrete_variables\u002Fnaive_bayes_political_affiliation.ipynb) Naive Bayes, classification\n  \n## Multiple Continuous Variables\n\n- [Temperature in Manhattan and Versailles](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_continuous_variables\u002Ftemperatures.ipynb) Joint probability density function, marginal distribution, conditional distribution\n- [More temperature data](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_continuous_variables\u002Ftemperatures_conditional_independence.ipynb) Joint probability density function, marginal distribution, conditional distribution, conditional independence\n- [Anthropometric data](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_continuous_variables\u002Fanthropometric_data_gaussian.ipynb) Joint probability density function, kernel density estimation, Gaussian random vectors, maximum likelihood, parametric and nonparametric models\n- [2D kernel density estimation](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_continuous_variables\u002Fkernel_density_estimation_2D.ipynb)\n- [Movie duration and earnings](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_continuous_variables\u002Fmovie_duration_earnings.ipynb) Joint probability density function, conditional pdf, independence\n- [Conditional distribution of Gaussian random variables](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_continuous_variables\u002Fgaussian_2D_conditionals.ipynb)\n- [Eigendecomposition analysis of multivariate Gaussian distribution](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_continuous_variables\u002Fgaussian_2D_eigendecomposition.ipynb)\n- [Exotic fruit](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_continuous_variables\u002Fexotic_fruit_exercise.ipynb) Gaussian random vectors\n- [Simulating a lake](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_continuous_variables\u002Fsimulating_lake.ipynb) Inverse-transform sampling, dependence between random variables\n- [Simulating a triangle](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_continuous_variables\u002Fsimulating_triangle.ipynb)\n\n## Discrete and Continuous Variables\n\n- [Temperature and precipitation in Mauna Loa](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_and_continuous_variables\u002Ftemperature_and_precipitation.ipynb) Joint distribution of discrete and continuous variables, marginal distributions, conditional distributions, kernel density estimation\n- [Height and sex](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_and_continuous_variables\u002Fheight_and_sex.ipynb) Mixture model, Gaussian parametric model, joint distribution of discrete and continuous variables, marginal distributions, conditional distributions\n- [Height and handedness](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_and_continuous_variables\u002Fheight_handedness.ipynb) Joint distribution of discrete and continuous variables, independence, kernel density estimation\n- [Alzheimer's diagnostics](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_and_continuous_variables\u002Falzheimers_gaussian_discriminant_analysis.ipynb) Classification, Gaussian random vectors, Gaussian discriminant analysis, quadratic discriminant analysis, linear discriminant analysis, maximum likelihood, parametric models\n- [Clustering according to height](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_and_continuous_variables\u002Fheight_and_sex_gaussian_mixture_model.ipynb) Gaussian mixture model, expectation maximization algorithm, clustering, unsupervised learning\n- [Clustering NBA players](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_and_continuous_variables\u002Fnba_gaussian_mixture_model.ipynb) Gaussian mixture model, expectation maximization algorithm, clustering, unsupervised learning\n- [Election poll](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_and_continuous_variables\u002Felection_Bayesian_analysis.ipynb) Bayesian parametric modeling, beta distribution, prior and posterior distributions, conjugate prior\n- [How not to predict an election](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_and_continuous_variables\u002Fhow_not_to_predict_an_election.ipynb) Bayesian parametric modeling, independence, conditional independence, Monte Carlo method\n\n## Averaging\n\n- [NBA salaries](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Faveraging\u002Fnba_salaries.ipynb) Mean, median, outliers\n- [Movie ratings](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Faveraging\u002Fmovie_ratings_conditional_mean.ipynb) Sample conditional mean, conditional expectation\n- [Temperatures in the United States](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Faveraging\u002Ftemperature_mean_variance.ipynb) Sample mean, sample variance, sample standard deviation\n- [Temperature in Manhattan and Versailles](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Faveraging\u002Ftemperatures_conditional_mean.ipynb) Sample conditional mean, regression\n- [Do private classes improve grades?](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Faveraging\u002Fprivate_classes_average_treatment_effect.ipynb) Causal inference, average treatment effect, confounding factor, adjusting for confounders\n- [Does title capitalization increase YouTube views?](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Faveraging\u002FYouTube_titles_randomized_experiment.ipynb) Causal inference, average treatment effect\n\n## Correlation\n\n- [Height and NBA stats](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fcorrelation\u002Frebounds_assists_height_correlation.ipynb) Correlation coefficient, standardization, explained variance, linear estimation, nonlinear estimation, uncorrelation, independence\n- [Gaussian random vector](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fcorrelation\u002Fcorrelation_gaussian_random_vector.ipynb) Correlation coefficient, explained variance\n- [Uncorrelation implies independence in Gaussian variables](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fcorrelation\u002Fgaussian_independence.ipynb) \n- [Feeding guinea pigs](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fcorrelation\u002Fcorrelation_causation_guinea_pigs.ipynb) Correlation and causation, confounder, causal inference\n- [Unemployment in Spain](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fcorrelation\u002Funemployment_spain.ipynb) Correlation and causation, confounder, causal inference, linear regression, adjusting for confounding factors\n\n## Estimation of Population Parameters\n\n- [Height](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fheight_sample_mean.ipynb) Sample mean, random sampling, law of large numbers, bias, standard error, consistency, Chebyshev bound, convergence in probability, central limit theorem, convergence in distribution\n- [Gross domestic product](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fgdp_sample_mean.ipynb) Sample mean, random sampling, law of large numbers\n- [COVID-19 prevalence](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002FCOVID19_sample_proportion.ipynb) Sample proportion, random sampling, law of large numbers, bias, standard error, consistency, convergence in probability, central limit theorem, convergence in distribution\n- [Gambler's paradox](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fgamblers_paradox.ipynb) Law of large numbers, sample mean\n- [The law of large numbers does not apply to the Cauchy distribution](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fcauchy_sample_mean.ipynb)\n- [Local economic activity](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Flocal_economic_activity.ipynb) Law of large numbers, consistency of the sample mean, outliers\n- [Central limit theorem (discrete variables)](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fclt_discrete_distribution.ipynb) Central limit theorem, convolution, sum of independent random variables\n- [Central limit theorem (continuous variables)](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fclt_continuous_distribution.ipynb) Central limit theorem, convolution, sum of independent random variables\n- [Basketball strategy](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fbasketball_strategy.ipynb) Central limit theorem, Gaussian approximation to the binomial, Monte Carlo method\n- [Financial crisis](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Ffinancial_crisis.ipynb) Central limit theorem, independence, Monte Carlo method\n- [Confidence intervals for the sample mean](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fheight_confidence_intervals.ipynb)\n- [Confidence intervals for precipitation](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fprecipitation_confidence_intervals.ipynb) Confidence intervals, sample proportion, random sampling\n- [Bootstrap sample mean](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fsample_mean_bootstrap.ipynb) The bootstrap, bootstrap standard error, sample mean\n- [Bootstrap Gaussian confidence intervals](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fsample_mean_bootstrap_confidence_intervals.ipynb)\n- [Bootstrap percentile confidence intervals](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fcorrelation_coefficient_bootstrap_confidence_intervals.ipynb)\n- [Height and foot length](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fcorrelation_coefficient_confidence_intervals.ipynb) Correlation coefficient, sample correlation coefficient, Gaussian confidence intervals, the bootstrap, bootstrap percentile confidence intervals, Fisher's transformation\n  \n## Hypothesis Testing\n\n- [Die rolls](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fhypothesis_testing\u002Fdie_rolls.ipynb) Null hypothesis, test statistic, p value, parametric testing, power function\n- [Antetokounmpo's free throws](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fhypothesis_testing\u002Fgiannis_free_throws.ipynb) Null hypothesis, test statistic, p value, two-sample test, z test, one-tailed test, two-tailed test, parametric testing, power function, permutation test, nonparametric testing\n- [Burger prices](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fhypothesis_testing\u002Fburger_prices_permutation_test.ipynb) Permutation test, nonparametric testing, p value\n- [Tom Brady and hurricanes](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fhypothesis_testing\u002Ftom_brady_hurricanes_permutation_test.ipynb) Permutation test, nonparametric testing\n- [Comparing school grades](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fhypothesis_testing\u002Fstudent_grades.ipynb) permutation test, nonparametric testing, median\n- [Clutch three-point shooting](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fhypothesis_testing\u002Fthree_pointers_clutch.ipynb) Multiple testing, Bonferroni's correction, p value\n- [Evaluating NBA players](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fhypothesis_testing\u002Fmultiple_testing_NBA.ipynb) Multiple testing, Bonferroni's correction, p value, permutation test, average treatment effect\n- [Practical significance vs. statistical significance](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fhypothesis_testing\u002Fpractical_significance_confidence_intervals.ipynb)\n- [P-hacking](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fhypothesis_testing\u002Fp-hacking.ipynb) p-hacking, publication bias\n\n## Principal Component Analysis and Low-Rank Models\n\n- [Gaussian random vector](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fpca_and_low_rank_models\u002FGaussian_random_vector.ipynb) Mean of a random vector vector, covariance matrix, directional variance, principal component analysis, spectral theorem\n- [Canadian cities](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fpca_and_low_rank_models\u002Fcanada_cities.ipynb) Sample mean of a vector, sample covariance matrix, principal component analysis, spectral theorem\n- [Faces](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fpca_and_low_rank_models\u002Fface_pca.ipynb) Principal component analysis, dimensionality reduction, sample mean of a vector\n- [Wheat seeds](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fpca_and_low_rank_models\u002Fwheat.ipynb) Sample covariance matrix, principal component analysis, dimensionality reduction\n- [Face classification](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fpca_and_low_rank_models\u002Ffaces_nearest_neighbor.ipynb) Principal component analysis, dimensionality reduction, nearest neighbor\n- [Temperatures in the United States](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fpca_and_low_rank_models\u002Ftemperatures_low_rank_model.ipynb) Sample covariance matrix, singular value decomposition, principal component analysis, low-rank model\n- [Prediction of movie ratings (cartoon example)](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fpca_and_low_rank_models\u002Fcollaborative_filtering_simple_example.ipynb) Low rank model, singular value decomposition, matrix completion, collaborative filtering, singular-value thresholding, imputation\n- [Prediction of movie ratings (real data)](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fpca_and_low_rank_models\u002Fcollaborative_filtering_movie_ratings.ipynb) Low rank model, singular value decomposition, matrix completion, collaborative filtering, singular-value thresholding, imputation\n- [Topic modeling](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fpca_and_low_rank_models\u002Ftopic_modeling.ipynb) Low-rank model, singular value decomposition, nonnegative matrix factorization \n\n## Regression and Classification\n\n- [Premature mortality in US counties](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002FUS_counties.ipynb) Linear regression, ordinary least squares, coefficient of determination, explained variance\n- [Cartoon illustration of overfitting and generalization](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Foverfitting_simple_example.ipynb)\n- [Noise amplification in linear regression 1](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Ftwo_features_collinearity.ipynb) Ordinary least squares, ridge regression\n- [Noise amplification in linear regression 2](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Ftwo_features_OLS_ridge_regression.ipynb) Ordinary least squares, ridge regression\n- [Temperature in Versailles](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Ftemperature_linear_regression.ipynb) Linear regression, ordinary least squares, ridge regression, sparse regression, lasso, regularization, overfitting\n- [Sparse regression 1](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Ftwo_features_lasso.ipynb) Sparsity, lasso, regularization\n- [Sparse regression 2](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Ftwo_features_lasso_cost_function.ipynb) Sparsity, lasso, regularization\n- [Height and sex](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Flogistic_regression_height_and_sex.ipynb) Logistic regression, maximum likelihood, log-likelihood, logistic function\n- [Alzheimer's diagnostics](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Falzheimers_logistic_regression.ipynb) Classification, logistic regression, evaluation of classification models, calibration\n- [Estimating wheat varieties via softmax regression](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Fwheat_softmax_regression.ipynb)\n- [Digit classification](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Fmnist_softmax_regression.ipynb) Softmax regression, overfitting, regularization\n- [Temperature estimation via regression trees](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Ftemperature_regression_tree.ipynb) \n- [Temperature estimation via tree ensembles](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Ftemperature_bagging_random_forest_boosting.ipynb) Bagging, random forests, boosting, overfitting\n- [Log-likelihood of classification tree](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Fclassification_tree_loglikelihood.ipynb)\n- [Temperature estimation via neural networks](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Ftemperature_neural_network.ipynb) Regression, neural networks, deep learning, overfitting, early stopping\n- [Estimating wheat varieties via a classification tree](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Fwheat_classification_tree.ipynb)\n- [Estimating wheat varieties via neural networks](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Fwheat_neural_net.ipynb)\n","# [代码仓库](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds) 用于 [数据科学的概率与统计](https:\u002F\u002Fwww.ps4ds.net\u002F)\n\n本书 [数据科学的概率与统计](https:\u002F\u002Fa.co\u002Fd\u002FcAss9mO) 的代码。免费预印本、视频、代码、幻灯片以及习题解答均可在 https:\u002F\u002Fwww.ps4ds.net 上获取。\n\n## 概率\n\n  - [美国众议院议员投票情况](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fprobability\u002FUS_congress_votes.ipynb)\n    经验概率、条件概率、独立性、条件独立性\n  - [随机硬币抛掷](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fprobability\u002Frandom_coin_flips_table.ipynb) 抽样\n  - [3对3篮球奥运会比赛](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fprobability\u002Folympics_3x3_basketball.ipynb) 蒙特卡洛方法\n  - [拳击冠军赛](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fprobability\u002Fboxing_championship_exercise.ipynb) 蒙特卡洛方法\n  - [电子游戏](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fprobability\u002Fvideogame_exercise.ipynb) 蒙特卡洛方法\n\n## 离散变量\n\n  - [骰子投掷（真实数据）](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_variables\u002Fdie_histogram.ipynb) 经验概率质量函数\n  - [公平骰子投掷](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_variables\u002Ffair_die.ipynb) 经验概率质量函数\n  - [杜兰特的罚球连中纪录](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_variables\u002Ffree_throw_streaks_Durant.ipynb) 非参数与参数模型、几何分布、最大似然估计\n  - [模拟罚球的最大似然估计](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_variables\u002Fsimulated_free_throws_maximum_likelihood.ipynb) 参数模型、几何分布、最大似然估计\n  - [电话呼叫](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_variables\u002Fcall_center_parametric_vs_nonparametric_models.ipynb) 非参数与参数模型、泊松分布、最大似然估计\n  - [经验概率估计量的分布](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_variables\u002Fempirical_probability_distribution.ipynb) 经验概率、二项分布\n\n## 连续变量\n\n  - [身高](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fcontinuous_variables\u002Fheight_distribution.ipynb) 累积分布函数、分位数、概率密度函数、直方图、核密度估计、箱线图、正态分布、最大似然估计、参数与非参数模型\n  - [国内生产总值](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fcontinuous_variables\u002Fgdp.ipynb) 累积分布函数、分位数、概率密度函数、直方图、核密度估计、箱线图\n  - [牛津市气温](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fcontinuous_variables\u002Foxford_temperatures.ipynb) 箱线图、四分位数\n  - [电话呼叫的到达间隔时间](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fcontinuous_variables\u002Fcall_center_interarrival_times.ipynb) 核密度估计、非参数与参数模型、指数分布、最大似然估计\n  - [模拟指数分布](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fcontinuous_variables\u002Fsimulating_an_exponential.ipynb) 逆变换抽样\n\n## 多个离散变量\n\n  - [电影评分](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_discrete_variables\u002Fmovie_ratings.ipynb) 联合概率质量函数、边缘分布、条件分布\n  - [俄勒冈州（以及夏威夷和罗德岛）的降水量](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_discrete_variables\u002Fprecipitation.ipynb) 联合概率质量函数、边缘分布、条件分布、独立性、条件独立性\n  - [维度灾难](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_discrete_variables\u002Fcurse_of_dimensionality.ipynb) \n  - [降水量时间序列](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_discrete_variables\u002Fmarkov_chain_precipitation.ipynb) 马尔可夫链、平稳性\n  - [汽车租赁](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_discrete_variables\u002Fcar_rental.ipynb) 时间齐次马尔可夫链、平稳分布\n  - [政治倾向](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_discrete_variables\u002Fnaive_bayes_political_affiliation.ipynb) 朴素贝叶斯、分类\n\n## 多个连续变量\n\n- [曼哈顿和凡尔赛的气温](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_continuous_variables\u002Ftemperatures.ipynb) 联合概率密度函数、边缘分布、条件分布\n- [更多温度数据](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_continuous_variables\u002Ftemperatures_conditional_independence.ipynb) 联合概率密度函数、边缘分布、条件分布、条件独立性\n- [人体测量数据](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_continuous_variables\u002Fanthropometric_data_gaussian.ipynb) 联合概率密度函数、核密度估计、高斯随机向量、最大似然估计、参数与非参数模型\n- [二维核密度估计](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_continuous_variables\u002Fkernel_density_estimation_2D.ipynb)\n- [电影时长与票房收入](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_continuous_variables\u002Fmovie_duration_earnings.ipynb) 联合概率密度函数、条件概率密度函数、独立性\n- [高斯随机变量的条件分布](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_continuous_variables\u002Fgaussian_2D_conditionals.ipynb)\n- [多元正态分布的特征分解分析](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_continuous_variables\u002Fgaussian_2D_eigendecomposition.ipynb)\n- [奇异水果](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_continuous_variables\u002Fexotic_fruit_exercise.ipynb) 高斯随机向量\n- [模拟湖泊](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_continuous_variables\u002Fsimulating_lake.ipynb) 逆变换抽样、随机变量之间的依赖关系\n- [模拟三角形](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fmultiple_continuous_variables\u002Fsimulating_triangle.ipynb)\n\n## 离散与连续变量\n\n- [冒纳罗亚的温度与降水](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_and_continuous_variables\u002Ftemperature_and_precipitation.ipynb) 离散与连续变量的联合分布、边缘分布、条件分布、核密度估计\n- [身高与性别](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_and_continuous_variables\u002Fheight_and_sex.ipynb) 混合模型、高斯参数模型、离散与连续变量的联合分布、边缘分布、条件分布\n- [身高与惯用手](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_and_continuous_variables\u002Fheight_handedness.ipynb) 离散与连续变量的联合分布、独立性、核密度估计\n- [阿尔茨海默病诊断](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_and_continuous_variables\u002Falzheimers_gaussian_discriminant_analysis.ipynb) 分类、高斯随机向量、高斯判别分析、二次判别分析、线性判别分析、最大似然估计、参数模型\n- [按身高聚类](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_and_continuous_variables\u002Fheight_and_sex_gaussian_mixture_model.ipynb) 高斯混合模型、期望最大化算法、聚类、无监督学习\n- [NBA球员聚类](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_and_continuous_variables\u002Fnba_gaussian_mixture_model.ipynb) 高斯混合模型、期望最大化算法、聚类、无监督学习\n- [选举民调](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_and_continuous_variables\u002Felection_Bayesian_analysis.ipynb) 贝叶斯参数建模、贝塔分布、先验分布与后验分布、共轭先验\n- [如何不预测选举](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fdiscrete_and_continuous_variables\u002Fhow_not_to_predict_an_election.ipynb) 贝叶斯参数建模、独立性、条件独立性、蒙特卡洛方法\n\n## 平均值\n\n- [NBA薪资](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Faveraging\u002Fnba_salaries.ipynb) 均值、中位数、异常值\n- [电影评分](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Faveraging\u002Fmovie_ratings_conditional_mean.ipynb) 样本条件均值、条件期望\n- [美国各州气温](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Faveraging\u002Ftemperature_mean_variance.ipynb) 样本均值、样本方差、样本标准差\n- [曼哈顿与凡尔赛的气温](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Faveraging\u002Ftemperatures_conditional_mean.ipynb) 样本条件均值、回归分析\n- [私人辅导能提高成绩吗？](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Faveraging\u002Fprivate_classes_average_treatment_effect.ipynb) 因果推断、平均处理效应、混杂因素、调整混杂因素\n- [标题大小写会增加YouTube观看量吗？](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Faveraging\u002FYouTube_titles_randomized_experiment.ipynb) 因果推断、平均处理效应\n\n## 相关性\n\n- [身高与NBA数据](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fcorrelation\u002Frebounds_assists_height_correlation.ipynb) 相关系数、标准化、解释方差、线性估计、非线性估计、不相关、独立性\n- [高斯随机向量](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fcorrelation\u002Fcorrelation_gaussian_random_vector.ipynb) 相关系数、解释方差\n- [高斯变量中不相关意味着独立](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fcorrelation\u002Fgaussian_independence.ipynb)\n- [喂养豚鼠](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fcorrelation\u002Fcorrelation_causation_guinea_pigs.ipynb) 相关性与因果关系、混杂因素、因果推断\n- [西班牙失业率](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fcorrelation\u002Funemployment_spain.ipynb) 相关性与因果关系、混杂因素、因果推断、线性回归、调整混杂因素\n\n## 总体参数估计\n\n- [身高](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fheight_sample_mean.ipynb) 样本均值、随机抽样、大数定律、偏误、标准误差、一致性、切比雪夫不等式、依概率收敛、中心极限定理、依分布收敛\n- [国内生产总值](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fgdp_sample_mean.ipynb) 样本均值、随机抽样、大数定律\n- [COVID-19 患病率](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002FCOVID19_sample_proportion.ipynb) 样本比例、随机抽样、大数定律、偏误、标准误差、一致性、依概率收敛、中心极限定理、依分布收敛\n- [赌徒悖论](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fgamblers_paradox.ipynb) 大数定律、样本均值\n- [大数定律不适用于柯西分布](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fcauchy_sample_mean.ipynb)\n- [地方经济活动](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Flocal_economic_activity.ipynb) 大数定律、样本均值的一致性、异常值\n- [中心极限定理（离散型变量）](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fclt_discrete_distribution.ipynb) 中心极限定理、卷积、独立随机变量之和\n- [中心极限定理（连续型变量）](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fclt_continuous_distribution.ipynb) 中心极限定理、卷积、独立随机变量之和\n- [篮球战术](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fbasketball_strategy.ipynb) 中心极限定理、二项分布的正态近似、蒙特卡洛方法\n- [金融危机](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Ffinancial_crisis.ipynb) 中心极限定理、独立性、蒙特卡洛方法\n- [样本均值的置信区间](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fheight_confidence_intervals.ipynb)\n- [降水量的置信区间](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fprecipitation_confidence_intervals.ipynb) 置信区间、样本比例、随机抽样\n- [自助法样本均值](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fsample_mean_bootstrap.ipynb) 自助法、自助法标准误差、样本均值\n- [自助法正态置信区间](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fsample_mean_bootstrap_confidence_intervals.ipynb)\n- [自助法百分位数置信区间](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fcorrelation_coefficient_bootstrap_confidence_intervals.ipynb)\n- [身高与脚长](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Festimation_population_parameters\u002Fcorrelation_coefficient_confidence_intervals.ipynb) 相关系数、样本相关系数、正态置信区间、自助法、自助法百分位数置信区间、费舍尔变换\n\n## 假设检验\n\n- [骰子投掷](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fhypothesis_testing\u002Fdie_rolls.ipynb) 零假设、检验统计量、p值、参数检验、功效函数\n- [字母哥的罚球](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fhypothesis_testing\u002Fgiannis_free_throws.ipynb) 零假设、检验统计量、p值、两样本检验、z检验、单尾检验、双尾检验、参数检验、功效函数、置换检验、非参数检验\n- [汉堡价格](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fhypothesis_testing\u002Fburger_prices_permutation_test.ipynb) 置换检验、非参数检验、p值\n- [汤姆·布雷迪与飓风](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fhypothesis_testing\u002Ftom_brady_hurricanes_permutation_test.ipynb) 置换检验、非参数检验\n- [比较学生成绩](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fhypothesis_testing\u002Fstudent_grades.ipynb) 置换检验、非参数检验、中位数\n- [关键时段三分球命中率](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fhypothesis_testing\u002Fthree_pointers_clutch.ipynb) 多重检验、邦费罗尼校正、p值\n- [NBA球员评估](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fhypothesis_testing\u002Fmultiple_testing_NBA.ipynb) 多重检验、邦费罗尼校正、p值、置换检验、平均处理效应\n- [实际意义与统计意义](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fhypothesis_testing\u002Fpractical_significance_confidence_intervals.ipynb)\n- [p值操纵](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fhypothesis_testing\u002Fp-hacking.ipynb) p值操纵、发表偏倚\n\n## 主成分分析与低秩模型\n\n- [高斯随机向量](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fpca_and_low_rank_models\u002FGaussian_random_vector.ipynb) 随机向量的均值、协方差矩阵、方向方差、主成分分析、谱定理\n- [加拿大城市](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fpca_and_low_rank_models\u002Fcanada_cities.ipynb) 向量的样本均值、样本协方差矩阵、主成分分析、谱定理\n- [人脸](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fpca_and_low_rank_models\u002Fface_pca.ipynb) 主成分分析、降维、向量的样本均值\n- [小麦种子](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fpca_and_low_rank_models\u002Fwheat.ipynb) 模型协方差矩阵、主成分分析、降维\n- [人脸分类](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fpca_and_low_rank_models\u002Ffaces_nearest_neighbor.ipynb) 主成分分析、降维、最近邻法\n- [美国气温](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fpca_and_low_rank_models\u002Ftemperatures_low_rank_model.ipynb) 模型协方差矩阵、奇异值分解、主成分分析、低秩模型\n- [电影评分预测（卡通示例）](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fpca_and_low_rank_models\u002Fcollaborative_filtering_simple_example.ipynb) 低秩模型、奇异值分解、矩阵补全、协同过滤、奇异值阈值化、插值\n- [电影评分预测（真实数据）](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fpca_and_low_rank_models\u002Fcollaborative_filtering_movie_ratings.ipynb) 低秩模型、奇异值分解、矩阵补全、协同过滤、奇异值阈值化、插值\n- [主题建模](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fpca_and_low_rank_models\u002Ftopic_modeling.ipynb) 低秩模型、奇异值分解、非负矩阵分解\n\n## 回归与分类\n\n- [美国各县过早死亡率](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002FUS_counties.ipynb) 线性回归、普通最小二乘法、决定系数、解释方差\n- [过拟合与泛化的卡通示例](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Foverfitting_simple_example.ipynb)\n- [线性回归中的噪声放大1](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Ftwo_features_collinearity.ipynb) 普通最小二乘法、岭回归\n- [线性回归中的噪声放大2](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Ftwo_features_OLS_ridge_regression.ipynb) 普通最小二乘法、岭回归\n- [凡尔赛气温](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Ftemperature_linear_regression.ipynb) 线性回归、普通最小二乘法、岭回归、稀疏回归、Lasso、正则化、过拟合\n- [稀疏回归1](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Ftwo_features_lasso.ipynb) 稀疏性、Lasso、正则化\n- [稀疏回归2](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Ftwo_features_lasso_cost_function.ipynb) 稀疏性、Lasso、正则化\n- [身高与性别](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Flogistic_regression_height_and_sex.ipynb) 逻辑回归、最大似然估计、对数似然、逻辑函数\n- [阿尔茨海默病诊断](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Falzheimers_logistic_regression.ipynb) 分类、逻辑回归、分类模型评估、校准\n- [通过Softmax回归估计小麦品种](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Fwheat_softmax_regression.ipynb)\n- [数字分类](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Fmnist_softmax_regression.ipynb) Softmax回归、过拟合、正则化\n- [通过回归树估计气温](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Ftemperature_regression_tree.ipynb)\n- [通过树集成方法估计气温](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Ftemperature_bagging_random_forest_boosting.ipynb) 装袋法、随机森林、提升法、过拟合\n- [分类树的对数似然](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Fclassification_tree_loglikelihood.ipynb)\n- [通过神经网络估计气温](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Ftemperature_neural_network.ipynb) 回归、神经网络、深度学习、过拟合、早停法\n- [通过分类树估计小麦品种](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Fwheat_classification_tree.ipynb)\n- [通过神经网络估计小麦品种](https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds\u002Fblob\u002Fmain\u002Fregression_classification\u002Fwheat_neural_net.ipynb)","# ps4ds 快速上手指南\n\n`ps4ds` 是书籍《Probability and Statistics for Data Science》（数据科学概率与统计）的配套开源代码库。它包含了一系列基于 Jupyter Notebook 的实战案例，涵盖从基础概率、离散\u002F连续变量分布到贝叶斯推断、因果推断等核心数据科学主题。\n\n## 环境准备\n\n在开始之前，请确保您的系统满足以下要求：\n\n*   **操作系统**：Windows、macOS 或 Linux\n*   **Python 版本**：推荐 Python 3.8 或更高版本\n*   **前置依赖**：\n    *   `pip` (Python 包管理工具)\n    *   `git` (版本控制工具，用于克隆仓库)\n    *   `Jupyter Lab` 或 `Jupyter Notebook` (用于运行交互式代码)\n\n> **国内加速建议**：\n> 推荐使用国内镜像源安装依赖，以提升下载速度。本指南后续步骤将默认配置使用清华大学 (TUNA) 镜像源。\n\n## 安装步骤\n\n### 1. 克隆项目仓库\n\n打开终端（Terminal 或 CMD），执行以下命令将代码库下载到本地：\n\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Fcfgranda\u002Fps4ds.git\ncd ps4ds\n```\n\n### 2. 创建虚拟环境（推荐）\n\n为避免依赖冲突，建议创建一个独立的虚拟环境：\n\n```bash\npython -m venv ps4ds-env\n```\n\n激活虚拟环境：\n*   **Windows**:\n    ```bash\n    ps4ds-env\\Scripts\\activate\n    ```\n*   **macOS \u002F Linux**:\n    ```bash\n    source ps4ds-env\u002Fbin\u002Factivate\n    ```\n\n### 3. 安装依赖库\n\n该项目主要依赖 `numpy`, `pandas`, `matplotlib`, `scipy`, `scikit-learn` 等数据科学常用库。使用国内镜像源快速安装：\n\n```bash\npip install -r requirements.txt -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n```\n\n*注：如果根目录下没有 `requirements.txt` 文件，可以直接安装核心依赖：*\n\n```bash\npip install numpy pandas matplotlib scipy scikit-learn jupyterlab -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n```\n\n## 基本使用\n\n安装完成后，您可以启动 Jupyter Lab 来浏览和运行书中的示例代码。\n\n### 1. 启动 Jupyter Lab\n\n在项目根目录下执行：\n\n```bash\njupyter lab\n```\n\n浏览器会自动打开一个标签页，显示项目文件目录。\n\n### 2. 运行示例 Notebook\n\n根据您想学习的主题，进入对应的文件夹并打开 `.ipynb` 文件。例如，要学习**基础概率**中的“美国国会投票”案例：\n\n1.  在文件浏览器中点击进入 `probability` 文件夹。\n2.  点击打开 `US_congress_votes.ipynb`。\n3.  点击菜单栏的 **Kernel** -> **Restart Kernel and Run All Cells**，即可从头到尾运行代码并查看可视化结果。\n\n### 3. 代码结构速览\n\n项目按统计学主题分类，主要目录如下：\n\n*   `probability\u002F`: 基础概率、条件概率、蒙特卡洛方法\n*   `discrete_variables\u002F`: 离散变量分布（几何分布、泊松分布等）\n*   `continuous_variables\u002F`: 连续变量分布（高斯分布、指数分布等）\n*   `multiple_discrete_variables\u002F`: 多离散变量、马尔可夫链、朴素贝叶斯\n*   `multiple_continuous_variables\u002F`: 多连续变量、联合分布、主成分分析基础\n*   `discrete_and_continuous_variables\u002F`: 混合模型、高斯判别分析、聚类 (GMM)\n*   `averaging\u002F`: 均值、方差、因果推断基础\n*   `correlation\u002F`: 相关性分析、线性回归\n*   `estimation_population_parameters\u002F`: 参数估计、大数定律、中心极限定理\n\n您可以直接修改 Notebook 中的代码单元格，尝试不同的参数或数据集，以加深对统计学概念的理解。","某数据科学团队正在为一家连锁零售店构建销量预测模型，急需验证历史销售数据的分布特征并选择合适的统计方法。\n\n### 没有 ps4ds 时\n- 团队成员需自行搜集零散的统计学教程，难以找到针对真实商业数据集（如电话呼叫间隔或商品销量）的完整代码示例。\n- 在判断数据服从泊松分布还是指数分布时，缺乏直观的对比案例，导致参数估计方法选择错误，模型偏差较大。\n- 遇到“维数灾难”或多变量联合概率等复杂概念时，只能阅读枯燥的理论公式，无法通过可视化笔记本快速理解其实际影响。\n- 内部培训成本高，新人需要数周时间才能掌握从经验概率到最大似然估计的完整推导与实现流程。\n\n### 使用 ps4ds 后\n- 直接复用项目中关于“呼叫中心电话间隔”的 Python 笔记本，快速将理论映射到零售客流到达率的建模中，节省了大量搜索时间。\n- 参考其关于非参数与参数模型对比的实战代码，迅速验证了销量数据符合特定分布假设，显著提升了预测模型的准确性。\n- 利用内置的 118 个视频和交互式幻灯片，团队成员通过可视化手段直观理解了多变量依赖关系，避免了建模陷阱。\n- 借助 200 道习题的详细解答和 23 个真实数据集，新人能在几天内通过动手实践掌握核心统计技能，大幅缩短上手周期。\n\nps4ds 通过将抽象的概率统计理论转化为基于真实数据的可执行代码，极大地降低了数据科学家的学习门槛并提升了建模效率。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fcfgranda_ps4ds_85466c5b.png","cfgranda",null,"https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fcfgranda_7fad0355.png","https:\u002F\u002Fgithub.com\u002Fcfgranda",[77,81,85],{"name":78,"color":79,"percentage":80},"Jupyter Notebook","#DA5B0B",100,{"name":82,"color":83,"percentage":84},"Perl","#0298c3",0,{"name":86,"color":87,"percentage":84},"Shell","#89e051",586,75,"2026-04-19T09:58:08",1,"未说明",{"notes":94,"python":92,"dependencies":95},"该工具是《数据科学的概率与统计》书籍的配套代码库，主要由 Jupyter Notebook (.ipynb) 文件组成，涵盖概率、离散\u002F连续变量、贝叶斯分析等内容。README 中未明确列出具体的运行环境、Python 版本或依赖库要求。通常此类数据科学教学代码需要安装 Jupyter Lab\u002FNotebook 以及基础数据科学栈（如 numpy, pandas, matplotlib, scipy, scikit-learn）。建议查看具体 Notebook 文件开头的导入语句以确定确切依赖。",[92],[97,16,14],"其他",[99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115],"data-science","datascience","education","jupyter-notebook","open-source","probability","probability-statistics","python","statistics","statistics-course","statistics-learning","teaching","teaching-materials","textbook","course-materials","data-analysis","machine-learning","2026-03-27T02:49:30.150509","2026-04-20T07:17:14.007257",[],[]]