[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-ptyadana--Data-Science-and-Machine-Learning-Projects-Dojo":3,"tool-ptyadana--Data-Science-and-Machine-Learning-Projects-Dojo":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 
等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",160411,2,"2026-04-18T23:33:24",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",109154,"2026-04-18T11:18:24",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 
协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[52,13,15,14],"插件",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":32,"last_commit_at":59,"category_tags":60,"status":17},4721,"markitdown","microsoft\u002Fmarkitdown","MarkItDown 是一款由微软 AutoGen 团队打造的轻量级 Python 工具，专为将各类文件高效转换为 Markdown 格式而设计。它支持 PDF、Word、Excel、PPT、图片（含 OCR）、音频（含语音转录）、HTML 乃至 YouTube 链接等多种格式的解析，能够精准提取文档中的标题、列表、表格和链接等关键结构信息。\n\n在人工智能应用日益普及的今天，大语言模型（LLM）虽擅长处理文本，却难以直接读取复杂的二进制办公文档。MarkItDown 恰好解决了这一痛点，它将非结构化或半结构化的文件转化为模型“原生理解”且 Token 效率极高的 Markdown 格式，成为连接本地文件与 AI 分析 pipeline 的理想桥梁。此外，它还提供了 MCP（模型上下文协议）服务器，可无缝集成到 Claude Desktop 等 LLM 应用中。\n\n这款工具特别适合开发者、数据科学家及 AI 研究人员使用，尤其是那些需要构建文档检索增强生成（RAG）系统、进行批量文本分析或希望让 AI 助手直接“阅读”本地文件的用户。虽然生成的内容也具备一定可读性，但其核心优势在于为机器",93400,"2026-04-06T19:52:38",[52,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":67,"readme_en":68,"readme_zh":69,"quickstart_zh":70,"use_case_zh":71,"hero_image_url":72,"owner_login":73,"owner_name":74,"owner_avatar_url":75,"owner_bio":76,"owner_company":77,"owner_location":78,"owner_email":77,"owner_twitter":77,"owner_website":79,"owner_url":80,"languages":81,"stars":94,"forks":95,"last_commit_at":96,"license":97,"difficulty_score":32,"env_os":98,"env_gpu":99,"env_ram":100,"env_deps":101,"category_tags":115,"github_topics":117,"view_count":32,"oss_zip_url":77,"oss_zip_packed_at":77,"status":17,"created_at":132,"updated_at":133,"faqs":134,"releases":135},9386,"ptyadana\u002FData-Science-and-Machine-Learning-Projects-Dojo","Data-Science-and-Machine-Learning-Projects-Dojo","collections of data science, machine learning and data visualization projects with pandas, sklearn, matplotlib, tensorflow2, Keras, various ML algorithms like random forest classifier, boosting, etc","Data-Science-and-Machine-Learning-Projects-Dojo 
是一个汇聚了数据科学、机器学习及数据可视化实战项目的开源资源库。它旨在解决初学者和从业者在理论学习与实际操作之间存在的鸿沟，通过提供从基础统计分析到复杂深度学习的全流程代码示例，帮助用户将抽象的算法理论转化为具体的解决问题的能力。\n\n该项目非常适合数据科学开发者、人工智能研究人员以及希望提升实战技能的学生使用。无论是想入门 Pandas 进行数据清洗，还是希望深入掌握 TensorFlow、Keras 构建神经网络，亦或是学习如何利用 Scikit-learn 实现随机森林等经典算法，都能在这里找到对应的练习场景。其独特的技术亮点在于覆盖范围极广且紧跟行业主流：不仅囊括了 NumPy、Matplotlib、Seaborn 等基础库的应用，还整合了 GeoPandas 地理数据分析、NLTK 自然语言处理以及 Face Recognition 人脸识别等前沿领域案例。此外，项目还展示了如何将模型通过 Streamlit 或 Flask 快速部署为 Web 应用，并利用 Apache Spark","Data-Science-and-Machine-Learning-Projects-Dojo 是一个汇聚了数据科学、机器学习及数据可视化实战项目的开源资源库。它旨在解决初学者和从业者在理论学习与实际操作之间存在的鸿沟，通过提供从基础统计分析到复杂深度学习的全流程代码示例，帮助用户将抽象的算法理论转化为具体的解决问题的能力。\n\n该项目非常适合数据科学开发者、人工智能研究人员以及希望提升实战技能的学生使用。无论是想入门 Pandas 进行数据清洗，还是希望深入掌握 TensorFlow、Keras 构建神经网络，亦或是学习如何利用 Scikit-learn 实现随机森林等经典算法，都能在这里找到对应的练习场景。其独特的技术亮点在于覆盖范围极广且紧跟行业主流：不仅囊括了 NumPy、Matplotlib、Seaborn 等基础库的应用，还整合了 GeoPandas 地理数据分析、NLTK 自然语言处理以及 Face Recognition 人脸识别等前沿领域案例。此外，项目还展示了如何将模型通过 Streamlit 或 Flask 快速部署为 Web 应用，并利用 Apache Spark 处理大规模数据，为用户提供了从数据探索、模型训练到最终落地的完整闭环参考，是打磨数据技能的理想“道场”。","# Data Science, Machine Learning & Visualization Dojo\n\nCollections of Data Science & ML projects and dojo where I practice Data Science, Machine Learning, Deep Learning and Data Visualization related skills, theories, probability, statistics, etc.\n\n### Built with\n#### Machine Learing, Deep Learning, Data Science libraries\n+ [NumPy](https:\u002F\u002Fnumpy.org\u002F) - package for scientific computing with Python\n+ [Pandas](https:\u002F\u002Fpandas.pydata.org\u002F) - fast, powerful, flexible and easy to use open source data analysis and manipulation tool\n+ [Pandas Profiling](https:\u002F\u002Fpypi.org\u002Fproject\u002Fpandas-profiling\u002F) - generate reports from dataframe\n+ [Geo Pandas](https:\u002F\u002Fpypi.org\u002Fproject\u002Fgeopandas\u002F) - support for geographic data to pandas objects.\n+ [Scikit-learn](https:\u002F\u002Fscikit-learn.org\u002Fstable\u002F) - Simple and efficient tools for predictive data analysis\n+ 
[TensorFlow](https:\u002F\u002Fwww.tensorflow.org\u002F) - An end-to-end open source machine learning platform\n+ [Keras](https:\u002F\u002Fkeras.io\u002F) - Deep Learning framework\n+ [NLTK](https:\u002F\u002Fwww.nltk.org\u002F) - Natural Language Toolkit\n+ [dlib](http:\u002F\u002Fdlib.net\u002F) - A toolkit for making real world machine learning and data analysis applications in C++\n+ [Face Recognition](https:\u002F\u002Fgithub.com\u002Fageitgey\u002Fface_recognition) - The world's simplest facial recognition api for Python and the command line\n\n#### Data Visualization libraries\n+ [Matplotlib](https:\u002F\u002Fmatplotlib.org\u002F) - a comprehensive library for creating static, animated, and interactive visualizations in Python\n+ [Seaborn](https:\u002F\u002Fseaborn.pydata.org\u002F) - statistical data visualization\n+ [Bokeh](https:\u002F\u002Fdocs.bokeh.org\u002Fen\u002Flatest\u002Findex.html) - interactive visualization library for modern web browsers\n+ [Plotly](https:\u002F\u002Fplotly.com\u002F) - The front-end for ML and data science models\n+ [Cufflinks](https:\u002F\u002Fpypi.org\u002Fproject\u002Fcufflinks\u002F) - Productivity Tools for Plotly + Pandas\n\n#### Turning into Web applications\n+ [Streamlit](https:\u002F\u002Fwww.streamlit.io\u002F) - The fastest way to build and share data apps\n+ [Flask](https:\u002F\u002Fflask.palletsprojects.com\u002Fen\u002F1.1.x\u002F) - a micro web framework written in Python\n\n#### Spark\n+ [Apache Spark](https:\u002F\u002Fspark.apache.org\u002F) - a unified analytics engine for large-scale data processing.\n+ [Spark with pyspark](https:\u002F\u002Fspark.apache.org\u002Fdocs\u002Flatest\u002Fapi\u002Fpython\u002Findex.html) - PySpark is the collaboration of Apache Spark and Python\n+ [Databricks](https:\u002F\u002Fdatabricks.com\u002F) - Unified Data Analytics Platform - One cloud platform for massive scale data engineering and collaborative data science.\n\n#### Tools and Datasources\n+ [Jupyter 
Notebook](https:\u002F\u002Fjupyter.org\u002F) - Notebook system for data analysis\n+ [Google Colab](https:\u002F\u002Fcolab.research.google.com\u002F) - Great Notebook system by google, which give free access to GPUs\n+ [Kaggle](https:\u002F\u002Fwww.kaggle.com\u002F) - Source of Dataset collections\n+ [Plotly Chart Studio](https:\u002F\u002Fplotly.com\u002Fchart-studio\u002F) - The fastest way to publish & embed interactive charts online\n\n------------------------------------------------------------------\n\n# Projects\n\n## [Breast Cancer Tumor Diagnostic - Classification Project](Project%20-%20Breast%20Cancer%20Classification%20Project%20-%20SVM\u002FBreast_Cancer_Classification_Project.ipynb)\n+ The project is to build a machine learning model to predict whether the tumor is benign or malignant basedon several observations\u002Ffeatures.\n+ using data from [Breast Cancer Wisconsin (Diagnostic) Data Set - UCI](https:\u002F\u002Farchive.ics.uci.edu\u002Fml\u002Fdatasets\u002FBreast+Cancer+Wisconsin+(Diagnostic))\n\n## [Fandango movie ratings - Capstone Project](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F06-Capstone-Project\u002F00-Capstone-Project.ipynb)\nData Analysis and Visualization Capstone project from Machine Learning and Datascience Masterclass Course.\n+ This is the data behind the story [Be Suspicious Of Online Movie Ratings, Especially Fandango’s](http:\u002F\u002Ffivethirtyeight.com\u002Ffeatures\u002Ffandango-movies-ratings\u002F)\n+ using data from [538](https:\u002F\u002Fgithub.com\u002Ffivethirtyeight\u002Fdata)\n+ If you are planning on going out to see a movie, how well can you trust online reviews and ratings? 
Especially if the same company showing the rating also makes money by selling movie tickets.\n+ Do they have a bias towards rating movies higher than they should be rated?\n+ etc..\n\n## [Supervised Learning Capstone Project - Cohort Analysis & Customer Churn Predictions](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F17-Supervised-Learning-Capstone-Project\u002F00-Supervised-Learning-Capstone-Customer-Churn-Predictions-Project.ipynb)\n+ This project is to build a machine learning model to predict whether or not a customer will Churn or not.\n+ Includes cohort analysis based on Telco subsriber's contract type, etc.\n\n## [Predicting Heart Disease - Classification Project](Complete%20Machine%20Learning%20and%20Data%20Science%20-%20Zero%20to%20Mastery%20-%20AN\u002F11.Heart%20Disease%20Project\u002FMilestone%20Project%20-%20Heart%20Disease%20Classification.ipynb)\nMilestone project from Complete Machine Learning and Data Science - Zero to Mastery course.\n+ The project is to build a machine learning model capable of predicting whether or not someone has a Heart Disease based on their medical attributes.\n+ using data from [Heart Disease Data Set of UCI - kaggle version](https:\u002F\u002Fwww.kaggle.com\u002Fronitf\u002Fheart-disease-uci)\n\n## [Predicting Bulldozer Sale Price - Regression Project](Complete%20Machine%20Learning%20and%20Data%20Science%20-%20Zero%20to%20Mastery%20-%20AN\u002F12.Bulldozer%20Sales%20Price%20Prediction%20Project\u002FMilestone%20Project%20-%20Bluebook%20Bulldozer%20Price%20Prediction.ipynb)\nMilestone project from Complete Machine Learning and Data Science - Zero to Mastery course.\n+ The project is to build a machine learning model to predict the sale price of bulldozers based on the past prices.\n+ using data from [Blue Book for Bulldozers - kaggle version](https:\u002F\u002Fwww.kaggle.com\u002Fc\u002Fbluebook-for-bulldozers\u002Foverview)\n\n## [Deep Learning ANN Project - Dog breed 
predictions](Complete%20Machine%20Learning%20and%20Data%20Science%20-%20Zero%20to%20Mastery%20-%20AN\u002F14.Neural%20Network%20-%20Deep%20Learning%20-%20Transfer%20Learning%20with%20Tensorflow%202\u002FDog_Breed_Vision.ipynb)\nProject from Complete Machine Learning and Data Science - Zero to Mastery course.\n+ The project is to build deep learning model with Tensorflow to predict the dog breeds.\n+ using data from [Dog Breed Identification - kaggle version](https:\u002F\u002Fwww.kaggle.com\u002Fc\u002Fdog-breed-identification\u002Foverview)\n\n\n## [911 Calls - Data Capstone Project](Data%20Science%20and%20Machine%20Learning%20Bootcamp%20-%20JP\u002F10-Data-Capstone-Projects\u002FProject%20-%20911%20Calls\u002F01-911%20Calls%20Data%20Capstone%20Project%20-%20My%20Solutions.ipynb)\nData Analysis and Visualization Capstone project from Data Science and Machine Learning Bootcamp Course.\n+ analyzing 911 calls data from [kaggle](https:\u002F\u002Fwww.kaggle.com\u002Fmchirico\u002Fmontcoalert)\n+ top 5 zips code for 911 calls\n+ top 5 townships for 911 calls\n+ most common Reason for a 911\n+ different types of visualizations based on the findings\n+ etc..\n\n\n## [ML App - Random Forest Algorithm - ML Project](https:\u002F\u002Fgithub.com\u002Fptyadana\u002Fml-app)\n+ Machine learning app using streamlit, for building a regression model using the Random Forest algorithm.\n\n\n## Machine Learning & Data Science Projects\n### Masterclass Projects\n+ [Ames Housing Data Project - Linear Regression](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F10-Cross-Val-and-LinReg-Project\u002F02-Linear-Regression-Project-Exercise-MySolutions.ipynb)\n+ [Heart Disease Detection Project - Logistic Regression](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F11-Logistic-Regression-Models\u002F02-Logistic-Regression-Project-Exercise-MySolution%20.ipynb)\n+ [Sona Data - Detecting Rock or Mine Project - 
KNN](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F12-K-Nearest-Neighbors\u002F01-KNN-Exercise-MySolutions.ipynb)\n+ [Wine Fraud Detection Project - SVM](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F13-Support-Vector-Machines\u002F02-SVM-Project-Exercise-MySolutions.ipynb)\n+ [Mushroom Edible or Poisonous Prediction Project - with AdaBoost](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F16-Boosted-Trees\u002F00-AdaBoost.ipynb)\n+ [Mushroom Edible or Poisonous Prediction Project - with Gradient Boosting](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F16-Boosted-Trees\u002F01-Gradient-Boosting.ipynb)\n+ [Ecommerce Project - Linear Regression](Data%20Science%20and%20Machine%20Learning%20Bootcamp%20-%20JP\u002F11-Linear-Regression\u002F02-Linear%20Regression%20-%20Ecommerce%20Project.ipynb)\n+ [Advertisement Project - Logistic Regression](Data%20Science%20and%20Machine%20Learning%20Bootcamp%20-%20JP\u002F13-Logistic-Regression\u002F02-Logistic%20Regression%20-%20Advertisement%20Project.ipynb)\n+ [Anonymized Data Project - KNN](Data%20Science%20and%20Machine%20Learning%20Bootcamp%20-%20JP\u002F14-K-Nearest-Neighbors\u002F02-K%20Nearest%20Neighbors%20-%20Anonymized%20Data%20Project.ipynb)\n+ [Supervised Learning Capstone Project - Cohort Analysis & Customer Churn Predictions](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F17-Supervised-Learning-Capstone-Project\u002F00-Supervised-Learning-Capstone-Customer-Churn-Predictions-Project.ipynb)\n+ [NLP - Flight Tweets Sentiment Analysis - Classification](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F18-Naive-Bayes-and-NLP\u002F01-Text-Classification%20-%20Flight%20tweets.ipynb)\n+ [NLP - Moview Reivew Sentiment Analysis - 
Classification](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F18-Naive-Bayes-and-NLP\u002F02-Text-Classification-Movie_Reviews_Sentiment_Analysis.ipynb)\n+ [Color Quantization - KMeans](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F19-K-Means-Clustering\u002F01-Kmeans-Color-Quantization.ipynb)\n+ [CIA Country Analysis and Clustering - KMeans](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F19-K-Means-Clustering\u002F02-Kmeans-Clustering-Project-Exercise.ipynb)\n+ [Cars Model - Hierarchical Clustering](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F21-Hierarchical-Clustering\u002F00-Hierarchical-Clustering.ipynb)\n+ [Wholesale Customers - DBSCAN Clustering](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F22-DBSCAN\u002F02-DBSCAN-Project-Exercise.ipynb)\n+ [Breast Cancer - PCA Manual Implementation](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F23-Principal-Component-Analysis\u002F00-PCA-Manual-Implementation.ipynb)\n+ [Breast Cancer - PCA with sklearn](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F23-Principal-Component-Analysis\u002F01-PCA-with-ScikitLearn.ipynb)\n\n### Other Projects\n+ [Project - Used Car Price Prediction with XG-Boost](Project%20-%20Used%20Car%20Price%20Prediction%20-%20XG-Boost\u002FUsed_Car_Price_Prediction_using_XGBoost.ipynb)\n+ [Project - Predict Career Longevity for NBA Rookies with Binary Classification - Logistic Regression](Project%20-%20Predict%20Career%20Longevity%20for%20NBA%20Rookies%20with%20Binary%20Classification%20-%20Logistic%20Regression\u002FNBA_Rookies_Logistic_Regression.ipynb)\n+ [Project - Facial Classification - SVM](Project%20-%20Facial%20Classification%20-%20SVM)\n+ [Project - Predict Sales Revenue with Interaction Term - Multiple Linear 
Regression](Project%20-%20Predict%20Sales%20Revenue%20with%20Interaction%20Term%20-%20Multiple%20Linear%20Regression\u002FSales_Revenue_Prediction_with_Multiple_Linear_Regression.ipynb)\n+ [Project - Predict Sales Revenue - Simple Linear Regression](Project%20-%20Predict%20Sales%20Revenue%20-%20Simple%20Linear%20Regression\u002FSales_Revenue_Prediction_with_Simple_Linear_Regression.ipynb)\n+ [Project - Breast Cancer Tumor Diagnostic Classification  - SVM](Project%20-%20Breast%20Cancer%20Classification%20Project%20-%20SVM\u002FBreast_Cancer_Classification_Project.ipynb)\n+ [Project - Music Recommender](https:\u002F\u002Fgithub.com\u002Fptyadana\u002FML-Music-Recommender)\n+ [Project - Smarty Brain Image Prediction](https:\u002F\u002Fgithub.com\u002Fptyadana\u002FPython-Projects-Dojo\u002Ftree\u002Fmaster\u002F03.Complete%20Python%20Developer%20-%20Zero%20to%20Mastery%20-%20AN\u002F17.Machine%20Learning%20and%20Data%20Science\u002FML_SmartyBrain_Project)\n\n### Deep Learning Projects\n+ [Iris Flower Predictions App on Flask](https:\u002F\u002Firis-flower-app-by-ptyadana.herokuapp.com\u002F)\n+ [ANN - Loan Default Prediction Prediction Project](Data%20Science%20and%20Machine%20Learning%20Bootcamp%20-%20JP\u002F21.%20Neural%20Nets%20and%20Deep%20Learning\u002F04_Tensorflow_Keras_Project_Loan_Predictions.ipynb)\n+ [ANN - Predict House Price for House Sales in King County, USA Project](Data%20Science%20and%20Machine%20Learning%20Bootcamp%20-%20JP\u002F21.%20Neural%20Nets%20and%20Deep%20Learning\u002F02_Tensorflow_Regression_Housing_Price_Prediction.ipynb)\n+ [ANN - Breast Cancer Wisconsin (Diagnostic) Project](Data%20Science%20and%20Machine%20Learning%20Bootcamp%20-%20JP\u002F21.%20Neural%20Nets%20and%20Deep%20Learning\u002F03_Tensorflow_Classification_Breast_Cancer_Wisconsin_(Diagnostic).ipynb)\n+ [CNN - Convolutional Neural Networks for Image Classification - MNIST data 
Project](Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F04-CNNs\u002F01-Keras-CNN-MNIST%20(Grey%20Scale%20Images).ipynb)\n+ [CNN - Convolutional Neural Networks for Image Classification - CIFAR 10 data Project](Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F04-CNNs\u002F02-Keras-CNN-CIFAR-10%20(Color%20Images).ipynb)\n+ [CNN - Convolutional Neural Networks for Image Classification - Real Image - Malaria Detection Project](Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F04-CNNs\u002F03-Deep-Learning-Custom-Images-Malaria.ipynb)\n+ [CNN - Convolutional Neural Networks for Image Classification - Fashion MNIST Data Project](Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F04-CNNs\u002F04-DL-CV-Assessment%20-%20Fashion%20MNIST%20Data%20Project.ipynb)\n+ [RNN - Forzen Dessert Sales Forecasting with LSTM](Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F05-RNNs\u002F02-RNN-Forzen-Dessert-Montly-Sales-Forecasting.ipynb)\n+ [NLP - Yelp Reviews Classification - Natural Language Processing Project](Data%20Science%20and%20Machine%20Learning%20Bootcamp%20-%20JP\u002F22.%20Natural%20Language%20Processing\u002F02-NLP%20Project%20-%20Yelp.ipynb)\n+ [Average Eating Habits of UK Countries - Autoencoders](Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F07-AutoEncoders\u002F02-UK%20countries%20average%20eating%20habits%20-%20Autoencoder.ipynb)\n\n\n# Data Analysis and Visualization Projects\n+ [Data Visualization with Python - Project](https:\u002F\u002Fgithub.com\u002Fptyadana\u002FDV-Data-Visualization-with-Python): Data analysis and Data Visualization using Pandas, Matplotlib for Countries's GDP, Life Expectancy comparison across continents, GDP per Capita Relative Growth, Population Reative Growth comparison etc.\n+ [Fuel Economy Case Study - 
Project](Data%20Analysis\u002FProject%20-%20Case%20Study%202%20-%20Fuel%20Economy\u002FCase%20Study%20-%20Fuel%20Economy.ipynb): Analyzing [Fuel Economy Data provied by EPA](https:\u002F\u002Fwww.epa.gov\u002Fcompliance-and-fuel-economy-data\u002Fdata-cars-used-testing-fuel-economy) for distributions of greenhouse gas score, combined mpg in 2008 and 2018, correlation between displacement and combined mpg ,greenhouse gas score and combined mpg. Are more unique models using alternative fuels in 2018 compared to 2008? By how much? How much have vehicle classes improved in fuel economy (increased in mpg)? What are the characteristics of SmartWay vehicles? Have they changed over time? (mpg, greenhouse gas) What features are associated with better fuel economy (mpg)? What is the top vehicle which improved the most in terms of combined mpg from 2008 to 2018?\n+ [Wine Quality Case Study - Project](Data%20Analysis\u002FProject%20-%20Case%20Study%201%20-%20Wine%20Quality\u002FCase%20Study%20-%20Wine%20Quality.ipynb): Analyzing [wine data](https:\u002F\u002Farchive.ics.uci.edu\u002Fml\u002Fdatasets\u002FWine+Quality) for the following points for wine businesses to model better wine. Is a certain type of wine (red or white) associated with higher quality? What level of acidity (pH value) receives the highest average rating? Do wines with higher alcoholic content receive better ratings? Do sweeter wines (more residual sugar) receive better ratings? White Vs Red Wine Proportions by Color & Quality\n+ [TV, Halftime Shows, and the Big Game - Project](Project%20-%20TV%2C%20Halftime%20Shows%2C%20and%20the%20Big%20Game): Analyzing [Superbowls](https:\u002F\u002Fwww.datacamp.com\u002Fprojects\u002F684) data and answering questions like - What are the most extreme game outcomes? How does the game affect television viewership? How have viewership, TV ratings, and ad cost evolved over time? 
Who are the most prolific musicians in terms of halftime show performances?\n+ [Weather Trend - Project](Data%20Analysis\u002FProject%20-%20Weathaer%20Trend\u002FExploring%20Weather%20Trends%20Project.ipynb): Analyzing Global weather trends, Singapore weather trends, Comparing Global vs Singapore 10 years Moving Average trends\n+ [Real-time Insights from Social Media Data - Project](Project%20-%20Real-time%20Insights%20from%20Social%20Media%20Data): Analyzing [Twitter](https:\u002F\u002Fwww.datacamp.com\u002Fprojects\u002F760) data and answering questions like: What are gobal trend and local trends?, finding the common trends\n+ frequency analysis on tweets and hashtags, etc.\n+ [Statistics From Stock Data](AI%20Programming%20with%20Python\u002F01.Mini%20Project%20UDACITY%20-%20Statistics%20From%20Stock%20Data\u002FStatistics%20from%20Stock%20Data.ipynb): Analyzing google, apple and amzon stock prices and checking the rolling mean.\n+ [Android Play Store App Data Analysis - Project](Project%20-%20Android%20Play%20Store%20App%20Data%20Analysis): Analyzing andriod play store data and answering questions like - How many apps are paid? How much money are they making? When were these apps released?\n\n------------------------------------------------------------------\n\n# Bootcamps\n\n## [RL - Practical AI with Python and Reinforcement Learning - JP - On Hold](RL%20-%20Practical%20AI%20with%20Python%20and%20Reinforcement%20Learning%20-%20JP)\n+ [x] 00. NumPy Crash Course\n+ [x] 01. Matplotlib Visualization\n+ [x] 02. Pandas and Scikit-learn\n+ [x] 03. ANNs\n+ [x] 04. CNNs\n+ [x] 05. Introduction to gym\n+ [ ] 06. Classical Q Learning\n+ [ ] 07. Deep Q Learning\n+ [ ] 08. Deep Q Learning on Images\n+ [ ] 09. 
Creating Custom Open AI Gym Environment\n\n## [Tensorflow 2.0: Deep Learning and Artificial Intelligence - LP](Tensorflow%20-%20TF2%20Deep%20Learning%20and%20Artificial%20Intelligence%20-%20LP)\n+ [x] Section 2 - Google Colab\n+ [ ] Section 3 - Machine Learning and Neurons\n+ [ ] Section 4 - Feedforward Artifical Neural Networks\n+ [ ] Section 5 - CNN Convolutional Neural Networks\n+ [ ] Section 6 - RNN - Recurrent Neural Networks, Time Series, Sequence Data\n+ [ ] Section 7 - NLP\n+ [ ] Section 8 - Recommender Systems\n+ [ ] Section 9 - Transfer Learning for Computer Vision\n+ [ ] Section 10 - GANs\n+ [ ] Section 11 - Deep Reinforcement Learning (Theory)\n+ [ ] Section 12 - Stock Trading Project with DL\n+ [ ] Section 13: Advanced Tensorflow Usage\n+ [ ] Section 14: Low - Level Tensorflow\n+ [ ] Section 15: In-Depth: Loss Functions\n+ [ ] Section 16: In-Depth: Gradient Descent\n+ [ ] Section 17 - 21: Misc\n\n\n## [DeepLearning.AI - Course 04.Sequences, Time Series and Predictions in Tensorflow](Tensorflow%20-%20DeepLearning.AI%20-%2004.Sequences%2C%20Time%20Series%20and%20Predictions)\n+ [x] Week 01 - Sequences and Prediction\n+ [ ] Week 02 - Deep Neural Networks for Time Series\n+ [ ] Week 03 - Recurrent Neural Networks for Time Series\n+ [ ] Week 04 - Real-world time series data\n\n## [DeepLearning.AI - Course 03.Netural Language Processing in Tensorflow](Tensorflow%20-%20DeepLearning.AI%20-%2003.Natural%20Language%20Processing%20in%20TensorFlow)\n+ [x] Week 01 - Sentiment in Text\n+ [x] Week 02 - Word Embeddings\n+ [x] Week 03 - Sequence Models\n+ [x] Week 04 - Sequence Models and Literature\n\n## [DeepLearning.AI - Course 02.Convolutional Neural Networks in TensorFlow](Tensorflow%20-%20DeepLearning.AI%20-%2002.Convolutional%20Neural%20Networks%20in%20TensorFlow)\n+ [x] Week 01 - Exploring a Larger Dataset\n+ [x] Week 02 - Augmentation: A technique to avoid overfitting\n+ [x] Week 03 - Transfer Learning\n+ [x] Week 04 - Multiclass Classification\n\n## 
[DeepLearning.AI - Course 01.Introduction to TensorFlow for Artificial Intelligence, Machine Learning, and Deep Learning](Tensorflow%20-%20DeepLearning.AI%20-%2001.Introduction%20to%20TensorFlow%20for%20AI%2C%20ML%20and%20DL_v2)\n+ [x] Week 01 - A New Programming Paradigm\n+ [x] Week 02 - Introduction to Computer Vision\n+ [x] Week 03 - Enhancing Vision with CNN\n+ [x] Week 04 - Using Real-world images\n\n## [Deep Learning TensorFlow Developer Certificate - ZTM - IN PROGRESS](Tensorflow%20-%20Deep%20Learning%20TensorFlow%20Developer%20Certificate%20-%20ZTM)\n+ [x] 01. Introduction\n+ [x] 02. Deep Learning and Tensorflow Fundamentals\n+ [ ] 03. Neural Network Regression with Tensorflow\n+ [ ] 04. Neural Network Classification with Tensorflow\n+ [ ] 05. Computer Vision and Convolutional Neural Networks in Tensorflow\n+ [ ] 06. Transfer Learning - Feature Extraction\n+ [ ] 07. Transfer Learning - Fine Tuning\n+ [ ] 08. Transfer Learning - Scaling up\n+ [ ] 09. Milestone Project 1 - Food Vision Big\n+ [ ] 10. NLP Fundamentals in Tensorflow\n+ [ ] 11. Milestone Project 2 - SkimLit\n+ [ ] 12. Timseries Fundamentals + Milestone Project 3 - BitPredict\n+ [ ] 13. Passing Tensorflow Certificate Exam\n+ [ ] 15. Appendix - Machine Learning Primer\n+ [ ] 16. Appendix - Machine Learning Framework\n+ [ ] 14, 17-19. 
Misc\n\n\n## [Complete Tensorflow 2 and Keras Deep Learning Bootcamp - JP](Tensorflow%20-%20Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP)\n+ NumPy Crash Course\n+ Pandas Crash Course\n+ Visualization Crash Course\n+ Basic Artifical Neural Networks - ANNs\n\t+ [Basic Keras Project](Tensorflow%20-%20Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F03-ANNs\u002F00-Keras-Syntax-Basics.ipynb)\n\t+ [Predict House Price for House Sales in King County, USA - Regression Project](Tensorflow%20-%20Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F03-ANNs\u002F01-Keras-Regression.ipynb)\n\t+ [Breast Cancer Wisconsin (Diagnostic) - Classification Project](Tensorflow%20-%20Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F03-ANNs\u002F02-Keras-Classification.ipynb)\n\t+ [Loan Default Prediction Prediction - Classification Project](Tensorflow%20-%20Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F03-ANNs\u002F03-Keras-Project-Exercise-Loan-Predictions.ipynb)\n\t+ Tensorboard\n+ Convolutional Neural Networks - CNNs\n\t+ [Convolutional Neural Networks for Image Classification - MNIST data](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F04-CNNs\u002F01-Keras-CNN-MNIST%20(Grey%20Scale%20Images).ipynb)\n\t+ [Convolutional Neural Networks for Image Classification - CIFAR 10 data](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F04-CNNs\u002F02-Keras-CNN-CIFAR-10%20(Color%20Images).ipynb)\n\t+ [Convolutional Neural Networks for Image Classification - Real Image - Malaria Detection 
Project](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F04-CNNs\u002F03-Deep-Learning-Custom-Images-Malaria.ipynb)\n\t+ [Convolutional Neural Networks for Image Classification - Fashion MNIST Data Project](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F04-CNNs\u002F04-DL-CV-Assessment%20-%20Fashion%20MNIST%20Data%20Project.ipynb)\n+ Recurrent Neural Networks - RNNs\n\t+ [Sinewave Example](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F05-RNNs\u002F00-RNN-SineWave-Example.ipynb)\n\t+ [RNN Example for Time Series - Advance Monthly Sales for Retail and Food Services](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F05-RNNs\u002F01-RNN-Time-Series-Example-MonthlySalesRetailFood.ipynb)\n\t+ [RNN Forzen Dessert Sales Forecasting with LSTM](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F05-RNNs\u002F02-RNN-Forzen-Dessert-Montly-Sales-Forecasting.ipynb)\n\t+ [Multivariate Time Series with RNN](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F05-RNNs\u002F04-Multivariate-Time-Series-RNN.ipynb)\n+ Natural Language Processing - NLP\n\t+ [Generating Text with RNNs - Shakespeare](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F06-NLP-and-Text-Data\u002F00_Generating_Text_with_RNNs_Shakespears.ipynb)\n+ Auto Encoders\n\t+ [AutoEncoders for Dimensionality Reduction](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F07-AutoEncoders\u002F00-Autoencoders.ipynb)\n\t+ [AutoEncoders on Image Data - Dimensionality Reduction & Noise 
Removal](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F07-AutoEncoders\u002F01-Autoencoders-on-Image-Data.ipynb)\n\t+ [Average Eating Habits of UK Countries - Autoencoders](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F07-AutoEncoders\u002F02-UK%20countries%20average%20eating%20habits%20-%20Autoencoder.ipynb)\n+ Generative Adversarial Networks - GANs\n\t+ [GANs - Generative Adversarial Networks with Dense Layers](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F08-GANs\u002F00-Generative-Adversarial-Networks-with-Dense-Layers.ipynb)\n\t+ [DCGANs - Deep Convolutional Generative Adversarial Networks](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F08-GANs\u002F01-DCGANS.ipynb)\n+ Deployment\n\t+ [Iris Flower Predictions Project](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F09-Deployment\u002F00-Iris-Flower-Predictions-Basic-Model.ipynb)\n\t+ [Iris Flower Predictions App on Flask](https:\u002F\u002Firis-flower-app-by-ptyadana.herokuapp.com\u002F)\n\n## [Machine Learning & Data Science Masterclass - JP](Machine%20Learning%20&%20Data%20Science%20Masterclass%20-%20JP)\n+ new track [2021 Python for Machine Learning & Data Science Masterclass](https:\u002F\u002Fwww.udemy.com\u002Fcourse\u002Fpython-for-machine-learning-data-science-masterclass\u002F)\n+ Python Crash Course\n+ NumPy\n+ Pandas\n+ Matplotlib\n+ Seaborn Data Visualizations\n+ Data Analysis and Data Visualization Capstone Project\n    + [Fandango Vs other sites movie ratings](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F06-Capstone-Project)\n+ Linear Regression Models\n+ Feature Engineering and Data Preparation\n+ Cross Validation, Grid Search and Linear Regression Project\n\t+ [Ames Housing Data 
Project](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F10-Cross-Val-and-LinReg-Project\u002F02-Linear-Regression-Project-Exercise-MySolutions.ipynb)\n+ Logistic Regression Models\n\t+ [Heart Disease Detection Project](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F11-Logistic-Regression-Models\u002F02-Logistic-Regression-Project-Exercise-MySolution%20.ipynb)\n+ KNN - K Nearest Neighbors\n\t+ [Sona Data - Detecting Rock or Mine Project](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F12-K-Nearest-Neighbors\u002F01-KNN-Exercise-MySolutions.ipynb)\n+ SVM - Support Vector Machines\n\t+ [Wine Fraud Detection Project](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F13-Support-Vector-Machines\u002F02-SVM-Project-Exercise-MySolutions.ipynb)\n+ Tree Based Methods - Decision Tree Learning\n+ Random Forests\n+ Boosting Methods\n\t+ [Mushroom Edible or Poisonous Prediction Project - with AdaBoost](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F16-Boosted-Trees\u002F00-AdaBoost.ipynb)\n\t+ [Mushroom Edible or Poisonous Prediction Project - with Gradient Boosting](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F16-Boosted-Trees\u002F01-Gradient-Boosting.ipynb)\n+ [Supervised Learning Capstone Project - Cohort Analysis & Customer Churn Predictions](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F17-Supervised-Learning-Capstone-Project\u002F00-Supervised-Learning-Capstone-Customer-Churn-Predictions-Project.ipynb)\n+ Naive Bayes Classification and Natural Language Processing (Supervised Learning)\n\t+ [NLP - Feature Extraction](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F18-Naive-Bayes-and-NLP\u002F00-Feature-Extraction-From-Text.ipynb)\n\t+ [Flight Tweets Sentiment Analysis - 
Classification](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F18-Naive-Bayes-and-NLP\u002F01-Text-Classification%20-%20Flight%20tweets.ipynb)\n\t+ [Movie Review Sentiment Analysis - Classification](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F18-Naive-Bayes-and-NLP\u002F02-Text-Classification-Movie_Reviews_Sentiment_Analysis.ipynb)\n+ K Means Clustering (Unsupervised Learning)\n\t+ [Color Quantization](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F19-K-Means-Clustering\u002F01-Kmeans-Color-Quantization.ipynb)\n\t+ [CIA Country Analysis and Clustering](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F19-K-Means-Clustering\u002F02-Kmeans-Clustering-Project-Exercise.ipynb)\n+ Hierarchical Clustering (Unsupervised Learning)\n\t+ [Cars Model Clustering](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F21-Hierarchical-Clustering\u002F00-Hierarchical-Clustering.ipynb)\n+ DBSCAN (Unsupervised Learning)\n\t+ [DBSCAN - Theory and Intuition](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F22-DBSCAN\u002F00-DBSCAN.ipynb)\n\t+ [Hyperparameter Tuning](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F22-DBSCAN\u002F01-DBSCAN-Hyperparameters.ipynb)\n\t+ [Wholesale Customers - Clustering](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F22-DBSCAN\u002F02-DBSCAN-Project-Exercise.ipynb)\n+ Principal Component Analysis (Unsupervised Learning)\n\t+ [PCA Manual Implementation](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F23-Principal-Component-Analysis\u002F00-PCA-Manual-Implementation.ipynb)\n\t+ [PCA with sklearn](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F23-Principal-Component-Analysis\u002F01-PCA-with-ScikitLearn.ipynb)\n\t+ [PCA - Handwritten Digits 
classifications](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F23-Principal-Component-Analysis\u002F02-PCA-Exercise-Project.ipynb)\n+ Model Deployment\n\t+ Serving model as API with Flask\n\n## [Complete Machine Learning and Data Science - Zero to Mastery](Complete%20Machine%20Learning%20and%20Data%20Science%20-%20Zero%20to%20Mastery%20-%20AN)\n+ Data Analysis with Pandas\n+ Data Analysis with NumPy\n+ Linear Regression with Polyfit - Data 36\n+ Matplotlib - Data Visualizations\n+ Scikit-learn - Creating Machine Learning Models\n+ [Milestone Project - Supervised Learning (Classification)- Heart Disease Detection](Complete%20Machine%20Learning%20and%20Data%20Science%20-%20Zero%20to%20Mastery%20-%20AN\u002F11.Heart%20Disease%20Project\u002FMilestone%20Project%20-%20Heart%20Disease%20Classification.ipynb)\n+ [Milestone Project - Supervised Learning (Regression)- Bulldozer Sales Price Prediction](Complete%20Machine%20Learning%20and%20Data%20Science%20-%20Zero%20to%20Mastery%20-%20AN\u002F12.Bulldozer%20Sales%20Price%20Prediction%20Project\u002FMilestone%20Project%20-%20Bluebook%20Bulldozer%20Price%20Prediction.ipynb)\n+ [Deep Learning Project - Dog breed predictions](Complete%20Machine%20Learning%20and%20Data%20Science%20-%20Zero%20to%20Mastery%20-%20AN\u002F14.Neural%20Network%20-%20Deep%20Learning%20-%20Transfer%20Learning%20with%20Tensorflow%202\u002FDog_Breed_Vision.ipynb)\n\n\n## [ML - Machine Learning & Data Science A-Z Hands-on Python - NS](ML%20-%20Machine%20Learning%20%26%20Data%20Science%20A-Z%20Hands-on%20Python%20-%20NS)\n+ [x] 03. Preprocessing\n+ [x] 04. Machine Learning Types\n+ [x] 05. Supervised Learning - Classification\n+ [x] 06. Supervised Learning - Regression\n+ [x] 07. Unsupervised Learning - Clustering\n+ [x] 08. 
Hyper Parameters Optimization\n\n\n\n## [Data Science and Machine Learning Bootcamp](Data%20Science%20and%20Machine%20Learning%20Bootcamp%20-%20JP)\n+ Python Crash Course\n+ Python for Data Analysis - NumPy\n+ Python for Data Analysis - Pandas\n+ Python for Data Visualization - Matplotlib\n+ Python for Data Visualization - Seaborn\n+ Pandas Built In Data Visualization\n+ Visualization with Plotly and Cufflinks\n+ Data Capstone Projects\n    + [911 Calls - Data Capstone Project](Data%20Science%20and%20Machine%20Learning%20Bootcamp%20-%20JP\u002F10-Data-Capstone-Projects\u002FProject%20-%20911%20Calls\u002F01-911%20Calls%20Data%20Capstone%20Project%20-%20My%20Solutions.ipynb)\n+ Linear Regression\n\t+ [Ecommerce Project](Data%20Science%20and%20Machine%20Learning%20Bootcamp%20-%20JP\u002F11-Linear-Regression\u002F02-Linear%20Regression%20-%20Ecommerce%20Project.ipynb)\n+ Logistic Regression\n\t+ [Advertisement Project](Data%20Science%20and%20Machine%20Learning%20Bootcamp%20-%20JP\u002F13-Logistic-Regression\u002F02-Logistic%20Regression%20-%20Advertisement%20Project.ipynb)\n+ K Nearest Neighbors (KNN)\n\t+ [Anonymized Data Project](Data%20Science%20and%20Machine%20Learning%20Bootcamp%20-%20JP\u002F14-K-Nearest-Neighbors\u002F02-K%20Nearest%20Neighbors%20-%20Anonymized%20Data%20Project.ipynb)\n+ Decision Tree and Random Forest\n\t+ [Loan Prediction Project](Data%20Science%20and%20Machine%20Learning%20Bootcamp%20-%20JP\u002F15-Decision-Trees-and-Random-Forests\u002F02-Decision%20Trees%20and%20Random%20Forest%20-%20Loan%20Prediction%20Project.ipynb)\n+ Support Vector Machine (SVM)\n\t+ [Iris Flower Project](Data%20Science%20and%20Machine%20Learning%20Bootcamp%20-%20JP\u002F16-Support-Vector-Machines\u002F02-Support%20Vector%20Machines%20Project.ipynb)\n+ K Means Clustering\n+ Principal Component Analysis\n+ Recommender Systems\n+ Natural Language Processing\n\t+ [Yelp Reviews 
Classification](Data%20Science%20and%20Machine%20Learning%20Bootcamp%20-%20JP\u002F22.%20Natural%20Language%20Processing\u002F02-NLP%20Project%20-%20Yelp.ipynb)\n+ Neural Nets and Deep Learning\n\t+ [Regression Project - Predict House Price for House Sales in King County, USA](Data%20Science%20and%20Machine%20Learning%20Bootcamp%20-%20JP\u002F21.%20Neural%20Nets%20and%20Deep%20Learning\u002F02_Tensorflow_Regression_Housing_Price_Prediction.ipynb)\n\t+ [Classification Project - Breast Cancer Wisconsin (Diagnostic)](Data%20Science%20and%20Machine%20Learning%20Bootcamp%20-%20JP\u002F21.%20Neural%20Nets%20and%20Deep%20Learning\u002F03_Tensorflow_Classification_Breast_Cancer_Wisconsin_(Diagnostic).ipynb)\n\t+ [Final Project - Classification - Loan Default Prediction](Data%20Science%20and%20Machine%20Learning%20Bootcamp%20-%20JP\u002F21.%20Neural%20Nets%20and%20Deep%20Learning\u002F04_Tensorflow_Keras_Project_Loan_Predictions.ipynb)\n\t+ TensorBoard\n+ Big Data and Spark with Python\n+ SciPy\n\n## Complete Data Science Bootcamp - 365\n+ [x] Part 1 - The Field of Data Science\n+ [x] Part 2 - Probability\n+ [ ] Part 3 - Statistics (Descriptive & Inferential)\n+ [x] Part 4 - Python\n+ [ ] Part 5 - Advanced Statistical Methods in Python \u002F Machine Learning in Python\n+ [x] Part 6 - Mathematics\n+ [ ] Part 7 - Deep Learning\n+ [ ] Software Integration\n+ [ ] Case Study - Absenteeism\n\n------------------------------------------------------------------\n# Books\n\n## [Hands-On Machine Learning with Scikit-Learn, Keras, and TensorFlow (in progress)](Hands-On%20Machine%20Learning%20with%20Scikit-Learn%2C%20Keras%2C%20and%20TensorFlow)\n+ [x] The Fundamentals of Machine Learning\n+ [x] The Machine Learning Landscape\n+ [x] End-to-End Machine Learning Project\n+ [x] Classification\n+ [ ] Training Models\n\n## The Hundred-Page Machine Learning Book\n- [x] Introduction\n- [x] Notation and Definitions\n- [x] Fundamental Algorithms\n- [x] Anatomy of a Learning Algorithm\n- [x] 
Basic Practice \n- [ ] Neural Networks and Deep Learning \n- [ ] Problems and Solutions\n- [ ] Advanced Practice\n- [ ] Unsupervised Learning\n- [ ] Unsupervised Learning - in-depth material\n- [ ] Other Forms of Learning\n- [ ] Conclusion\n------------------------------------------------------------------\n\n# Advancing Machine Learning & Data Science Journey - (In Progress)\nTo level up my ML & DS skills in specific areas and topics:\n\n## [Applied Machine Learning - Ensemble Learning](ML%20-%20Applied%20Machine%20Learning%20-%20Ensemble%20Learning)\n+ Project: Titanic dataset\n+ 01.ML Basic\n+ 02.Preparing the Data\n+ 03.Ensemble Learning\n+ 04.Boosting\n+ 05.Bagging\n+ 06.Stacking\n+ 07.Evaluation and Selection of Models\n\n## [Applied Machine Learning - Feature Engineering](ML%20-%20Applied%20Machine%20Learning%20-%20Feature%20Engineering)\n+ Project: Titanic dataset\n+ 01.ML Basic\n+ 02.Intro to Feature Engineering\n+ 03.Explore Data\n+ 04.Create and Clean Features\n+ 05.Prepare Features for Modelling\n+ 06.Compare and Evaluate Models\n\n## [Applied Machine Learning - Algorithms](ML%20-%20Applied%20Machine%20Learning%20-%20Algorithms)\n+ Project: Titanic dataset\n+ 01.Review of Foundation\n+ 02.Logistic Regression\n+ 03.Support Vector Machine\n+ 04.Multi-layer Perceptron\n+ 05.Random Forest\n+ 06.Boosting\n+ 07.Final Model Selection and Evaluation\n\n## [Applied Machine Learning - Foundation](ML%20-%20Applied%20Machine%20Learning%20-%20Foundation)\n+ Project: Titanic dataset\n+ 01.ML Basic\n+ 02.Exploratory Data Analysis and Data Cleaning\n+ 03.Evaluation - Measuring Success\n+ 04.Optimizing a Model\n+ 05.End to End Pipeline\n\n## [ML - Mistakes to avoid in Machine Learning](ML%20-%20Mistakes%20to%20avoid%20in%20Machine%20Learning\u002F01_00_Mistakes%20to%20avoid%20in%20Machine%20Learning.ipynb)\n+ [x] Assuming Data is good to go\n+ [x] Neglecting to consult subject matter experts\n+ [x] Overfitting your models\n+ [x] Not standardizing your data\n+ 
[x] Focusing on Wrong Factors\n+ [x] Data Leakage\n+ [x] Forgetting traditional statistics tools\n+ [x] Assuming Deployment is a breeze\n+ [x] Assuming Machine Learning is the answer\n+ [x] Developing in a silo\n+ [x] Not treating for imbalanced sampling\n+ [x] Interpreting your coefficients without properly treating for multicollinearity\n+ [x] Evaluating by accuracy alone\n+ [x] Giving overly technical presentations\n\n\n## Deep Learning, Machine Learning, AI & Data Science\n- [x] [Deep Learning - Natural Language Processing with TensorFlow](Tensorflow%20-%20Natural%20Language%20Proceessing%20with%20Tensorflow)\n- [ ] [Deep Learning - Face Recognition](DL%20-%20Deep%20Learning%20-%20Face%20Recognition\u002FmyNotes.ipynb)\n- [x] [Deep Learning - Image Recognition](DL%20-%20Deep%20Learning%20-%20Image%20Recognition)\n- [x] [Deep Learning - Building Deep Learning Applications with Keras 2.0](DL%20-%20Deep%20Learning%20-%20Building%20Deep%20Learning%20Applications)\n- [x] [Applied Machine Learning - Ensemble Learning](ML%20-%20Applied%20Machine%20Learning%20-%20Ensemble%20Learning)\n- [x] [Applied Machine Learning - Feature Engineering](ML%20-%20Applied%20Machine%20Learning%20-%20Feature%20Engineering)\n- [x] [Applied Machine Learning - Algorithms](ML%20-%20Applied%20Machine%20Learning%20-%20Algorithms)\n- [x] [Applied Machine Learning - Foundation](ML%20-%20Applied%20Machine%20Learning%20Foundation)\n- [x] [Machine Learning with Python - 03_k-Means Clustering](ML%20-%20Machine%20Learning%20with%20Python%20-%2003_k-Means%20Clustering)\n- [x] [Machine Learning with Python - 02_Decision Trees](ML%20-%20Machine%20Learning%20with%20Python%20-%2002_Decision%20Trees)\n- [x] [Machine Learning with Python - 01_Foundations](ML%20-%20Machine%20Learning%20with%20Python%20-%2001_Foundations)\n- [x] [ML - Mistakes to avoid in Machine Learning](ML%20-%20Mistakes%20to%20avoid%20in%20Machine%20Learning\u002F01_00_Mistakes%20to%20avoid%20in%20Machine%20Learning.ipynb)\n- [x] [ML - 
Classification Modelling with Iris flowers](Project%20-%20Classification%20Modelling%20with%20Iris%20flowers%20-%20Pinata%20Data)\n- [x] [Data Science A-Z Modeling](DS%20-%20Data%20Science%20A-Z)\n- [x] [Designing for Neural Networks and AI Interfaces](ML%20-%20Designing%20for%20Neural%20Networks%20and%20AI%20Interfaces\u002Fmynotes.md)\n- [x] Introduction to GPT-3: A Leap in Artificial Intelligence\n\n\n## Data Analysis, Manipulation & Data Visualization\n+ [ ] [DA & DV - Python Data Analysis & Visualization Masterclass](DV%20-%20Python%20Data%20Analysis%20%26%20Visualization%20Masterclass%20-%20CS)\n+ [x] [Pandas - Pandas Code Challenges](Pandas%20-%20Pandas%20Code%20Challenge)\n+ [x] [Pandas - Advanced Pandas](Pandas%20-%20Advanced%20Pandas)\n+ [x] [DV - Data Visualizations with Plotly](DV%20-%20Data%20Visualizations%20with%20Plotly)\n+ [x] [DA - Data Analysis with Pandas and Python - BP](Pandas%20-%20Data%20Analysis%20with%20Pandas%20and%20Python%20-%20BP)\n+ [x] [DA - Python Data Playbook - Cleaning Data](Pandas%20-%20Python%20Data%20Playbook%20-%20Cleaning%20Data)\n+ [x] [Pandas - Pandas Playbook - Manipulating Data](Pandas%20-%20Pandas%20Playbook%20-%20Manipulating%20Data)\n+ [x] [More Python Data Tools - Microsoft](More%20Python%20Data%20Tools%20-%20Microsoft)\n\n## Apache Spark & PySpark\n- [x] [Intro to Spark SQL and DataFrames](Spark%20-%20Spark%20SQL%20and%20Data%20Frames)\n- [x] [Apache Spark Essential Training](Spark%20-%20Apache%20Spark%20Essential%20Training)\n- [ ] Spark for Machine Learning & AI\n- [x] [Apache PySpark by Example](Spark%20-%20Apache%20PySpark%20by%20Example)\n- [x] [Apache Spark Deep Learning Essential Training](Spark%20-%20Apache%20Spark%20Deep%20Learning%20Essential%20Training)\n\n## Data Scientist Reading Materials\n+ Supervised Learning\n\t+ [x] Lesson 01: Machine Learning Bird's Eye View\n\t+ [ ] Lesson 02: Linear Regression\n\t+ [ ] Lesson 03: Perceptron Algorithm\n\t+ [ ] Lesson 04: Decision Trees\n\t+ [ ] Lesson 05: Naive 
Bayes\n\t+ [ ] Lesson 06: Support Vector Machines\n\t+ [ ] Lesson 07: Ensemble Methods\n\t+ [x] Lesson 08: Model Evaluation Metrics\n\t+ [ ] Lesson 09: Training and Tuning\n\t+ [ ] Lesson 10: Finding Donors Project\n\t\n## [Kaggle Courses](https:\u002F\u002Fwww.kaggle.com\u002Flearn)\n- [x] Python\n- [x] Pandas\n- [x] Data Cleaning\n- [x] Introduction to Machine Learning\n- [x] Machine Learning Intermediate\n- [ ] Feature Engineering\n- [ ] Machine Learning Explainability\n- [x] Data Visualization\n- [ ] Intro to Deep Learning\n- [ ] Intro to Game AI and Reinforcement Learning\n- [ ] Natural Language Processing\n- [ ] Micro-challenges\n- [ ] Computer Vision\n- [ ] Intro to SQL\n- [ ] Advanced SQL\n\n## [Google ML courses](https:\u002F\u002Fdevelopers.google.com\u002Fmachine-learning\u002Fcrash-course)\n- [ ] ML Crash Course\n- [ ] Problem Framing\n- [ ] Data Prep\n- [ ] Clustering\n- [ ] Recommendation\n- [ ] Testing and Debugging\n- [ ] GANs\n\n\n## [Probability & Statistics (in progress)](Probability%20%26%20Statistics)\n+ Linear Regression Analysis\n+ Multiple Regression Analysis\n+ Practical Statistics\n    + [Admission Case Study with Python (Simpson's Paradox)](Probability%20%26%20Statistics\u002FPratical%20Statistics\u002FAdmission%20Case%20Study)\n\t+ Simulating Coin Flips & Probability\n\t+ Simulating multiple Coin Flips & Binomial Distribution\n\t+ Cancer Test Results\n\t+ Conditional Probability & Bayes Rules\n+ Excel Data Manipulation, Analysis and Visualization\n\n\n## [Data Science Math Skills - Duke University](https:\u002F\u002Fwww.coursera.org\u002Flearn\u002Fdatasciencemathskills)\nTopics include:\n+ Set theory, including Venn diagrams\n+ Properties of the real number line\n+ etc\n\n\n## License\nThis project is licensed under the MIT License - see the [LICENSE.md](LICENSE) file for details\n","# 数据科学、机器学习与可视化道场\n\n数据科学与机器学习项目的集合，也是我练习数据科学、机器学习、深度学习和数据可视化相关技能、理论、概率、统计等内容的道场。\n\n### 使用的技术栈\n#### 机器学习、深度学习、数据科学库\n+ 
[NumPy](https:\u002F\u002Fnumpy.org\u002F) - 用于Python科学计算的包\n+ [Pandas](https:\u002F\u002Fpandas.pydata.org\u002F) - 快速、强大、灵活且易于使用的开源数据分析和操作工具\n+ [Pandas Profiling](https:\u002F\u002Fpypi.org\u002Fproject\u002Fpandas-profiling\u002F) - 从DataFrame生成报告\n+ [Geo Pandas](https:\u002F\u002Fpypi.org\u002Fproject\u002Fgeopandas\u002F) - 为Pandas对象提供地理数据支持。\n+ [Scikit-learn](https:\u002F\u002Fscikit-learn.org\u002Fstable\u002F) - 简单高效的预测性数据分析工具\n+ [TensorFlow](https:\u002F\u002Fwww.tensorflow.org\u002F) - 一个端到端的开源机器学习平台\n+ [Keras](https:\u002F\u002Fkeras.io\u002F) - 深度学习框架\n+ [NLTK](https:\u002F\u002Fwww.nltk.org\u002F) - 自然语言处理工具包\n+ [dlib](http:\u002F\u002Fdlib.net\u002F) - 用于在C++中构建实际机器学习和数据分析应用的工具包\n+ [Face Recognition](https:\u002F\u002Fgithub.com\u002Fageitgey\u002Fface_recognition) - 全球最简单的Python和命令行人脸识别API\n\n#### 数据可视化库\n+ [Matplotlib](https:\u002F\u002Fmatplotlib.org\u002F) - 一个功能全面的库，用于在Python中创建静态、动画和交互式可视化\n+ [Seaborn](https:\u002F\u002Fseaborn.pydata.org\u002F) - 统计数据可视化\n+ [Bokeh](https:\u002F\u002Fdocs.bokeh.org\u002Fen\u002Flatest\u002Findex.html) - 面向现代Web浏览器的交互式可视化库\n+ [Plotly](https:\u002F\u002Fplotly.com\u002F) - ML和数据科学模型的前端\n+ [Cufflinks](https:\u002F\u002Fpypi.org\u002Fproject\u002Fcufflinks\u002F) - Plotly + Pandas的生产力工具\n\n#### 转化为Web应用\n+ [Streamlit](https:\u002F\u002Fwww.streamlit.io\u002F) - 构建和分享数据应用的最快方式\n+ [Flask](https:\u002F\u002Fflask.palletsprojects.com\u002Fen\u002F1.1.x\u002F) - 用Python编写的微型Web框架\n\n#### Spark\n+ [Apache Spark](https:\u002F\u002Fspark.apache.org\u002F) - 用于大规模数据处理的统一分析引擎。\n+ [Spark with pyspark](https:\u002F\u002Fspark.apache.org\u002Fdocs\u002Flatest\u002Fapi\u002Fpython\u002Findex.html) - PySpark是Apache Spark与Python的结合\n+ [Databricks](https:\u002F\u002Fdatabricks.com\u002F) - 统一的数据分析平台 - 一个云平台，适用于大规模数据工程和协作式数据科学。\n\n#### 工具与数据源\n+ [Jupyter Notebook](https:\u002F\u002Fjupyter.org\u002F) - 用于数据分析的Notebook系统\n+ [Google Colab](https:\u002F\u002Fcolab.research.google.com\u002F) - Google提供的优秀Notebook系统，可免费使用GPU\n+ 
[Kaggle](https:\u002F\u002Fwww.kaggle.com\u002F) - 数据集资源库\n+ [Plotly Chart Studio](https:\u002F\u002Fplotly.com\u002Fchart-studio\u002F) - 在线发布和嵌入交互式图表的最快方式\n\n------------------------------------------------------------------\n\n# 项目\n\n## [乳腺癌肿瘤诊断 - 分类项目](Project%20-%20Breast%20Cancer%20Classification%20Project%20-%20SVM\u002FBreast_Cancer_Classification_Project.ipynb)\n+ 该项目旨在基于多项观察和特征，构建一个机器学习模型来预测肿瘤是良性还是恶性。\n+ 使用来自[威斯康星州乳腺癌（诊断）数据集 - UCI](https:\u002F\u002Farchive.ics.uci.edu\u002Fml\u002Fdatasets\u002FBreast+Cancer+Wisconsin+(Diagnostic))的数据。\n\n## [Fandango电影评分 - Capstone项目](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F06-Capstone-Project\u002F00-Capstone-Project.ipynb)\n来自机器学习和数据科学大师班课程的数据分析与可视化Capstone项目。\n+ 这是故事[对在线电影评分要保持警惕，尤其是Fandango的评分](http:\u002F\u002Ffivethirtyeight.com\u002Ffeatures\u002Ffandango-movies-ratings\u002F)背后的数据。\n+ 使用来自[538](https:\u002F\u002Fgithub.com\u002Ffivethirtyeight\u002Fdata)的数据。\n+ 如果你打算去看电影，你能多大程度信任在线评论和评分呢？特别是当提供评分的公司同时也通过销售电影票获利时。\n+ 他们是否倾向于将电影评分定得比实际更高？\n+ 等等。\n\n## [监督学习Capstone项目 - 队列分析与客户流失预测](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F17-Supervised-Learning-Capstone-Project\u002F00-Supervised-Learning-Capstone-Customer-Churn-Predictions-Project.ipynb)\n+ 该项目旨在构建一个机器学习模型，以预测客户是否会流失。\n+ 包括基于电信用户合同类型等因素的队列分析。\n\n## [心脏病预测 - 分类项目](Complete%20Machine%20Learning%20and%20Data%20Science%20-%20Zero%20to%20Mastery%20-%20AN\u002F11.Heart%20Disease%20Project\u002FMilestone%20Project%20-%20Heart%20Disease%20Classification.ipynb)\n来自“零基础到精通”完整机器学习和数据科学课程的里程碑项目。\n+ 该项目旨在构建一个能够根据个人医疗属性预测其是否患有心脏病的机器学习模型。\n+ 使用来自[UCI心脏病数据集 - Kaggle版本](https:\u002F\u002Fwww.kaggle.com\u002Fronitf\u002Fheart-disease-uci)的数据。\n\n## [推土机售价预测 - 回归项目](Complete%20Machine%20Learning%20and%20Data%20Science%20-%20Zero%20to%20Mastery%20-%20AN\u002F12.Bulldozer%20Sales%20Price%20Prediction%20Project\u002FMilestone%20Project%20-%20Bluebook%20Bulldozer%20Price%20Prediction.ipynb)\n来自“零基础到精通”完整机器学习和数据科学课程的里程碑项目。\n+ 
该项目旨在构建一个机器学习模型，以根据历史价格预测推土机的售价。\n+ 使用来自[推土机蓝皮书 - Kaggle版本](https:\u002F\u002Fwww.kaggle.com\u002Fc\u002Fbluebook-for-bulldozers\u002Foverview)的数据。\n\n## [深度学习ANN项目 - 狗品种预测](Complete%20Machine%20Learning%20and%20Data%20Science%20-%20Zero%20to%20Mastery%20-%20AN\u002F14.Neural%20Network%20-%20Deep%20Learning%20-%20Transfer%20Learning%20with%20Tensorflow%202\u002FDog_Breed_Vision.ipynb)\n来自“零基础到精通”完整机器学习和数据科学课程的项目。\n+ 该项目旨在使用TensorFlow构建深度学习模型，以预测狗的品种。\n+ 使用来自[狗品种识别 - Kaggle版本](https:\u002F\u002Fwww.kaggle.com\u002Fc\u002Fdog-breed-identification\u002Foverview)的数据。\n\n## [911报警电话数据Capstone项目](Data%20Science%20and%20Machine%20Learning%20Bootcamp%20-%20JP\u002F10-Data-Capstone-Projects\u002FProject%20-%20911%20Calls\u002F01-911%20Calls%20Data%20Capstone%20Project%20-%20My%20Solutions.ipynb)\n数据科学与机器学习训练营课程中的数据分析与可视化Capstone项目。\n+ 分析来自[kaggle](https:\u002F\u002Fwww.kaggle.com\u002Fmchirico\u002Fmontcoalert)的911报警电话数据\n+ 911报警电话数量最多的前5个邮政编码\n+ 911报警电话数量最多的前5个乡镇\n+ 911报警最常见的原因\n+ 基于分析结果的不同类型可视化图表\n+ 等等。\n\n\n## [ML应用 - 随机森林算法 - ML项目](https:\u002F\u002Fgithub.com\u002Fptyadana\u002Fml-app)\n+ 使用Streamlit构建的机器学习应用，用于利用随机森林算法建立回归模型。\n\n\n## 机器学习与数据科学项目\n### 大师班项目\n+ [Ames住房数据项目 - 线性回归](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F10-Cross-Val-and-LinReg-Project\u002F02-Linear-Regression-Project-Exercise-MySolutions.ipynb)\n+ [心脏病检测项目 - 逻辑回归](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F11-Logistic-Regression-Models\u002F02-Logistic-Regression-Project-Exercise-MySolution%20.ipynb)\n+ [Sona数据 - 检测岩石或矿石项目 - KNN](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F12-K-Nearest-Neighbors\u002F01-KNN-Exercise-MySolutions.ipynb)\n+ [葡萄酒欺诈检测项目 - SVM](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F13-Support-Vector-Machines\u002F02-SVM-Project-Exercise-MySolutions.ipynb)\n+ [蘑菇可食用性预测项目 - 
AdaBoost](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F16-Boosted-Trees\u002F00-AdaBoost.ipynb)\n+ [蘑菇可食用性预测项目 - 梯度提升](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F16-Boosted-Trees\u002F01-Gradient-Boosting.ipynb)\n+ [电商项目 - 线性回归](Data%20Science%20and%20Machine%20Learning%20Bootcamp%20-%20JP\u002F11-Linear-Regression\u002F02-Linear%20Regression%20-%20Ecommerce%20Project.ipynb)\n+ [广告项目 - 逻辑回归](Data%20Science%20and%20Machine%20Learning%20Bootcamp%20-%20JP\u002F13-Logistic-Regression\u002F02-Logistic%20Regression%20-%20Advertisement%20Project.ipynb)\n+ [匿名化数据项目 - KNN](Data%20Science%20and%20Machine%20Learning%20Bootcamp%20-%20JP\u002F14-K-Nearest-Neighbors\u002F02-K%20Nearest%20Neighbors%20-%20Anonymized%20Data%20Project.ipynb)\n+ [监督学习Capstone项目 - 队列分析与客户流失预测](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F17-Supervised-Learning-Capstone-Project\u002F00-Supervised-Learning-Capstone-Customer-Churn-Predictions-Project.ipynb)\n+ [NLP - 航班推文情感分析 - 分类](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F18-Naive-Bayes-and-NLP\u002F01-Text-Classification%20-%20Flight%20tweets.ipynb)\n+ [NLP - 电影评论情感分析 - 分类](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F18-Naive-Bayes-and-NLP\u002F02-Text-Classification-Movie_Reviews_Sentiment_Analysis.ipynb)\n+ [颜色量化 - KMeans](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F19-K-Means-Clustering\u002F01-Kmeans-Color-Quantization.ipynb)\n+ [CIA国家分析与聚类 - KMeans](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F19-K-Means-Clustering\u002F02-Kmeans-Clustering-Project-Exercise.ipynb)\n+ [汽车型号 - 层次聚类](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F21-Hierarchical-Clustering\u002F00-Hierarchical-Clustering.ipynb)\n+ [批发客户 - DBSCAN聚类](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F22-DBSCAN\u002F02-DBSCAN-Project-Exercise.ipynb)\n+ [乳腺癌 - 
PCA手动实现](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F23-Principal-Component-Analysis\u002F00-PCA-Manual-Implementation.ipynb)\n+ [乳腺癌 - 使用sklearn的PCA](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F23-Principal-Component-Analysis\u002F01-PCA-with-ScikitLearn.ipynb)\n\n### 其他项目\n+ [项目 - 使用XG-Boost预测二手车价格](Project%20-%20Used%20Car%20Price%20Prediction%20-%20XG-Boost\u002FUsed_Car_Price_Prediction_using_XGBoost.ipynb)\n+ [项目 - 使用二分类 - 逻辑回归预测NBA新秀的职业生涯长度](Project%20-%20Predict%20Career%20Longevity%20for%20NBA%20Rookies%20with%20Binary%20Classification%20-%20Logistic%20Regression\u002FNBA_Rookies_Logistic_Regression.ipynb)\n+ [项目 - 面部分类 - SVM](Project%20-%20Facial%20Classification%20-%20SVM)\n+ [项目 - 使用交互项预测销售收入 - 多元线性回归](Project%20-%20Predict%20Sales%20Revenue%20with%20Interaction%20Term%20-%20Multiple%20Linear%20Regression\u002FSales_Revenue_Prediction_with_Multiple_Linear_Regression.ipynb)\n+ [项目 - 预测销售收入 - 简单线性回归](Project%20-%20Predict%20Sales%20Revenue%20-%20Simple%20Linear%20Regression\u002FSales_Revenue_Prediction_with_Simple_Linear_Regression.ipynb)\n+ [项目 - 乳腺癌肿瘤诊断分类 - SVM](Project%20-%20Breast%20Cancer%20Classification%20Project%20-%20SVM\u002FBreast_Cancer_Classification_Project.ipynb)\n+ [项目 - 音乐推荐系统](https:\u002F\u002Fgithub.com\u002Fptyadana\u002FML-Music-Recommender)\n+ [项目 - Smarty Brain图像预测](https:\u002F\u002Fgithub.com\u002Fptyadana\u002FPython-Projects-Dojo\u002Ftree\u002Fmaster\u002F03.Complete%20Python%20Developer%20-%20Zero%20to%20Mastery%20-%20AN\u002F17.Machine%20Learning%20and%20Data%20Science\u002FML_SmartyBrain_Project)\n\n### 深度学习项目\n+ [基于Flask的鸢尾花预测应用](https:\u002F\u002Firis-flower-app-by-ptyadana.herokuapp.com\u002F)\n+ [ANN - 贷款违约预测项目](数据科学与机器学习训练营 - JP\u002F21.神经网络与深度学习\u002F04_Tensorflow_Keras_Project_Loan_Predictions.ipynb)\n+ [ANN - 美国金县房屋销售房价预测项目](数据科学与机器学习训练营 - JP\u002F21.神经网络与深度学习\u002F02_Tensorflow_Regression_Housing_Price_Prediction.ipynb)\n+ [ANN - 威斯康星州乳腺癌（诊断）项目](数据科学与机器学习训练营 - 
JP\u002F21.神经网络与深度学习\u002F03_Tensorflow_Classification_Breast_Cancer_Wisconsin_(Diagnostic).ipynb)\n+ [CNN - 用于图像分类的卷积神经网络 - MNIST数据项目](完整TensorFlow 2与Keras深度学习训练营 - JP\u002F04-CNNs\u002F01-Keras-CNN-MNIST（灰度图像）.ipynb)\n+ [CNN - 用于图像分类的卷积神经网络 - CIFAR 10数据项目](完整TensorFlow 2与Keras深度学习训练营 - JP\u002F04-CNNs\u002F02-Keras-CNN-CIFAR-10（彩色图像）.ipynb)\n+ [CNN - 用于图像分类的卷积神经网络 - 实际图像 - 疟疾检测项目](完整TensorFlow 2与Keras深度学习训练营 - JP\u002F04-CNNs\u002F03-Deep-Learning-Custom-Images-Malaria.ipynb)\n+ [CNN - 用于图像分类的卷积神经网络 - Fashion MNIST数据项目](完整TensorFlow 2与Keras深度学习训练营 - JP\u002F04-CNNs\u002F04-DL-CV-Assessment - Fashion MNIST数据项目.ipynb)\n+ [RNN - 使用LSTM进行冷冻甜品销售预测](完整TensorFlow 2与Keras深度学习训练营 - JP\u002F05-RNNs\u002F02-RNN-Forzen-Dessert-Montly-Sales-Forecasting.ipynb)\n+ [NLP - Yelp评论分类 - 自然语言处理项目](数据科学与机器学习训练营 - JP\u002F22.自然语言处理\u002F02-NLP项目 - Yelp.ipynb)\n+ [英国各国家平均饮食习惯 - 自编码器](完整TensorFlow 2与Keras深度学习训练营 - JP\u002F07-AutoEncoders\u002F02-英国各国平均饮食习惯 - 自编码器.ipynb)\n\n\n# 数据分析与可视化项目\n+ [Python数据可视化项目](https:\u002F\u002Fgithub.com\u002Fptyadana\u002FDV-Data-Visualization-with-Python)：使用Pandas和Matplotlib对各国GDP、各大洲间预期寿命对比、人均GDP相对增长、人口相对增长等进行数据分析与可视化。\n+ [燃油经济性案例研究 - 项目](数据分析\u002F项目 - 案例研究2 - 燃油经济性\u002F案例研究 - 燃油经济性.ipynb)：分析[美国环保署提供的燃油经济性数据](https:\u002F\u002Fwww.epa.gov\u002Fcompliance-and-fuel-economy-data\u002Fdata-cars-used-testing-fuel-economy)，探讨温室气体评分分布、2008年与2018年的综合油耗、排量与综合油耗之间的相关性、温室气体评分与综合油耗的关系。2018年相比2008年，使用替代燃料的独特车型是否更多？多了多少？不同车辆级别的燃油经济性（油耗提升）有何改善？SmartWay车辆有哪些特征？这些特征随时间有变化吗？（油耗、温室气体）哪些因素与更好的燃油经济性（油耗）相关？从2008年到2018年，综合油耗提升最多的车型是哪一款？\n+ [葡萄酒质量案例研究 - 项目](数据分析\u002F项目 - 案例研究1 - 红酒质量\u002F案例研究 - 红酒质量.ipynb)：分析[葡萄酒数据](https:\u002F\u002Farchive.ics.uci.edu\u002Fml\u002Fdatasets\u002FWine+Quality)，为葡萄酒企业更好地酿造葡萄酒提供参考。特定类型的葡萄酒（红葡萄酒或白葡萄酒）是否与更高的品质相关？酸度（pH值）处于哪个水平时获得最高平均评分？酒精含量较高的葡萄酒是否评分更高？含糖量较高的甜型葡萄酒是否评分更高？按颜色与质量划分的红白葡萄酒比例\n+ [电视、中场秀与超级碗 - 项目](项目 - 
电视、中场秀与超级碗)：分析[超级碗](https:\u002F\u002Fwww.datacamp.com\u002Fprojects\u002F684)的数据，并回答以下问题——比赛结果最悬殊的是哪些？比赛对电视收视率有何影响？收视率、电视评分及广告成本随时间如何变化？在中场秀表演方面，哪些音乐人最为活跃？\n+ [天气趋势 - 项目](数据分析\u002F项目 - 天气趋势\u002F探索天气趋势项目.ipynb)：分析全球天气趋势、新加坡天气趋势，比较全球与新加坡过去10年的移动平均趋势。\n+ [社交媒体数据实时洞察 - 项目](项目 - 社交媒体数据实时洞察)：分析[Twitter](https:\u002F\u002Fwww.datacamp.com\u002Fprojects\u002F760)的数据，并回答诸如“全球趋势和本地趋势分别是什么？”、“寻找共同趋势”以及“对推文和标签进行频率分析”等问题。\n+ [股票数据统计](AI编程与Python\u002F01.迷你项目UDACITY - 股票数据统计\u002F股票数据统计.ipynb)：分析谷歌、苹果和亚马逊的股价，并计算滚动平均值。\n+ [Android Play商店应用数据分析 - 项目](项目 - Android Play商店应用数据分析)：分析Android Play商店的数据，并回答诸如“有多少应用是付费的？它们赚了多少钱？这些应用是什么时候发布的？”等问题。\n\n------------------------------------------------------------------\n\n# 训练营\n\n## [RL - Python与强化学习实践AI - JP - 暂停](RL - 实践AI与Python和强化学习 - JP)\n+ [x] 00. NumPy速成课程\n+ [x] 01. Matplotlib可视化\n+ [x] 02. Pandas与Scikit-learn\n+ [x] 03. ANNs\n+ [x] 04. CNNs\n+ [x] 05. gym入门\n+ [ ] 06. 经典Q学习\n+ [ ] 07. 深度Q学习\n+ [ ] 08. 图像上的深度Q学习\n+ [ ] 09. 创建自定义Open AI Gym环境\n\n## [TensorFlow 2.0：深度学习与人工智能 - LP](Tensorflow%20-%20TF2%20Deep%20Learning%20and%20Artificial%20Intelligence%20-%20LP)\n+ [x] 第2节 - Google Colab\n+ [ ] 第3节 - 机器学习与神经元\n+ [ ] 第4节 - 前馈人工神经网络\n+ [ ] 第5节 - CNN 卷积神经网络\n+ [ ] 第6节 - RNN - 循环神经网络、时间序列、序列数据\n+ [ ] 第7节 - NLP\n+ [ ] 第8节 - 推荐系统\n+ [ ] 第9节 - 面向计算机视觉的迁移学习\n+ [ ] 第10节 - GAN\n+ [ ] 第11节 - 深度强化学习（理论）\n+ [ ] 第12节 - 使用深度学习的股票交易项目\n+ [ ] 第13节：TensorFlow 高级用法\n+ [ ] 第14节：低级别 TensorFlow\n+ [ ] 第15节：深入解析——损失函数\n+ [ ] 第16节：深入解析——梯度下降\n+ [ ] 第17至21节：其他\n\n## [DeepLearning.AI - 课程 04. TensorFlow 中的序列、时间序列与预测](Tensorflow%20-%20DeepLearning.AI%20-%2004.Sequences%2C%20Time%20Series%20and%20Predictions)\n+ [x] 第01周 - 序列与预测\n+ [ ] 第02周 - 用于时间序列的深度神经网络\n+ [ ] 第03周 - 用于时间序列的循环神经网络\n+ [ ] 第04周 - 真实世界的时间序列数据\n\n## [DeepLearning.AI - 课程 03. 
TensorFlow 中的自然语言处理](Tensorflow%20-%20DeepLearning.AI%20-%2003.Natural%20Language%20Processing%20in%20TensorFlow)\n+ [x] 第01周 - 文本情感分析\n+ [x] 第02周 - 词嵌入\n+ [x] 第03周 - 序列模型\n+ [x] 第04周 - 序列模型与文学\n\n## [DeepLearning.AI - 课程 02. TensorFlow 中的卷积神经网络](Tensorflow%20-%20DeepLearning.AI%20-%2002.Convolutional%20Neural%20Networks%20in%20TensorFlow)\n+ [x] 第01周 - 探索更大的数据集\n+ [x] 第02周 - 数据增强：一种避免过拟合的技术\n+ [x] 第03周 - 迁移学习\n+ [x] 第04周 - 多分类问题\n\n## [DeepLearning.AI - 课程 01. 面向人工智能、机器学习和深度学习的 TensorFlow 入门](Tensorflow%20-%20DeepLearning.AI%20-%2001.Introduction%20to%20TensorFlow%20for%20AI%2C%20ML%20and%20DL_v2)\n+ [x] 第01周 - 一种新的编程范式\n+ [x] 第02周 - 计算机视觉入门\n+ [x] 第03周 - 使用 CNN 提升视觉能力\n+ [x] 第04周 - 使用真实世界图像\n\n## [深度学习 TensorFlow 开发者证书 - ZTM - 进行中](Tensorflow%20-%20Deep%20Learning%20TensorFlow%20Developer%20Certificate%20-%20ZTM)\n+ [x] 01. 导言\n+ [x] 02. 深度学习与 TensorFlow 基础\n+ [ ] 03. 使用 TensorFlow 的神经网络回归\n+ [ ] 04. 使用 TensorFlow 的神经网络分类\n+ [ ] 05. TensorFlow 中的计算机视觉与卷积神经网络\n+ [ ] 06. 迁移学习——特征提取\n+ [ ] 07. 迁移学习——微调\n+ [ ] 08. 迁移学习——扩展规模\n+ [ ] 09. 阶段性项目 1 - Food Vision Big\n+ [ ] 10. TensorFlow 中的 NLP 基础\n+ [ ] 11. 阶段性项目 2 - SkimLit\n+ [ ] 12. 时间序列基础 + 阶段性项目 3 - BitPredict\n+ [ ] 13. 通过 TensorFlow 证书考试\n+ [ ] 15. 附录 - 机器学习入门\n+ [ ] 16. 附录 - 机器学习框架\n+ [ ] 14、17至19. 
其他\n\n## [TensorFlow 2 和 Keras 深度学习完全训练营 - JP](Tensorflow%20-%20Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP)\n+ NumPy 速成课程\n+ Pandas 速成课程\n+ 可视化速成课程\n+ 基础人工神经网络 - ANNs\n\t+ [基础 Keras 项目](Tensorflow%20-%20Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F03-ANNs\u002F00-Keras-Syntax-Basics.ipynb)\n\t+ [预测美国金县房屋销售价格 - 回归项目](Tensorflow%20-%20Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F03-ANNs\u002F01-Keras-Regression.ipynb)\n\t+ [威斯康星州乳腺癌（诊断）数据 - 分类项目](Tensorflow%20-%20Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F03-ANNs\u002F02-Keras-Classification.ipynb)\n\t+ [贷款违约预测 - 分类项目](Tensorflow%20-%20Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F03-ANNs\u002F03-Keras-Project-Exercise-Loan-Predictions.ipynb)\n\t+ TensorBoard\n+ 卷积神经网络 - CNNs\n\t+ [用于图像分类的卷积神经网络 - MNIST 数据](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F04-CNNs\u002F01-Keras-CNN-MNIST%20(Grey%20Scale%20Images).ipynb)\n\t+ [用于图像分类的卷积神经网络 - CIFAR 10 数据](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F04-CNNs\u002F02-Keras-CNN-CIFAR-10%20(Color%20Images).ipynb)\n\t+ [用于图像分类的卷积神经网络 - 真实图像 - 疟疾检测项目](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F04-CNNs\u002F03-Deep-Learning-Custom-Images-Malaria.ipynb)\n\t+ [用于图像分类的卷积神经网络 - 时尚 MNIST 数据项目](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F04-CNNs\u002F04-DL-CV-Assessment%20-%20Fashion%20MNIST%20Data%20Project.ipynb)\n+ 循环神经网络 - RNNs\n\t+ [正弦波示例](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F05-RNNs\u002F00-RNN-SineWave-Example.ipynb)\n\t+ [时间序列的 
RNN 示例 - 零售和餐饮业月度销售额](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F05-RNNs\u002F01-RNN-Time-Series-Example-MonthlySalesRetailFood.ipynb)\n\t+ [使用 LSTM 进行冷冻甜点月度销售预测的 RNN](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F05-RNNs\u002F02-RNN-Forzen-Dessert-Montly-Sales-Forecasting.ipynb)\n\t+ [多变量时间序列与 RNN](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F05-RNNs\u002F04-Multivariate-Time-Series-RNN.ipynb)\n+ 自然语言处理 - NLP\n\t+ [使用 RNN 生成文本 - 莎士比亚](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F06-NLP-and-Text-Data\u002F00_Generating_Text_with_RNNs_Shakespears.ipynb)\n+ 自编码器\n\t+ [用于降维的自编码器](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F07-AutoEncoders\u002F00-Autoencoders.ipynb)\n\t+ [基于图像数据的自编码器 - 降维与去噪](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F07-AutoEncoders\u002F01-Autoencoders-on-Image-Data.ipynb)\n\t+ [英国各地区平均饮食习惯 - 自编码器](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F07-AutoEncoders\u002F02-UK%20countries%20average%20eating%20habits%20-%20Autoencoder.ipynb)\n+ 生成对抗网络 - GANs\n\t+ [GANs - 使用密集层的生成对抗网络](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F08-GANs\u002F00-Generative-Adversarial-Networks-with-Dense-Layers.ipynb)\n\t+ [DCGANs - 深度卷积生成对抗网络](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F08-GANs\u002F01-DCGANS.ipynb)\n+ 部署\n\t+ [鸢尾花预测项目](Tensorflow%20-%20Complete%20Tensorflow%202%20and%20Keras%20Deep%20Learning%20Bootcamp%20-%20JP\u002F09-Deployment\u002F00-Iris-Flower-Predictions-Basic-Model.ipynb)\n\t+ [基于 Flask 
的鸢尾花预测应用](https:\u002F\u002Firis-flower-app-by-ptyadana.herokuapp.com\u002F)\n\n## [机器学习与数据科学大师班 - JP](Machine%20Learning%20&%20Data%20Science%20Masterclass%20-%20JP)\n+ 新课程 [2021 Python 机器学习与数据科学大师班](https:\u002F\u002Fwww.udemy.com\u002Fcourse\u002Fpython-for-machine-learning-data-science-masterclass\u002F)\n+ Python 快速入门\n+ NumPy\n+ Pandas\n+ Matplotlib\n+ Seaborn 数据可视化\n+ 数据分析与数据可视化综合项目\n    + [Fandango 与其他网站的电影评分对比](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F06-Capstone-Project)\n+ 线性回归模型\n+ 特征工程与数据预处理\n+ 交叉验证、网格搜索与线性回归项目\n\t+ [Ames 房屋数据项目](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F10-Cross-Val-and-LinReg-Project\u002F02-Linear-Regression-Project-Exercise-MySolutions.ipynb)\n+ 逻辑回归模型\n\t+ [心脏病检测项目](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F11-Logistic-Regression-Models\u002F02-Logistic-Regression-Project-Exercise-MySolution%20.ipynb)\n+ KNN - K 最近邻\n\t+ [Sona 数据 - 检测岩石或矿石项目](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F12-K-Nearest-Neighbors\u002F01-KNN-Exercise-MySolutions.ipynb)\n+ SVM - 支持向量机\n\t+ [葡萄酒欺诈检测项目](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F13-Support-Vector-Machines\u002F02-SVM-Project-Exercise-MySolutions.ipynb)\n+ 基于树的方法 - 决策树学习\n+ 随机森林\n+ 提升方法\n\t+ [蘑菇可食用性预测项目 - 使用 AdaBoost](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F16-Boosted-Trees\u002F00-AdaBoost.ipynb)\n\t+ [蘑菇可食用性预测项目 - 使用梯度提升](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F16-Boosted-Trees\u002F01-Gradient-Boosting.ipynb)\n+ [监督学习综合项目 - 队列分析与客户流失预测](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F17-Supervised-Learning-Capstone-Project\u002F00-Supervised-Learning-Capstone-Customer-Churn-Predictions-Project.ipynb)\n+ 朴素贝叶斯分类与自然语言处理（监督学习）\n\t+ [NLP - 
特征提取](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F18-Naive-Bayes-and-NLP\u002F00-Feature-Extraction-From-Text.ipynb)\n\t+ [航班推文情感分析 - 分类](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F18-Naive-Bayes-and-NLP\u002F01-Text-Classification%20-%20Flight%20tweets.ipynb)\n\t+ [电影评论情感分析 - 分类](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F18-Naive-Bayes-and-NLP\u002F02-Text-Classification-Movie_Reviews_Sentiment_Analysis.ipynb)\n+ K 均值聚类（无监督学习）\n\t+ [颜色量化](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F19-K-Means-Clustering\u002F01-Kmeans-Color-Quantization.ipynb)\n\t+ [CIA 国家分析与聚类](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F19-K-Means-Clustering\u002F02-Kmeans-Clustering-Project-Exercise.ipynb)\n+ 层次聚类（无监督学习）\n\t+ [汽车型号聚类](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F21-Hierarchical-Clustering\u002F00-Hierarchical-Clustering.ipynb)\n+ DBSCAN（无监督学习）\n\t+ [DBSCAN - 理论与入门](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F22-DBSCAN\u002F00-DBSCAN.ipynb)\n\t+ [超参数调优](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F22-DBSCAN\u002F01-DBSCAN-Hyperparameters.ipynb)\n\t+ [批发客户 - 聚类](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F22-DBSCAN\u002F02-DBSCAN-Project-Exercise.ipynb)\n+ 主成分分析（无监督学习）\n\t+ [PCA 手动实现](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F23-Principal-Component-Analysis\u002F00-PCA-Manual-Implementation.ipynb)\n\t+ [使用 sklearn 的 PCA](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F23-Principal-Component-Analysis\u002F01-PCA-with-ScikitLearn.ipynb)\n\t+ [PCA - 手写数字分类](Machine%20Learning%20%26%20Data%20Science%20Masterclass%20-%20JP\u002F23-Principal-Component-Analysis\u002F02-PCA-Exercise-Project.ipynb)\n+ 模型部署\n\t+ 使用 Flask 将模型作为 API 提供服务\n\n## [完整机器学习与数据科学 - 
从零到精通](Complete%20Machine%20Learning%20and%20Data%20Science%20-%20Zero%20to%20Mastery%20-%20AN)\n+ 使用 Pandas 进行数据分析\n+ 使用 NumPy 进行数据分析\n+ 使用 Polyfit 进行线性回归 - 数据 36\n+ Matplotlib - 数据可视化\n+ Scikit-learn - 构建机器学习模型\n+ [里程碑项目 - 监督学习（分类）- 心脏病检测](Complete%20Machine%20Learning%20and%20Data%20Science%20-%20Zero%20to%20Mastery%20-%20AN\u002F11.Heart%20Disease%20Project\u002FMilestone%20Project%20-%20Heart%20Disease%20Classification.ipynb)\n+ [里程碑项目 - 监督学习（回归）- 推土机销售价格预测](Complete%20Machine%20Learning%20and%20Data%20Science%20-%20Zero%20to%20Mastery%20-%20AN\u002F12.Bulldozer%20Sales%20Price%20Prediction%20Project\u002FMilestone%20Project%20-%20Bluebook%20Bulldozer%20Price%20Prediction.ipynb)\n+ [深度学习项目 - 犬种预测](Complete%20Machine%20Learning%20and%20Data%20Science%20-%20Zero%20to%20Mastery%20-%20AN\u002F14.Neural%20Network%20-%20Deep%20Learning%20-%20Transfer%20Learning%20with%20Tensorflow%202\u002FDog_Breed_Vision.ipynb)\n\n\n## [ML - 机器学习与数据科学 A-Z 实战 Python - NS](ML%20-%20Machine%20Learning%20%26%20Data%20Science%20A-Z%20Hands-on%20Python%20-%20NS)\n+ [x] 03. 数据预处理\n+ [x] 04. 机器学习类型\n+ [x] 05. 监督学习 - 分类\n+ [x] 06. 监督学习 - 回归\n+ [x] 07. 无监督学习 - 聚类\n+ [x] 08. 
超参数优化\n\n## [数据科学与机器学习训练营](数据科学与机器学习训练营 - JP)\n+ Python速成课程\n+ 用于数据分析的Python - NumPy\n+ 用于数据分析的Python - Pandas\n+ 用于数据可视化的Python - Matplotlib\n+ 用于数据可视化的Python - Seaborn\n+ Pandas内置数据可视化\n+ 使用Plotly和Cufflinks进行可视化\n+ 数据综合项目\n    + [911报警电话 - 数据综合项目](数据科学与机器学习训练营 - JP\u002F10-数据综合项目\u002F项目 - 911报警电话\u002F01-911报警电话数据综合项目 - 我的解决方案.ipynb)\n+ 线性回归\n\t+ [电子商务项目](数据科学与机器学习训练营 - JP\u002F11-线性回归\u002F02-线性回归 - 电子商务项目.ipynb)\n+ 逻辑回归\n\t+ [广告投放项目](数据科学与机器学习训练营 - JP\u002F13-逻辑回归\u002F02-逻辑回归 - 广告投放项目.ipynb)\n+ K近邻算法 (KNN)\n\t+ [匿名化数据项目](数据科学与机器学习训练营 - JP\u002F14-K近邻算法\u002F02-K近邻算法 - 匿名化数据项目.ipynb)\n+ 决策树与随机森林\n\t+ [贷款预测项目](数据科学与机器学习训练营 - JP\u002F15-决策树与随机森林\u002F02-决策树与随机森林 - 贷款预测项目.ipynb)\n+ 支持向量机 (SVM)\n\t+ [鸢尾花分类项目](数据科学与机器学习训练营 - JP\u002F16-支持向量机\u002F02-支持向量机项目.ipynb)\n+ K均值聚类\n+ 主成分分析\n+ 推荐系统\n+ 自然语言处理\n\t+ [Yelp评论分类](数据科学与机器学习训练营 - JP\u002F22. 自然语言处理\u002F02-NLP项目 - Yelp.ipynb)\n+ 神经网络与深度学习\n\t+ [回归项目 - 预测美国金县房屋销售价格](数据科学与机器学习训练营 - JP\u002F21. 神经网络与深度学习\u002F02_Tensorflow_Regression_Housing_Price_Prediction.ipynb)\n\t+ [分类项目 - 威斯康星州乳腺癌诊断数据](数据科学与机器学习训练营 - JP\u002F21. 神经网络与深度学习\u002F03_Tensorflow_Classification_Breast_Cancer_Wisconsin_(Diagnostic).ipynb)\n\t+ [最终项目 - 分类 - 贷款违约预测](数据科学与机器学习训练营 - JP\u002F21. 
神经网络与深度学习\u002F04_Tensorflow_Keras_Project_Loan_Predictions.ipynb)\n\t+ TensorBoard\n+ 大数据与Spark结合Python\n+ SciPy\n\n## 完整数据科学训练营 - 365\n+ [x] 第一部分 - 数据科学领域\n+ [x] 第二部分 - 概率论\n+ [ ] 第三部分 - 统计学（描述性与推断性）\n+ [x] 第四部分 - Python\n+ [ ] 第五部分 - Python中的高级统计方法 \u002F Python中的机器学习\n+ [x] 第六部分 - 数学\n+ [ ] 第七部分 - 深度学习\n+ [ ] 软件集成\n+ [ ] 案例研究 - 缺勤问题\n\n------------------------------------------------------------------\n# 书籍\n\n## [动手学机器学习：使用Scikit-Learn、Keras和TensorFlow（进行中）](动手学机器学习与Scikit-Learn、Keras和TensorFlow)\n+ [x] 机器学习基础\n+ [x] 机器学习全景\n+ [x] 端到端机器学习项目\n+ [x] 分类\n+ [ ] 训练模型\n\n## 百页机器学习书\n- [x] 引言\n- [x] 符号与定义\n- [x] 基础算法\n- [x] 学习算法的构成\n- [x] 基本实践 \n- [ ] 神经网络与深度学习 \n- [ ] 问题与解答\n- [ ] 高级实践\n- [ ] 无监督学习\n- [ ] 无监督学习 - 深入内容\n- [ ] 其他形式的学习\n- [ ] 结论\n------------------------------------------------------------------\n\n# 推进机器学习与数据科学之旅 - （进行中）\n为了在特定领域和主题上提升我的ML和DS相关技能：\n\n## [应用机器学习 - 集成学习](ML - 应用机器学习 - 集成学习)\n+ 项目：泰坦尼克号数据集\n+ 01.ML基础\n+ 02.数据准备\n+ 03.集成学习\n+ 04.提升\n+ 05.装袋\n+ 06.堆叠\n+ 07.模型评估与选择\n\n## [应用机器学习 - 特征工程](ML - 应用机器学习 - 特征工程)\n+ 项目：泰坦尼克号数据集\n+ 01.ML基础\n+ 02.特征工程简介\n+ 03.探索数据\n+ 04.创建并清理特征\n+ 05.为建模准备特征\n+ 06.比较和评估模型\n\n## [应用机器学习 - 算法](ML - 应用机器学习 - 算法)\n+ 项目：泰坦尼克号数据集\n+ 01.回顾基础\n+ 02.逻辑回归\n+ 03.支持向量机\n+ 04.多层感知器\n+ 05.随机森林\n+ 06.提升\n+ 07.最终模型选择和评估\n\n## [应用机器学习 - 基础](ML - 应用机器学习 - 基础)\n+ 项目：泰坦尼克号数据集\n+ 01.ML基础\n+ 02.探索性数据分析和数据清洗\n+ 03.评估 - 衡量成功\n+ 04.优化模型\n+ 05.端到端流程\n\n## [ML - 机器学习中应避免的错误](ML - 应避免的机器学习错误\u002F01_00_机器学习中应避免的错误.ipynb)\n+ [x] 假设数据可以直接使用\n+ [x] 忽视咨询领域专家\n+ [x] 过度拟合模型\n+ [x] 未对数据进行标准化\n+ [x] 关注错误的因素\n+ [x] 数据泄露\n+ [x] 忘记使用传统统计工具\n+ [x] 认为部署轻而易举\n+ [x] 认为机器学习是万能的\n+ [x] 孤立开发\n+ [x] 不处理样本不平衡问题\n+ [x] 在未妥善处理多重共线性的情况下解释系数\n+ [x] 仅以准确率来评估\n+ [x] 过于技术性的展示\n\n## 深度学习、机器学习、人工智能与数据科学\n- [x] [深度学习 - 使用 TensorFlow 进行自然语言处理](Tensorflow%20-%20Natural%20Language%20Proceessing%20with%20Tensorflow)\n- [ ] [深度学习 - 人脸识别](DL%20-%20Deep%20Learning%20-%20Face%20Recognition\u002FmyNotes.ipynb)\n- [x] [深度学习 - 
图像识别](DL%20-%20Deep%20Learning%20-%20Image%20Recognition)\n- [x] [深度学习 - 使用 Keras 2.0 构建深度学习应用](DL%20-%20Deep%20Learning%20-%20Building%20Deep%20Learning%20Applications)\n- [x] [应用机器学习 - 集成学习](ML%20-%20Applied%20Machine%20Learning%20-%20Ensemble%20Learning)\n- [x] [应用机器学习 - 特征工程](ML%20-%20Applied%20Machine%20Learning%20-%20Feature%20Engineering)\n- [x] [应用机器学习 - 算法](ML%20-%20Applied%20Machine%20Learning%20-%20Algorithms)\n- [x] [应用机器学习 - 基础](ML%20-%20Applied%20Machine%20Learning%20Foundation)\n- [x] [Python 机器学习 - 03_k-Means 聚类](ML%20-%20Machine%20Learning%20with%20Python%20-%2003_k-Means%20Clustering)\n- [x] [Python 机器学习 - 02_决策树](ML%20-%20Machine%20Learning%20with%20Python%20-%2002_Decision%20Trees)\n- [x] [Python 机器学习 - 01_基础](ML%20-%20Machine%20Learning%20with%20Python%20-%2001_Foundations)\n- [x] [机器学习 - 机器学习中应避免的错误](ML%20-%20Mistakes%20to%20avoid%20in%20Machine%20Learning\u002F01_00_Mistakes%20to%20avoid%20in%20Machine%20Learning.ipynb)\n- [x] [机器学习 - 使用 Iris 花进行分类建模](Project%20-%20Classification%20Modelling%20with%20Iris%20flowers%20-%20Pinata%20Data)\n- [x] [数据科学 A-Z 建模](DS%20-%20Data%20Science%20A-Z)\n- [x] [面向神经网络和 AI 界面的设计](ML%20-%20Designing%20for%20Neural%20Networks%20and%20AI%20Interfaces\u002Fmynotes.md)\n- [x] GPT-3 简介：人工智能的一次飞跃\n\n\n## 数据分析、数据处理与数据可视化\n+ [ ] [数据分析与可视化 - Python 数据分析与可视化大师班](DV%20-%20Python%20Data%20Analysis%20%26%20Visualization%20Masterclass%20-%20CS)\n+ [x] [Pandas - Pandas 代码挑战](Pandas%20-%20Pandas%20Code%20Challenge)\n+ [x] [Pandas - 高级 Pandas](Pandas%20-%20Advanced%20Pandas)\n+ [x] [数据可视化 - 使用 Plotly 进行数据可视化](DV%20-%20Data%20Visualizations%20with%20Plotly)\n+ [x] [数据分析 - 使用 Pandas 和 Python 进行数据分析 - BP](Pandas%20-%20Data%20Analysis%20with%20Pandas%20and%20Python%20-%20BP)\n+ [x] [数据分析 - Python 数据手册 - 数据清洗](Pandas%20-%20Python%20Data%20Playbook%20-%20Cleaning%20Data)\n+ [x] [Pandas - Pandas 手册 - 数据操作](Pandas%20-%20Pandas%20Playbook%20-%20Manipulating%20Data)\n+ [x] [更多 Python 数据工具 - 
Microsoft](More%20Python%20Data%20Tools%20-%20Microsoft)\n\n## Apache Spark & PySpark\n- [x] [Spark SQL 和 DataFrame 入门](Spark%20-%20Spark%20SQL%20and%20Data%20Frames)\n- [x] [Apache Spark 必备培训](Spark%20-%20Apache%20Spark%20Essential%20Training)\n- [ ] 用于机器学习和人工智能的 Spark\n- [x] [Apache PySpark 示例教程](Spark%20-%20Apache%20PySpark%20by%20Example)\n- [x] [Apache Spark 深度学习必备培训](Spark%20-%20Apache%20Spark%20Deep%20Learning%20Essential%20Training)\n\n## 数据科学家阅读材料\n+ 监督学习\n\t+ [x] 第 01 课：机器学习概览\n\t+ [ ] 第 02 课：线性回归\n\t+ [ ] 第 03 课：感知器算法\n\t+ [ ] 第 04 课：决策树\n\t+ [ ] 第 05 课：朴素贝叶斯\n\t+ [ ] 第 06 课：支持向量机\n\t+ [ ] 第 07 课：集成方法\n\t+ [x] 第 08 课：模型评估指标\n\t+ [ ] 第 09 课：训练与调优\n\t+ [ ] 第 10 课：寻找捐赠者项目\n\n## [Kaggle 课程](https:\u002F\u002Fwww.kaggle.com\u002Flearn)\n- [x] Python\n- [x] Pandas\n- [x] 数据清洗\n- [x] 机器学习入门\n- [x] 机器学习中级\n- [ ] 特征工程\n- [ ] 机器学习可解释性\n- [x] 数据可视化\n- [ ] 深度学习入门\n- [ ] 游戏 AI 和强化学习入门\n- [ ] 自然语言处理\n- [ ] 微型挑战\n- [ ] 计算机视觉\n- [ ] SQL 入门\n- [ ] 高级 SQL\n\n## [Google ML 课程](https:\u002F\u002Fdevelopers.google.com\u002Fmachine-learning\u002Fcrash-course)\n- [ ] ML 速成课程\n- [ ] 问题定义\n- [ ] 数据准备\n- [ ] 聚类\n- [ ] 推荐系统\n- [ ] 测试和调试\n- [ ] GANs\n\n\n## [概率与统计（进行中）](Probability%20%26%20Statistics)\n+ 线性回归分析\n+ 多元回归分析\n+ 实用统计\n    + [使用 Python 的招生案例研究（辛普森悖论）](Probability%20%26%20Statistics\u002FPratical%20Statistics\u002FAdmission%20Case%20Study)\n\t+ 模拟抛硬币与概率\n\t+ 模拟多次抛硬币与二项分布\n\t+ 癌症检测结果\n\t+ 条件概率与贝叶斯法则\n+ Excel 数据处理、分析和可视化\n\n\n## [杜克大学数据科学数学技能](https:\u002F\u002Fwww.coursera.org\u002Flearn\u002Fdatasciencemathskills)\n主题包括：\n+ 集合论，包括文氏图\n+ 实数轴的性质\n+ 等等\n\n\n## 许可证\n本项目采用 MIT 许可证授权 - 详情请参阅 [LICENSE.md](LICENSE) 文件","# Data-Science-and-Machine-Learning-Projects-Dojo 快速上手指南\n\n本仓库是一个数据科学、机器学习与可视化的实战演练场（Dojo），汇集了从基础统计分析到深度学习、自然语言处理及 Web 应用部署的完整项目案例。\n\n## 环境准备\n\n### 系统要求\n- **操作系统**: Windows, macOS 或 Linux\n- **Python 版本**: 推荐 Python 3.8 - 3.10 (部分旧版依赖可能不兼容 Python 3.11+)\n- **包管理器**: `pip` 或 `conda` (推荐 Anaconda\u002FMiniconda 以简化科学计算库安装)\n\n### 
前置依赖\n本项目涉及大量科学计算与深度学习库，建议优先使用国内镜像源加速下载。\n- **核心库**: NumPy, Pandas, Scikit-learn, Matplotlib, Seaborn\n- **深度学习**: TensorFlow, Keras\n- **其他工具**: Jupyter Notebook, Streamlit, NLTK, GeoPandas 等\n\n## 安装步骤\n\n### 1. 克隆项目\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Fptyadana\u002FData-Science-and-Machine-Learning-Projects-Dojo.git\ncd Data-Science-and-Machine-Learning-Projects-Dojo\n```\n\n### 2. 创建虚拟环境 (推荐)\n使用 `conda` 创建隔离环境（需预先安装 Anaconda 或 Miniconda）：\n```bash\nconda create -n ds_dojo python=3.9\nconda activate ds_dojo\n```\n\n或使用 `venv`:\n```bash\npython -m venv ds_dojo_env\n# Windows\nds_dojo_env\\Scripts\\activate\n# macOS\u002FLinux\nsource ds_dojo_env\u002Fbin\u002Factivate\n```\n\n### 3. 安装依赖包\n由于项目涵盖面广，建议按需安装或一次性安装主要科学计算栈。以下命令配置了清华大学开源软件镜像源以加速下载：\n\n```bash\npip config set global.index-url https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n\n# 安装核心数据科学与可视化库\npip install numpy pandas matplotlib seaborn scikit-learn geopandas pandas-profiling\n\n# 安装深度学习与自然语言处理库\npip install tensorflow keras nltk dlib face_recognition\n\n# 安装 Web 应用框架\npip install streamlit flask plotly cufflinks bokeh\n\n# 安装 Spark 支持 (可选，如需运行大数据相关项目)\npip install pyspark\n```\n\n> **注意**: `dlib` 和 `face_recognition` 在 Windows 上可能需要预编译的二进制文件或 C++ 构建环境。若安装失败，可尝试先安装 `cmake` 或使用 `conda install -c conda-forge dlib`。\n\n## 基本使用\n\n本项目主要由一系列 Jupyter Notebook (`.ipynb`) 文件组成，每个文件对应一个独立的实战案例。\n\n### 1. 启动 Jupyter Notebook\n在项目根目录下运行：\n```bash\njupyter notebook\n```\n浏览器将自动打开，导航至具体的项目文件夹（例如 `Project - Breast Cancer Classification Project - SVM`）。\n\n### 2. 运行示例项目：乳腺癌肿瘤分类\n这是最经典的二分类入门项目。\n1. 在 Jupyter 界面中点击打开 `Breast_Cancer_Classification_Project.ipynb`。\n2. 依次执行单元格（Cell），代码将自动完成以下流程：\n   - 加载 UCI 乳腺癌数据集。\n   - 进行数据探索与可视化 (EDA)。\n   - 使用 `Scikit-learn` 预处理数据并划分训练集\u002F测试集。\n   - 训练支持向量机 (SVM) 模型。\n   - 输出评估报告（准确率、混淆矩阵等）。\n\n### 3. 
运行 Streamlit Web 应用示例\n对于包含 `ml-app` 或类似 Streamlit 项目的文件夹，可通过以下命令启动交互式 Web 应用：\n\n```bash\n# 假设当前位于包含 app.py 的项目目录\nstreamlit run app.py\n```\n系统将提供一个本地 URL（通常是 `http:\u002F\u002Flocalhost:8501`），在浏览器中访问即可体验随机森林回归模型的交互界面。\n\n### 4. 探索其他领域\n- **NLP**: 查看 `Naive-Bayes-and-NLP` 文件夹下的航班推文情感分析项目。\n- **深度学习**: 进入 `Neural Network - Deep Learning` 文件夹，运行基于 TensorFlow 的狗品种识别项目。\n- **大数据**: 若已配置 Spark 环境，可尝试 `PySpark` 相关笔记进行大规模数据处理练习。","某医疗科技公司的数据分析师正急需构建一个乳腺癌肿瘤良恶性预测模型，以辅助医生进行早期诊断。\n\n### 没有 Data-Science-and-Machine-Learning-Projects-Dojo 时\n- **从零摸索架构**：面对复杂的医疗数据，需手动搭建从数据清洗（Pandas）到模型训练（Scikit-learn\u002FTensorFlow）的完整流程，极易在代码结构上犯错。\n- **可视化效率低下**：缺乏成熟的绘图模板，使用 Matplotlib 或 Seaborn 制作专业的统计图表耗时费力，难以快速向医疗团队展示数据分布特征。\n- **算法选型困难**：在不清楚随机森林、SVM 或提升算法（Boosting）哪种更适合当前小样本医疗数据时，缺乏参考案例，只能盲目试错。\n- **部署门槛高**：模型训练完成后，不知道如何利用 Flask 或 Streamlit 快速将其转化为医生可交互的 Web 应用，导致成果停留在笔记本阶段。\n\n### 使用 Data-Science-and-Machine-Learning-Projects-Dojo 后\n- **复用成熟范式**：直接参考项目中“乳腺癌分类”的完整代码逻辑，快速复用其基于 SVM 的特征工程与模型构建流程，将开发周期缩短数天。\n- **一键生成洞察**：利用集成的 Pandas Profiling 和 Plotly 组件，迅速生成包含交互式图表的数据报告，直观呈现肿瘤半径、纹理等关键特征的差异。\n- **精准算法对标**：通过对比项目中随机森林与神经网络在不同数据集的表现，迅速确定最适合当前任务的算法组合，显著提升预测准确率。\n- **快速应用落地**：套用项目提供的 Streamlit 或 Flask 模板，仅需少量修改即可将本地模型封装为在线诊断工具，实现从代码到临床辅助的无缝衔接。\n\nData-Science-and-Machine-Learning-Projects-Dojo 通过提供全栈式的实战项目库，将原本数周的研发探索过程压缩为高效的模块化组装，极大降低了医疗 AI 落地的技术门槛。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fptyadana_Data-Science-and-Machine-Learning-Projects-Dojo_e53123d2.png","ptyadana","Phone Thiri Yadana","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fptyadana_a8f08528.jpg","Machine Learning Enthusiast | Lifelong learner",null,"Singapore","www.linkedin.com\u002Fin\u002Fphonethiriyadana","https:\u002F\u002Fgithub.com\u002Fptyadana",[82,86,90],{"name":83,"color":84,"percentage":85},"Jupyter 
Notebook","#DA5B0B",90.7,{"name":87,"color":88,"percentage":89},"HTML","#e34c26",9.3,{"name":91,"color":92,"percentage":93},"Python","#3572A5",0,570,107,"2026-04-17T23:32:32","MIT","未说明","非必需（基础项目可在 CPU 运行）；深度学习项目（如 TensorFlow\u002FKeras 狗品种预测）及 Google Colab 环境建议使用支持 CUDA 的 NVIDIA GPU；具体型号、显存大小及 CUDA 版本未说明","未说明（建议至少 8GB，处理大数据集或 Spark 项目推荐 16GB+）",{"notes":102,"python":103,"dependencies":104},"该项目为数据科学与机器学习练习集合，涵盖从基础统计分析到深度学习的多个项目。部分项目依赖特定数据集（需从 UCI 或 Kaggle 下载）。若运行涉及地理数据的项目需安装 GeoPandas 及其系统级依赖（如 GDAL）；涉及人脸识别的项目需安装 dlib（C++ 编译环境可能必需）；大规模数据处理项目需配置 Apache Spark 或 Databricks 环境。推荐使用 Jupyter Notebook 或 Google Colab 运行。","未说明（兼容主流 Python 3.x 版本）",[105,106,107,108,109,110,111,112,113,114],"NumPy","Pandas","Scikit-learn","TensorFlow","Keras","Matplotlib","Seaborn","Plotly","Streamlit","PySpark",[35,14,16,116],"其他",[118,119,120,121,122,123,124,125,126,127,128,129,130,131],"machine-learning","pandas","data-science","tensorflow","keras","scikit-learn","deep-learning","seaborn","data-analysis","data-visualization","natural-language-processing","boosting-algorithms","machine-learning-algorithms","probability-statistics","2026-03-27T02:49:30.150509","2026-04-19T09:06:49.975022",[],[]]