[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-academic--awesome-datascience":3,"tool-academic--awesome-datascience":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 
代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",144730,2,"2026-04-07T23:26:32",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",107888,"2026-04-06T11:32:50",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},4721,"markitdown","microsoft\u002Fmarkitdown","MarkItDown 是一款由微软 AutoGen 团队打造的轻量级 Python 工具，专为将各类文件高效转换为 Markdown 格式而设计。它支持 PDF、Word、Excel、PPT、图片（含 OCR）、音频（含语音转录）、HTML 乃至 YouTube 链接等多种格式的解析，能够精准提取文档中的标题、列表、表格和链接等关键结构信息。\n\n在人工智能应用日益普及的今天，大语言模型（LLM）虽擅长处理文本，却难以直接读取复杂的二进制办公文档。MarkItDown 恰好解决了这一痛点，它将非结构化或半结构化的文件转化为模型“原生理解”且 Token 效率极高的 Markdown 格式，成为连接本地文件与 AI 分析 pipeline 的理想桥梁。此外，它还提供了 MCP（模型上下文协议）服务器，可无缝集成到 Claude Desktop 等 LLM 应用中。\n\n这款工具特别适合开发者、数据科学家及 AI 研究人员使用，尤其是那些需要构建文档检索增强生成（RAG）系统、进行批量文本分析或希望让 AI 
助手直接“阅读”本地文件的用户。虽然生成的内容也具备一定可读性，但其核心优势在于为机器",93400,"2026-04-06T19:52:38",[52,14],"插件",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":10,"last_commit_at":59,"category_tags":60,"status":17},4487,"LLMs-from-scratch","rasbt\u002FLLMs-from-scratch","LLMs-from-scratch 是一个基于 PyTorch 的开源教育项目，旨在引导用户从零开始一步步构建一个类似 ChatGPT 的大型语言模型（LLM）。它不仅是同名技术著作的官方代码库，更提供了一套完整的实践方案，涵盖模型开发、预训练及微调的全过程。\n\n该项目主要解决了大模型领域“黑盒化”的学习痛点。许多开发者虽能调用现成模型，却难以深入理解其内部架构与训练机制。通过亲手编写每一行核心代码，用户能够透彻掌握 Transformer 架构、注意力机制等关键原理，从而真正理解大模型是如何“思考”的。此外，项目还包含了加载大型预训练权重进行微调的代码，帮助用户将理论知识延伸至实际应用。\n\nLLMs-from-scratch 特别适合希望深入底层原理的 AI 开发者、研究人员以及计算机专业的学生。对于不满足于仅使用 API，而是渴望探究模型构建细节的技术人员而言，这是极佳的学习资源。其独特的技术亮点在于“循序渐进”的教学设计：将复杂的系统工程拆解为清晰的步骤，配合详细的图表与示例，让构建一个虽小但功能完备的大模型变得触手可及。无论你是想夯实理论基础，还是为未来研发更大规模的模型做准备",90106,"2026-04-06T11:19:32",[35,15,13,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":66,"readme_en":67,"readme_zh":68,"quickstart_zh":69,"use_case_zh":70,"hero_image_url":71,"owner_login":72,"owner_name":73,"owner_avatar_url":74,"owner_bio":75,"owner_company":76,"owner_location":76,"owner_email":77,"owner_twitter":76,"owner_website":78,"owner_url":79,"languages":76,"stars":80,"forks":81,"last_commit_at":82,"license":83,"difficulty_score":84,"env_os":85,"env_gpu":86,"env_ram":86,"env_deps":87,"category_tags":96,"github_topics":98,"view_count":32,"oss_zip_url":76,"oss_zip_packed_at":76,"status":17,"created_at":109,"updated_at":110,"faqs":111,"releases":152},5350,"academic\u002Fawesome-datascience","awesome-datascience",":memo: An awesome Data Science repository to learn and apply for real world problems.","awesome-datascience 是一个专为数据科学爱好者打造的开源学习资源库，旨在帮助初学者和从业者系统性地掌握数据科学知识并解决现实世界的问题。面对“什么是数据科学”以及“该从何学起”的常见困惑，它提供了一条清晰的学习路径，涵盖从基础概念到高级算法的全方位内容。\n\n该资源库解决了学习资源分散、入门门槛高的问题，将教程、免费课程、大学项目、机器学习算法（如监督学习、深度学习）、主流工具包（PyTorch、TensorFlow 
等）、模型评估方案以及可视化利器整合在一处。此外，它还收录了丰富的行业文献、播客、技术博客、数据集甚至趣味漫画，帮助用户在理论学习之外，也能通过社区交流和实战竞赛提升技能。\n\n无论是刚入门的学生、希望转型的开发者，还是寻求灵感的研究人员，都能在这里找到适合自己的成长阶梯。其独特的亮点在于不仅关注技术硬实力，还精心整理了社交媒体账号、通讯期刊和行业活动信息，构建了一个完整的数据科学生态圈。跟随 awesome-datascience 的步骤，你可以轻松开启数据科学之旅，将理论知识转化为解决实际问题的强大能力。","\u003Cdiv align=\"center\" markdown=\"1\">\n   \u003Csup>Special thanks to Sponsors:\u003C\u002Fsup>\n   \u003Cbr \u002F>\n   \u003Cbr \u002F>\n   \u003Ca href=\"https:\u002F\u002Frequestly.com\u002Fawesomedatascience\">\n      \u003Cimg alt=\"Requestly sponsorship\" width=\"400\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Facademic_awesome-datascience_readme_0f75b519af94.png\">\n   \u003C\u002Fa>\n   \u003Cbr>\n   \n   ### [Requestly - Free & Open-Source alternative to Postman](https:\u002F\u002Frequestly.com\u002Fawesomedatascience)\n   [All-in-one platform to Test, Mock and Intercept APIs](https:\u002F\u002Frequestly.com\u002Fawesomedatascience)\n   \u003Cbr>\n\u003C\u002Fdiv>\n\n\u003Chr>\n\n\u003Cdiv align=\"center\">\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Facademic_awesome-datascience_readme_3237deef3f14.jpg\">\u003C\u002Fdiv>\n\n# AWESOME DATA SCIENCE\n\n[![Awesome](https:\u002F\u002Fcdn.jsdelivr.net\u002Fgh\u002Fsindresorhus\u002Fawesome@d7305f38d29fed78fa85652e3a63e154dd8e8829\u002Fmedia\u002Fbadge.svg)](https:\u002F\u002Fgithub.com\u002Fsindresorhus\u002Fawesome) \n\nContributions are welcome - see [`CONTRIBUTING.md`](CONTRIBUTING.md).\n\n**An open-source Data Science repository to learn and apply concepts toward solving real- world problems.**\n\nThis is a shortcut path to start studying **Data Science**. Just follow the steps to answer the questions, \"What is Data Science, and what should I study to learn Data Science?\"\n\n\n\u003Cbr>\n\n\n## Sponsors\n\n| Sponsor | Pitch |\n| --- | --- |\n| --- | Be the first to sponsor! 
`github@academic.io` |\n\n\n\n## Table of Contents\n\n- [What is Data Science?](#what-is-data-science)\n- [Where do I Start?](#where-do-i-start)\n- [Agents](#agents)\n- [Training Resources](#training-resources)\n  - [Tutorials](#tutorials)\n  - [Free Courses](#free-courses)\n  - [Massively Open Online Courses](#moocs)\n  - [Intensive Programs](#intensive-programs)\n  - [Colleges](#colleges)\n- [The Data Science Toolbox](#the-data-science-toolbox)\n\n  - [Algorithms](#algorithms)\n    - [Supervised Learning](#supervised-learning)\n    - [Unsupervised Learning](#unsupervised-learning)\n    - [Semi-Supervised Learning](#semi-supervised-learning)\n    - [Reinforcement Learning](#reinforcement-learning)\n    - [Data Mining Algorithms](#data-mining-algorithms)\n    - [Deep Learning Architectures](#deep-learning-architectures)\n  - [General Machine Learning Packages](#general-machine-learning-packages)\n  - [Model Evaluation & Monitoring](#model-evaluation--monitoring)\n    - [Evidently AI](#evidently-ai)\n  - [Deep Learning Packages](#deep-learning-packages)\n    - [PyTorch Ecosystem](#pytorch-ecosystem)\n    - [TensorFlow Ecosystem](#tensorflow-ecosystem)\n    - [Keras Ecosystem](#keras-ecosystem)\n  - [Visualization Tools](#visualization-tools)\n  - [Miscellaneous Tools](#miscellaneous-tools)\n- [Literature and Media](#literature-and-media)\n  - [Books](#books)\n    - [Book Deals (Affiliated)](#book-deals-affiliated)\n  - [Journals, Publications, and Magazines](#journals-publications-and-magazines)\n  - [Newsletters](#newsletters)\n  - [Bloggers](#bloggers)\n  - [Presentations](#presentations)\n  - [Podcasts](#podcasts)\n  - [YouTube Videos & Channels](#youtube-videos--channels)\n- [Socialize](#socialize)\n  - [Facebook Accounts](#facebook-accounts)\n  - [Twitter Accounts](#twitter-accounts)\n  - [Telegram Channels](#telegram-channels)\n  - [Slack Communities](#slack-communities)\n  - [GitHub Groups](#github-groups)\n  - [Data Science 
Competitions](#data-science-competitions)\n- [Fun](#fun)\n  - [Infographics](#infographics)\n  - [Datasets](#datasets)\n  - [Comics](#comics)\n- [Other Awesome Lists](#other-awesome-lists)\n  - [Hobby](#hobby)\n\n## What is Data Science?\n**[`^        back to top        ^`](#awesome-data-science)**\n\nData Science is one of the hottest topics on the Computer and Internet farmland nowadays. People have gathered data from applications and systems until today and now is the time to analyze them. The next steps are producing suggestions from the data and creating predictions about the future. [Here](https:\u002F\u002Fwww.quora.com\u002FData-Science\u002FWhat-is-data-science) you can find the biggest question for **Data Science** and hundreds of answers from experts.\n\n\n| Link | Preview |\n| --- | --- |\n| [Data Science For Beginners](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FData-Science-For-Beginners) | Microsoft are pleased to offer a 10-week, 20-lesson curriculum all about Data Science. |\n| [What is Data Science @ O'reilly](https:\u002F\u002Fwww.oreilly.com\u002Fideas\u002Fwhat-is-data-science) | _Data scientists combine entrepreneurship with patience, the willingness to build data products incrementally, the ability to explore, and the ability to iterate over a solution. They are inherently interdisciplinary. They can tackle all aspects of a problem, from initial data collection and data conditioning to drawing conclusions. They can think outside the box to come up with new ways to view the problem, or to work with very broadly defined problems: “here’s a lot of data, what can you make from it?”_ |\n| [What is Data Science @ Quora](https:\u002F\u002Fwww.quora.com\u002FData-Science\u002FWhat-is-data-science) | Data Science is a combination of a number of aspects of Data such as Technology, Algorithm development, and data interference to study the data, analyse it, and find innovative solutions to difficult problems. 
Basically Data Science is all about Analysing data and driving for business growth by finding creative ways. |\n| [The sexiest job of 21st century](https:\u002F\u002Fhbr.org\u002F2012\u002F10\u002Fdata-scientist-the-sexiest-job-of-the-21st-century) | _Data scientists today are akin to Wall Street “quants” of the 1980s and 1990s. In those days people with backgrounds in physics and math streamed to investment banks and hedge funds, where they could devise entirely new algorithms and data strategies. Then a variety of universities developed master’s programs in financial engineering, which churned out a second generation of talent that was more accessible to mainstream firms. The pattern was repeated later in the 1990s with search engineers, whose rarefied skills soon came to be taught in computer science programs._ |\n| [Wikipedia](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FData_science) | _Data science is an interdisciplinary field that uses scientific methods, processes, algorithms and systems to extract knowledge and insights from many structural and unstructured data. Data science is related to data mining, machine learning and big data._ |\n| [How to Become a Data Scientist](https:\u002F\u002Fwww.mastersindatascience.org\u002Fcareers\u002Fdata-scientist\u002F) | _Data scientists are big data wranglers, gathering and analyzing large sets of structured and unstructured data. A data scientist’s role combines computer science, statistics, and mathematics. They analyze, process, and model data then interpret the results to create actionable plans for companies and other organizations._ |\n| [a very short history of #datascience](https:\u002F\u002Fwww.forbes.com\u002Fsites\u002Fgilpress\u002F2013\u002F05\u002F28\u002Fa-very-short-history-of-data-science\u002F) | _The story of how data scientists became sexy is mostly the story of the coupling of the mature discipline of statistics with a very young one--computer science.  
The term “Data Science” has emerged only recently to specifically designate a new profession that is expected to make sense of the vast stores of big data. But making sense of data has a long history and has been discussed by scientists, statisticians, librarians, computer scientists and others for years. The following timeline traces the evolution of the term “Data Science” and its use, attempts to define it, and related terms._ |\n|[Software Development Resources for Data Scientists](https:\u002F\u002Fwww.rstudio.com\u002Fblog\u002Fsoftware-development-resources-for-data-scientists\u002F)|_Data scientists concentrate on making sense of data through exploratory analysis, statistics, and models. Software developers apply a separate set of knowledge with different tools. Although their focus may seem unrelated, data science teams can benefit from adopting software development best practices. Version control, automated testing, and other dev skills help create reproducible, production-ready code and tools._|\n|[Data Scientist Roadmap](https:\u002F\u002Fwww.scaler.com\u002Fblog\u002Fhow-to-become-a-data-scientist\u002F)|_Data science is an excellent career choice in today’s data-driven world where approx 328.77 million terabytes of data are generated daily. And this number is only increasing day by day, which in turn increases the demand for skilled data scientists who can utilize this data to drive business growth._|\n|[Navigating Your Path to Becoming a Data Scientist](https:\u002F\u002Fwww.appliedaicourse.com\u002Fblog\u002Fhow-to-become-a-data-scientist\u002F)|_Data science is one of the most in-demand careers today. With businesses increasingly relying on data to make decisions, the need for skilled data scientists has grown rapidly. Whether it’s tech companies, healthcare organizations, or even government institutions, data scientists play a crucial role in turning raw data into valuable insights. 
But how do you become a data scientist, especially if you’re just starting out? _|\n\n## Where do I Start?\n**[`^        back to top        ^`](#awesome-data-science)**\n\nWhile not strictly necessary, having a programming language is a crucial skill to be effective as a data scientist. Currently, the most popular language is _Python_, closely followed by _R_. Python is a general-purpose scripting language that sees applications in a wide variety of fields. R is a domain-specific language for statistics, which contains a lot of common statistics tools out of the box.\n\n[Python](https:\u002F\u002Fpython.org\u002F) is by far the most popular language in science, due in no small part to the ease at which it can be used and the vibrant ecosystem of user-generated packages. To install packages, there are two main methods: Pip (invoked as `pip install`), the package manager that comes bundled with Python, and [Anaconda](https:\u002F\u002Fwww.anaconda.com) (invoked as `conda install`), a powerful package manager that can install packages for Python, R, and can download executables like Git. \n\nUnlike R, Python was not built from the ground up with data science in mind, but there are plenty of third party libraries to make up for this. A much more exhaustive list of packages can be found later in this document, but these four packages are a good set of choices to start your data science journey with: [Scikit-Learn](https:\u002F\u002Fscikit-learn.org\u002Fstable\u002Findex.html) is a general-purpose data science package which implements the most popular algorithms - it also includes rich documentation, tutorials, and examples of the models it implements. Even if you prefer to write your own implementations, Scikit-Learn is a valuable reference to the nuts-and-bolts behind many of the common algorithms you'll find. With [Pandas](https:\u002F\u002Fpandas.pydata.org\u002F), one can collect and analyze their data into a convenient table format. 
[Numpy](https:\u002F\u002Fnumpy.org\u002F) provides very fast tooling for mathematical operations, with a focus on vectors and matrices. [Seaborn](https:\u002F\u002Fseaborn.pydata.org\u002F), itself based on the [Matplotlib](https:\u002F\u002Fmatplotlib.org\u002F) package, is a quick way to generate beautiful visualizations of your data, with many good defaults available out of the box, as well as a gallery showing how to produce many common visualizations of your data.\n\nWhen embarking on your journey to becoming a data scientist, the choice of language isn't particularly important, and both Python and R have their pros and cons. Pick a language you like, and check out one of the [Free courses](#free-courses) we've listed below!\n\n### Beginner Roadmap\nIf you're just starting out, here's a simple recommended path:\n\n1. **Learn Python** – Start with basics: variables, loops, functions\n2. **Learn core libraries** – Pandas, NumPy, Matplotlib, Scikit-Learn\n3. **Practice with beginner projects** – Try Titanic survival or house price prediction on Kaggle\n4. **Learn Math basics** – Statistics, Linear Algebra, Probability\n5. **Move into ML** – Supervised learning → Unsupervised → Deep Learning\n\n## Agents\n\nThis section contains agent frameworks and tools that are useful for data science workflows.\n\n### Frameworks\n- [ADK-Rust](https:\u002F\u002Fgithub.com\u002Fzavora-ai\u002Fadk-rust) - Production-ready AI agent development kit for Rust with model-agnostic design (Gemini, OpenAI, Anthropic), multiple agent types (LLM, Graph, Workflow), MCP support, and built-in telemetry.\n\n### Tools\n- [Frostbyte MCP](https:\u002F\u002Fgithub.com\u002FOzorOwn\u002Ffrostbyte-mcp) - MCP server providing 13 data tools for AI agents: real-time crypto prices, IP geolocation, DNS lookups, web scraping to markdown, code execution, and screenshots. 
One API key for 40+ services.\n- [Arch Tools](https:\u002F\u002Farchtools.dev) - 61 production-ready AI API tools for data science workflows: code analysis, web scraping, NLP, image generation, crypto data, and search. REST API and MCP protocol support. [GitHub](https:\u002F\u002Fgithub.com\u002FDeesmo\u002FArch-AI-Tools)\n\n### Research & Knowledge Retrieval\n- [BGPT MCP](https:\u002F\u002Fbgpt.pro\u002Fmcp) - MCP server that gives AI agents access to a database of scientific papers built from raw experimental data extracted from full-text studies. Returns 25+ structured fields per paper including methods, results, sample sizes, and quality scores. [GitHub](https:\u002F\u002Fgithub.com\u002Fconnerlambden\u002Fbgpt-mcp)\n\n### Workflow\n**[`^        back to top        ^`](#awesome-data-science)**\n- [sim](https:\u002F\u002Fsim.ai) - Sim Studio's interface is a lightweight, intuitive way to quickly build and deploy LLMs that connect with your favorite tools.\n\n\n## Training Resources\n**[`^        back to top        ^`](#awesome-data-science)**\n\nHow do you learn data science? By doing data science, of course! Okay, okay - that might not be particularly helpful when you're first starting out. 
In this section, we've listed some learning resources, in rough order from least to greatest commitment - [Tutorials](#tutorials), [Massively Open Online Courses (MOOCs)](#moocs), [Intensive Programs](#intensive-programs), and [Colleges](#colleges).\n\n\n### Tutorials\n**[`^        back to top        ^`](#awesome-data-science)**\n\n- [1000 Data Science Projects](https:\u002F\u002Fcloud.blobcity.com\u002F#\u002Fps\u002Fexplore) you can run on the browser with IPython.\n- [#tidytuesday](https:\u002F\u002Fgithub.com\u002Frfordatascience\u002Ftidytuesday) - A weekly data project aimed at the R ecosystem.\n- [Data science your way](https:\u002F\u002Fgithub.com\u002Fjadianes\u002Fdata-science-your-way)\n- [DataCamp Cheatsheets](https:\u002F\u002Fwww.datacamp.com\u002Fcheat-sheet) Cheatsheets for data science.\n- [PySpark Cheatsheet](https:\u002F\u002Fgithub.com\u002Fkevinschaich\u002Fpyspark-cheatsheet)\n- [Machine Learning, Data Science and Deep Learning with Python ](https:\u002F\u002Fwww.manning.com\u002Flivevideo\u002Fmachine-learning-data-science-and-deep-learning-with-python)\n- [Your Guide to Latent Dirichlet Allocation](https:\u002F\u002Fmedium.com\u002F@lettier\u002Fhow-does-lda-work-ill-explain-using-emoji-108abf40fa7d)\n- [Tutorials of source code from the book Genetic Algorithms with Python by Clinton Sheppard](https:\u002F\u002Fgithub.com\u002Fhandcraftsman\u002FGeneticAlgorithmsWithPython)\n- [Tutorials to get started on signal processing for machine learning](https:\u002F\u002Fgithub.com\u002Fjinglescode\u002Fpython-signal-processing)\n- [Realtime deployment](https:\u002F\u002Fwww.microprediction.com\u002Fpython-1) Tutorial on Python time-series model deployment.\n- [Python for Data Science: A Beginner’s Guide](https:\u002F\u002Flearntocodewith.me\u002Fposts\u002Fpython-for-data-science\u002F)\n- [Minimum Viable Study Plan for Machine Learning Interviews](https:\u002F\u002Fgithub.com\u002Fkhangich\u002Fmachine-learning-interview)\n- [Understand and Know 
Machine Learning Engineering by Building Solid Projects](http:\u002F\u002Fmlzoomcamp.com\u002F)\n- [12 free Data Science projects to practice Python and Pandas](https:\u002F\u002Fwww.datawars.io\u002Farticles\u002F12-free-data-science-projects-to-practice-python-and-pandas)\n- [Best CV\u002FResume for Data Science Freshers](https:\u002F\u002Fenhancv.com\u002Fresume-examples\u002Fdata-scientist\u002F)\n- [Understand Data Science Course in Java](https:\u002F\u002Fwww.alter-solutions.com\u002Farticles\u002Fjava-data-science)\n- [Data Analytics Interview Questions (Beginner to Advanced)](https:\u002F\u002Fwww.appliedaicourse.com\u002Fblog\u002Fdata-analytics-interview-questions\u002F)\n- [Top 100+ Data Science Interview Questions and Answers](https:\u002F\u002Fwww.appliedaicourse.com\u002Fblog\u002Fdata-science-interview-questions\u002F)\n- [DataDriven - SQL, Python, and Data Modeling Interview Questions](https:\u002F\u002Fwww.datadriven.io\u002F)\n\n### Free Courses\n**[`^        back to top        ^`](#awesome-data-science)**\n\n- [Data Scientist with R](https:\u002F\u002Fwww.datacamp.com\u002Ftracks\u002Fdata-scientist-with-r)\n- [Data Scientist with Python](https:\u002F\u002Fwww.datacamp.com\u002Ftracks\u002Fdata-scientist-with-python)\n- [Genetic Algorithms OCW Course](https:\u002F\u002Focw.mit.edu\u002Fcourses\u002Felectrical-engineering-and-computer-science\u002F6-034-artificial-intelligence-fall-2010\u002Flecture-videos\u002Flecture-1-introduction-and-scope\u002F)\n- [AI Expert Roadmap](https:\u002F\u002Fgithub.com\u002FAMAI-GmbH\u002FAI-Expert-Roadmap) - Roadmap to becoming an Artificial Intelligence Expert\n- [Convex Optimization](https:\u002F\u002Fwww.edx.org\u002Fcourse\u002Fconvex-optimization) - Convex Optimization (basics of convex analysis; least-squares, linear and quadratic programs, semidefinite programming, minimax, extremal volume, and other problems; optimality conditions, duality theory...)\n- [Learning from 
Data](https:\u002F\u002Fhome.work.caltech.edu\u002Ftelecourse.html) - Introduction to machine learning covering basic theory, algorithms and applications\n- [Kaggle](https:\u002F\u002Fwww.kaggle.com\u002Flearn) - Learn about Data Science, Machine Learning, Python etc\n- [ML Observability Fundamentals](https:\u002F\u002Farize.com\u002Fml-observability-fundamentals\u002F) - Learn how to monitor and root-cause production ML issues.\n- [Weights & Biases Effective MLOps: Model Development](https:\u002F\u002Fwww.wandb.courses\u002Fcourses\u002Feffective-mlops-model-development) - Free Course and Certification for building an end-to-end machine using W&B\n- [Python for Data Science by Scaler](https:\u002F\u002Fwww.scaler.com\u002Ftopics\u002Fcourse\u002Fpython-for-data-science\u002F) - This course is designed to empower beginners with the essential skills to excel in today's data-driven world. The comprehensive curriculum will give you a solid foundation in statistics, programming, data visualization, and machine learning.\n- [MLSys-NYU-2022](https:\u002F\u002Fgithub.com\u002Fjacopotagliabue\u002FMLSys-NYU-2022\u002Ftree\u002Fmain) - Slides, scripts and materials for the Machine Learning in Finance course at NYU Tandon, 2022.\n- [Hands-on Train and Deploy ML](https:\u002F\u002Fgithub.com\u002FPaulescu\u002Fhands-on-train-and-deploy-ml) - A hands-on course to train and deploy a serverless API that predicts crypto prices.\n- [LLMOps: Building Real-World Applications With Large Language Models](https:\u002F\u002Fwww.comet.com\u002Fsite\u002Fllm-course\u002F) - Learn to build modern software with LLMs using the newest tools and techniques in the field.\n- [Prompt Engineering for Vision Models](https:\u002F\u002Fwww.deeplearning.ai\u002Fshort-courses\u002Fprompt-engineering-for-vision-models\u002F) - Learn to prompt cutting-edge computer vision models with natural language, coordinate points, bounding boxes, segmentation masks, and even other images in this free course from 
DeepLearning.AI.\n- [Data Science Course By IBM](https:\u002F\u002Fskillsbuild.org\u002Fstudents\u002Fcourse-catalog\u002Fdata-science) - Free resources and learn what data science is and how it’s used in different industries.\n\n\n  \n### MOOC's\n**[`^        back to top        ^`](#awesome-data-science)**\n\n- [Coursera Introduction to Data Science](https:\u002F\u002Fwww.coursera.org\u002Fspecializations\u002Fdata-science)\n- [Data Science - 9 Steps Courses, A Specialization on Coursera](https:\u002F\u002Fwww.coursera.org\u002Fspecializations\u002Fjhu-data-science)\n- [Data Mining - 5 Steps Courses, A Specialization on Coursera](https:\u002F\u002Fwww.coursera.org\u002Fspecializations\u002Fdata-mining)\n- [Machine Learning – 5 Steps Courses, A Specialization on Coursera](https:\u002F\u002Fwww.coursera.org\u002Fspecializations\u002Fmachine-learning)\n- [CS 109 Data Science](https:\u002F\u002Fcs109.github.io\u002F2015\u002F)\n- [OpenIntro](https:\u002F\u002Fwww.openintro.org\u002F)\n- [CS 171 Visualization](https:\u002F\u002Fwww.cs171.org\u002F#!index.md)\n- [Process Mining: Data science in Action](https:\u002F\u002Fwww.coursera.org\u002Flearn\u002Fprocess-mining)\n- [Oxford Deep Learning](https:\u002F\u002Fwww.cs.ox.ac.uk\u002Fprojects\u002FDeepLearn\u002F)\n- [Oxford Deep Learning - video](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PLE6Wd9FR--EfW8dtjAuPoTuPcqmOV53Fu)\n- [Oxford Machine Learning](https:\u002F\u002Fwww.cs.ox.ac.uk\u002Fresearch\u002Fai_ml\u002Findex.html)\n- [UBC Machine Learning - video](https:\u002F\u002Fwww.cs.ubc.ca\u002F~nando\u002F540-2013\u002Flectures.html)\n- [Data Science Specialization](https:\u002F\u002Fgithub.com\u002FDataScienceSpecialization\u002Fcourses)\n- [Coursera Big Data Specialization](https:\u002F\u002Fwww.coursera.org\u002Fspecializations\u002Fbig-data)\n- [Statistical Thinking for Data Science and Analytics by Edx](https:\u002F\u002Fwww.edx.org\u002Fcourse\u002Fstatistical-thinking-for-data-science-and-analytic)\n- 
[Cognitive Class AI by IBM](https:\u002F\u002Fcognitiveclass.ai\u002F)\n- [Udacity - Deep Learning](https:\u002F\u002Fwww.udacity.com\u002Fcourse\u002Fintro-to-tensorflow-for-deep-learning--ud187)\n- [Keras in Motion](https:\u002F\u002Fwww.manning.com\u002Flivevideo\u002Fkeras-in-motion)\n- [Microsoft Professional Program for Data Science](https:\u002F\u002Facademy.microsoft.com\u002Fen-us\u002Fprofessional-program\u002Ftracks\u002Fdata-science\u002F)\n- [COMP3222\u002FCOMP6246 - Machine Learning Technologies](https:\u002F\u002Ftdgunes.com\u002FCOMP6246-2019Fall\u002F)\n- [CS 231 - Convolutional Neural Networks for Visual Recognition](https:\u002F\u002Fcs231n.github.io\u002F)\n- [Coursera Tensorflow in practice](https:\u002F\u002Fwww.coursera.org\u002Fprofessional-certificates\u002Ftensorflow-in-practice)\n- [Coursera Deep Learning Specialization](https:\u002F\u002Fwww.coursera.org\u002Fspecializations\u002Fdeep-learning)\n- [365 Data Science Course](https:\u002F\u002F365datascience.com\u002F)\n- [Coursera Natural Language Processing Specialization](https:\u002F\u002Fwww.coursera.org\u002Fspecializations\u002Fnatural-language-processing)\n- [Coursera GAN Specialization](https:\u002F\u002Fwww.coursera.org\u002Fspecializations\u002Fgenerative-adversarial-networks-gans)\n- [Codecademy's Data Science](https:\u002F\u002Fwww.codecademy.com\u002Flearn\u002Fpaths\u002Fdata-science)\n- [Linear Algebra](https:\u002F\u002Focw.mit.edu\u002Fcourses\u002F18-06sc-linear-algebra-fall-2011\u002F) - Linear Algebra course by Gilbert Strang\n- [A 2020 Vision of Linear Algebra (G. 
Strang)](https:\u002F\u002Focw.mit.edu\u002Fresources\u002Fres-18-010-a-2020-vision-of-linear-algebra-spring-2020\u002F)\n- [Python for Data Science Foundation Course](https:\u002F\u002Fintellipaat.com\u002Facademy\u002Fcourse\u002Fpython-for-data-science-free-training\u002F)\n- [Data Science: Statistics & Machine Learning](https:\u002F\u002Fwww.coursera.org\u002Fspecializations\u002Fdata-science-statistics-machine-learning)\n- [Machine Learning Engineering for Production (MLOps)](https:\u002F\u002Fwww.coursera.org\u002Fspecializations\u002Fmachine-learning-engineering-for-production-mlops)\n- [Recommender Systems Specialization from University of Minnesota](https:\u002F\u002Fwww.coursera.org\u002Fspecializations\u002Frecommender-systems) is an intermediate\u002Fadvanced level specialization focused on Recommender System on the Coursera platform.\n- [Stanford Artificial Intelligence Professional Program](https:\u002F\u002Fonline.stanford.edu\u002Fprograms\u002Fartificial-intelligence-professional-program)\n- [Data Scientist with Python](https:\u002F\u002Fapp.datacamp.com\u002Flearn\u002Fcareer-tracks\u002Fdata-scientist-with-python)\n- [Programming with Julia](https:\u002F\u002Fwww.udemy.com\u002Fcourse\u002Fprogramming-with-julia\u002F)\n- [Scaler Data Science & Machine Learning Program](https:\u002F\u002Fwww.scaler.com\u002Fdata-science-course\u002F)\n- [Data Science Skill Tree](https:\u002F\u002Flabex.io\u002Fskilltrees\u002Fdata-science)\n- [Data Science for Beginners - Learn with AI tutor](https:\u002F\u002Fcodekidz.ai\u002Flesson-intro\u002Fdata-science-368dbf)\n- [Machine Learning for Beginners - Learn with AI tutor](https:\u002F\u002Fcodekidz.ai\u002Flesson-intro\u002Fmachine-lear-36abfb)\n- [Introduction to Data Science](https:\u002F\u002Fwww.mygreatlearning.com\u002Facademy\u002Flearn-for-free\u002Fcourses\u002Fintroduction-to-data-science)\n-[Getting Started with Python for Data 
Science](https:\u002F\u002Fwww.codecademy.com\u002Flearn\u002Fgetting-started-with-python-for-data-science) \n- [Google Advanced Data Analytics Certificate](https:\u002F\u002Fgrow.google\u002Fdata-analytics\u002F) – Professional courses in data analysis, statistics, and machine learning fundamentals.\n- [Maschinelle Sprachgebrauchsanalyse - Grundlagen der Korpuslinguistik](https:\u002F\u002Fwww.twillo.de\u002Fedu-sharing\u002Fcomponents\u002Fcollections?id=e6ce03ae-4660-49b0-be10-dcc92e71e796) - course material on text-mining \u002F corpus-linguistics *in German* funded by the federal state of North Rhine-Westphalia\n- [Programmieren für Germanist*innen](https:\u002F\u002Fwww.twillo.de\u002Fedu-sharing\u002Fcomponents\u002Fcollections?id=16bac749-f10e-483f-9020-5d6365b4e092) - course material: programming in python *in German* for digital humanities - funded by the federal state of North Rhine-Westphalia\n\n### Intensive Programs\n**[`^        back to top        ^`](#awesome-data-science)**\n\n- [S2DS](https:\u002F\u002Fwww.s2ds.org\u002F)\n- [WorldQuant University Applied Data Science Lab](https:\u002F\u002Fwww.wqu.edu\u002Fadsl)\n\n\n### Colleges\n**[`^        back to top        ^`](#awesome-data-science)**\n\n- [A list of colleges and universities offering degrees in data science.](https:\u002F\u002Fgithub.com\u002Fryanswanstrom\u002Fawesome-datascience-colleges)\n- [Data Science Degree @ Berkeley](https:\u002F\u002Fischoolonline.berkeley.edu\u002Fdata-science\u002F)\n- [Data Science Degree @ UVA](https:\u002F\u002Fdatascience.virginia.edu\u002F)\n- [Data Science Degree @ Wisconsin](https:\u002F\u002Fdatasciencedegree.wisconsin.edu\u002F)\n- [BS in Data Science & Applications](https:\u002F\u002Fstudy.iitm.ac.in\u002Fds\u002F)\n- [MS in Computer Information Systems @ Boston University](https:\u002F\u002Fwww.bu.edu\u002Fonline\u002Fprograms\u002Fgraduate-programs\u002Fcomputer-information-systems-masters-degree\u002F)\n- [MS in Business Analytics @ ASU 
Online](https:\u002F\u002Fasuonline.asu.edu\u002Fonline-degree-programs\u002Fgraduate\u002Fmaster-science-business-analytics\u002F)\n- [MS in Applied Data Science @ Syracuse](https:\u002F\u002Fischool.syr.edu\u002Facademics\u002Fapplied-data-science-masters-degree\u002F)\n- [M.S. Management & Data Science @ Leuphana](https:\u002F\u002Fwww.leuphana.de\u002Fen\u002Fgraduate-school\u002Fmasters-programmes\u002Fmanagement-data-science.html)\n- [Master of Data Science @ Melbourne University](https:\u002F\u002Fstudy.unimelb.edu.au\u002Ffind\u002Fcourses\u002Fgraduate\u002Fmaster-of-data-science\u002F#overview)\n- [Msc in Data Science @ The University of Edinburgh](https:\u002F\u002Fwww.ed.ac.uk\u002Fstudying\u002Fpostgraduate\u002Fdegrees\u002Findex.php?r=site\u002Fview&id=902)\n- [Master of Management Analytics @ Queen's University](https:\u002F\u002Fsmith.queensu.ca\u002Fgrad_studies\u002Fmma\u002Findex.php)\n- [Master of Data Science @ Illinois Institute of Technology](https:\u002F\u002Fwww.iit.edu\u002Facademics\u002Fprograms\u002Fdata-science-mas)\n- [Master of Applied Data Science @ The University of Michigan](https:\u002F\u002Fwww.si.umich.edu\u002Fprograms\u002Fmaster-applied-data-science)\n- [Master Data Science and Artificial Intelligence @ Eindhoven University of Technology](https:\u002F\u002Fwww.tue.nl\u002Fen\u002Feducation\u002Fgraduate-school\u002Fmaster-data-science-and-artificial-intelligence\u002F)\n- [Master's Degree in Data Science and Computer Engineering @ University of Granada](https:\u002F\u002Fmasteres.ugr.es\u002Fdatcom\u002F)\n\n## The Data Science Toolbox\n**[`^        back to top        ^`](#awesome-data-science)**\n\nThis section is a collection of packages, tools, algorithms, and other useful items in the data science world.\n\n### Algorithms\n**[`^        back to top        ^`](#awesome-data-science)**\n\nThese are some Machine Learning and Data Mining algorithms and models that help you to understand your data and derive meaning from 
it.\n\n#### Three kinds of Machine Learning Systems\n\n- Based on training with human supervision\n- Based on learning incrementally on the fly\n- Based on data points comparison and pattern detection\n\n### Comparison\n- [datacompy](https:\u002F\u002Fgithub.com\u002Fcapitalone\u002Fdatacompy) - DataComPy is a package to compare two Pandas DataFrames.\n\n#### Supervised Learning\n\n- [Regression](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FRegression)\n- [Linear Regression](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FLinear_regression)\n- [Ordinary Least Squares](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FOrdinary_least_squares)\n- [Logistic Regression](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FLogistic_regression)\n- [Stepwise Regression](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FStepwise_regression)\n- [Multivariate Adaptive Regression Splines](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FMultivariate_adaptive_regression_spline)\n- [Softmax Regression](https:\u002F\u002Fd2l.ai\u002Fchapter_linear-classification\u002Fsoftmax-regression.html)\n- [Locally Estimated Scatterplot Smoothing](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FLocal_regression)\n- Classification\n  - [k-nearest neighbor](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FK-nearest_neighbors_algorithm)\n  - [Support Vector Machines](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FSupport_vector_machine)\n  - [Decision Trees](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FDecision_tree)\n  - [ID3 algorithm](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FID3_algorithm)\n  - [C4.5 algorithm](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FC4.5_algorithm)\n- [Ensemble Learning](https:\u002F\u002Fscikit-learn.org\u002Fstable\u002Fmodules\u002Fensemble.html)\n  - [Boosting](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FBoosting_(machine_learning))\n  - 
[Stacking](https:\u002F\u002Fmachinelearningmastery.com\u002Fstacking-ensemble-machine-learning-with-python)\n  - [Bagging](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FBootstrap_aggregating)\n  - [Random Forest](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FRandom_forest)\n  - [AdaBoost](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FAdaBoost)\n\n#### Unsupervised Learning\n- [Clustering](https:\u002F\u002Fscikit-learn.org\u002Fstable\u002Fmodules\u002Fclustering.html#clustering)\n  - [Hierarchical clustering](https:\u002F\u002Fscikit-learn.org\u002Fstable\u002Fmodules\u002Fclustering.html#hierarchical-clustering)\n  - [k-means](https:\u002F\u002Fscikit-learn.org\u002Fstable\u002Fmodules\u002Fclustering.html#k-means)\n  - [Density-based clustering](https:\u002F\u002Fscikit-learn.org\u002Fstable\u002Fmodules\u002Fclustering.html#dbscan)\n  - [Fuzzy clustering](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FFuzzy_clustering)\n  - [Mixture models](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FMixture_model)\n- [Dimension Reduction](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FDimensionality_reduction)\n  - [Principal Component Analysis (PCA)](https:\u002F\u002Fscikit-learn.org\u002Fstable\u002Fmodules\u002Fdecomposition.html#principal-component-analysis-pca)\n  - [t-SNE; t-distributed Stochastic Neighbor Embedding](https:\u002F\u002Fscikit-learn.org\u002Fstable\u002Fmodules\u002Fmanifold.html#t-distributed-stochastic-neighbor-embedding-tsne)\n  - [Factor Analysis](https:\u002F\u002Fscikit-learn.org\u002Fstable\u002Fmodules\u002Fdecomposition.html#factor-analysis)\n  - [Latent Dirichlet Allocation (LDA)](https:\u002F\u002Fscikit-learn.org\u002Fstable\u002Fmodules\u002Fdecomposition.html#latent-dirichlet-allocation-lda)\n- [Neural Networks](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FNeural_network)\n- [Self-organizing map](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FSelf-organizing_map)\n- [Adaptive resonance 
theory](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FAdaptive_resonance_theory)\n- [Hidden Markov Models (HMM)](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FHidden_Markov_model)\n\n#### Semi-Supervised Learning\n\n- S3VM\n- [Clustering](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FWeak_supervision#Cluster_assumption)\n- [Generative models](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FWeak_supervision#Generative_models)\n- [Low-density separation](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FWeak_supervision#Low-density_separation)\n- [Laplacian regularization](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FWeak_supervision#Laplacian_regularization)\n- [Heuristic approaches](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FWeak_supervision#Heuristic_approaches)\n\n#### Reinforcement Learning\n\n- [Q Learning](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FQ-learning)\n- [SARSA (State-Action-Reward-State-Action) algorithm](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FState%E2%80%93action%E2%80%93reward%E2%80%93state%E2%80%93action)\n- [Temporal difference learning](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FTemporal_difference_learning#:~:text=Temporal%20difference%20(TD)%20learning%20refers,estimate%20of%20the%20value%20function.)\n\n#### Data Mining Algorithms\n\n- [C4.5](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FC4.5_algorithm)\n- [k-Means](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FK-means_clustering)\n- [SVM (Support Vector Machine)](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FSupport_vector_machine)\n- [Apriori](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FApriori_algorithm)\n- [EM (Expectation-Maximization)](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FExpectation%E2%80%93maximization_algorithm)\n- [PageRank](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FPageRank)\n- [AdaBoost](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FAdaBoost)\n- [KNN (K-Nearest 
Neighbors)](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FK-nearest_neighbors_algorithm)\n- [Naive Bayes](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FNaive_Bayes_classifier)\n- [CART (Classification and Regression Trees)](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FDecision_tree_learning)\n#### Modern Data Mining Algorithms\n\n- [XGBoost (Extreme Gradient Boosting)](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FXGBoost)\n- [LightGBM (Light Gradient Boosting Machine)](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FLightGBM)\n- [CatBoost](https:\u002F\u002Fcatboost.ai\u002F)\n- [HDBSCAN (Hierarchical Density-Based Spatial Clustering of Applications with Noise)](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FDBSCAN#HDBSCAN)\n- [FP-Growth (Frequent Pattern Growth Algorithm)](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FAssociation_rule_learning#FP-growth_algorithm)\n- [Isolation Forest](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FIsolation_forest)\n- [Deep Embedded Clustering (DEC)](https:\u002F\u002Farxiv.org\u002Fabs\u002F1511.06335)\n- [TPU (Top-k Periodic and High-Utility Patterns)](https:\u002F\u002Farxiv.org\u002Fabs\u002F2509.15732)\n- [Context-Aware Rule Mining (Transformer-Based Framework)](https:\u002F\u002Farxiv.org\u002Fabs\u002F2503.11125)\n\n\n#### Deep Learning architectures\n\n- [Multilayer Perceptron](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FMultilayer_perceptron)\n- [Convolutional Neural Network (CNN)](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FConvolutional_neural_network)\n- [Recurrent Neural Network (RNN)](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FRecurrent_neural_network)\n- [Boltzmann Machines](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FBoltzmann_machine)\n- [Autoencoder](https:\u002F\u002Fwww.tensorflow.org\u002Ftutorials\u002Fgenerative\u002Fautoencoder)\n- [Generative Adversarial Network 
(GAN)](https:\u002F\u002Fdevelopers.google.com\u002Fmachine-learning\u002Fgan\u002Fgan_structure)\n- [Self-Organized Maps](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FSelf-organizing_map)\n- [Transformer](https:\u002F\u002Fwww.tensorflow.org\u002Ftext\u002Ftutorials\u002Ftransformer)\n- [Conditional Random Field (CRF)](https:\u002F\u002Ftowardsdatascience.com\u002Fconditional-random-fields-explained-e5b8256da776)\n- [ML System Designs](https:\u002F\u002Fwww.evidentlyai.com\u002Fml-system-design)\n\n### General Machine Learning Packages\n**[`^        back to top        ^`](#awesome-data-science)**\n\n* [scikit-learn](https:\u002F\u002Fscikit-learn.org\u002F)\n* [scikit-multilearn](https:\u002F\u002Fgithub.com\u002Fscikit-multilearn\u002Fscikit-multilearn)\n* [sklearn-expertsys](https:\u002F\u002Fgithub.com\u002Ftmadl\u002Fsklearn-expertsys)\n* [scikit-feature](https:\u002F\u002Fgithub.com\u002Fjundongl\u002Fscikit-feature)\n* [scikit-rebate](https:\u002F\u002Fgithub.com\u002FEpistasisLab\u002Fscikit-rebate)\n* [seqlearn](https:\u002F\u002Fgithub.com\u002Flarsmans\u002Fseqlearn)\n* [sklearn-bayes](https:\u002F\u002Fgithub.com\u002FAmazaspShumik\u002Fsklearn-bayes)\n* [sklearn-crfsuite](https:\u002F\u002Fgithub.com\u002FTeamHG-Memex\u002Fsklearn-crfsuite)\n* [sklearn-deap](https:\u002F\u002Fgithub.com\u002Frsteca\u002Fsklearn-deap)\n* [sigopt_sklearn](https:\u002F\u002Fgithub.com\u002Fsigopt\u002Fsigopt-sklearn)\n* [sklearn-evaluation](https:\u002F\u002Fgithub.com\u002Fedublancas\u002Fsklearn-evaluation)\n* [scikit-image](https:\u002F\u002Fgithub.com\u002Fscikit-image\u002Fscikit-image)\n* [scikit-opt](https:\u002F\u002Fgithub.com\u002Fguofei9987\u002Fscikit-opt)\n* [scikit-posthocs](https:\u002F\u002Fgithub.com\u002Fmaximtrp\u002Fscikit-posthocs)\n* [feature-engine](https:\u002F\u002Ffeature-engine.trainindata.com\u002F)\n* [pystruct](https:\u002F\u002Fgithub.com\u002Fpystruct\u002Fpystruct)\n* [Shogun](https:\u002F\u002Fwww.shogun-toolbox.org\u002F)\n* 
[xLearn](https:\u002F\u002Fgithub.com\u002Faksnzhy\u002Fxlearn)\n* [cuML](https:\u002F\u002Fgithub.com\u002Frapidsai\u002Fcuml)\n* [causalml](https:\u002F\u002Fgithub.com\u002Fuber\u002Fcausalml)\n* [mlpack](https:\u002F\u002Fgithub.com\u002Fmlpack\u002Fmlpack)\n* [MLxtend](https:\u002F\u002Fgithub.com\u002Frasbt\u002Fmlxtend)\n* [modAL](https:\u002F\u002Fgithub.com\u002FmodAL-python\u002FmodAL)\n* [Sparkit-learn](https:\u002F\u002Fgithub.com\u002Flensacom\u002Fsparkit-learn)\n* [hyperlearn](https:\u002F\u002Fgithub.com\u002Fdanielhanchen\u002Fhyperlearn)\n* [dlib](https:\u002F\u002Fgithub.com\u002Fdavisking\u002Fdlib)\n* [imodels](https:\u002F\u002Fgithub.com\u002Fcsinva\u002Fimodels)\n* [jSciPy](https:\u002F\u002Fgithub.com\u002Fhissain\u002Fjscipy) - A Java port of SciPy's signal processing module, offering filters, transformations, and other scientific computing utilities.\n* [RuleFit](https:\u002F\u002Fgithub.com\u002FchristophM\u002Frulefit)\n* [pyGAM](https:\u002F\u002Fgithub.com\u002Fdswah\u002FpyGAM)\n* [Deepchecks](https:\u002F\u002Fgithub.com\u002Fdeepchecks\u002Fdeepchecks)\n* [scikit-survival](https:\u002F\u002Fscikit-survival.readthedocs.io\u002Fen\u002Fstable)\n* [interpretable](https:\u002F\u002Fpypi.org\u002Fproject\u002Finterpretable)\n* [XGBoost](https:\u002F\u002Fgithub.com\u002Fdmlc\u002Fxgboost)\n* [LightGBM](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FLightGBM)\n* [CatBoost](https:\u002F\u002Fgithub.com\u002Fcatboost\u002Fcatboost)\n* [PerpetualBooster](https:\u002F\u002Fgithub.com\u002Fperpetual-ml\u002Fperpetual)\n* [JAX](https:\u002F\u002Fgithub.com\u002Fgoogle\u002Fjax)\n\n\n\n### Deep Learning Packages\n\n#### PyTorch Ecosystem\n* [PyTorch](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Fpytorch)\n* [torchvision](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Fvision)\n* [torchtext](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Ftext)\n* [torchaudio](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Faudio)\n* 
[ignite](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Fignite)\n* [PyTorchNet](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Ftnt)\n* [PyToune](https:\u002F\u002Fgithub.com\u002FGRAAL-Research\u002Fpoutyne)\n* [skorch](https:\u002F\u002Fgithub.com\u002Fskorch-dev\u002Fskorch)\n* [PyVarInf](https:\u002F\u002Fgithub.com\u002Fctallec\u002Fpyvarinf)\n* [pytorch_geometric](https:\u002F\u002Fgithub.com\u002Fpyg-team\u002Fpytorch_geometric)\n* [GPyTorch](https:\u002F\u002Fgithub.com\u002Fcornellius-gp\u002Fgpytorch)\n* [pyro](https:\u002F\u002Fgithub.com\u002Fpyro-ppl\u002Fpyro)\n* [Catalyst](https:\u002F\u002Fgithub.com\u002Fcatalyst-team\u002Fcatalyst)\n* [pytorch_tabular](https:\u002F\u002Fgithub.com\u002Fmanujosephv\u002Fpytorch_tabular)\n* [Yolov3](https:\u002F\u002Fgithub.com\u002Fultralytics\u002Fyolov3)\n* [Yolov5](https:\u002F\u002Fgithub.com\u002Fultralytics\u002Fyolov5)\n* [Yolov8](https:\u002F\u002Fgithub.com\u002Fultralytics\u002Fultralytics)\n\n#### TensorFlow Ecosystem\n* [TensorFlow](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Ftensorflow)\n* [TensorLayer](https:\u002F\u002Fgithub.com\u002Ftensorlayer\u002FTensorLayer)\n* [TFLearn](https:\u002F\u002Fgithub.com\u002Ftflearn\u002Ftflearn)\n* [Sonnet](https:\u002F\u002Fgithub.com\u002Fdeepmind\u002Fsonnet)\n* [tensorpack](https:\u002F\u002Fgithub.com\u002Ftensorpack\u002Ftensorpack)\n* [TRFL](https:\u002F\u002Fgithub.com\u002Fdeepmind\u002Ftrfl)\n* [Polyaxon](https:\u002F\u002Fgithub.com\u002Fpolyaxon\u002Fpolyaxon)\n* [NeuPy](https:\u002F\u002Fgithub.com\u002Fitdxer\u002Fneupy)\n* [tfdeploy](https:\u002F\u002Fgithub.com\u002Friga\u002Ftfdeploy)\n* [tensorflow-upstream](https:\u002F\u002Fgithub.com\u002FROCmSoftwarePlatform\u002Ftensorflow-upstream)\n* [TensorFlow Fold](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Ffold)\n* [tensorlm](https:\u002F\u002Fgithub.com\u002Fbatzner\u002Ftensorlm)\n* [TensorLight](https:\u002F\u002Fgithub.com\u002Fbsautermeister\u002Ftensorlight)\n* [Mesh 
TensorFlow](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Fmesh)\n* [Ludwig](https:\u002F\u002Fgithub.com\u002Fludwig-ai\u002Fludwig)\n* [TF-Agents](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Fagents)\n* [TensorForce](https:\u002F\u002Fgithub.com\u002Ftensorforce\u002Ftensorforce)\n\n#### Keras Ecosystem\n\n* [Keras](https:\u002F\u002Fkeras.io)\n* [keras-contrib](https:\u002F\u002Fgithub.com\u002Fkeras-team\u002Fkeras-contrib)\n* [Hyperas](https:\u002F\u002Fgithub.com\u002Fmaxpumperla\u002Fhyperas)\n* [Elephas](https:\u002F\u002Fgithub.com\u002Fmaxpumperla\u002Felephas)\n* [Hera](https:\u002F\u002Fgithub.com\u002Fkeplr-io\u002Fhera)\n* [Spektral](https:\u002F\u002Fgithub.com\u002Fdanielegrattarola\u002Fspektral)\n* [qkeras](https:\u002F\u002Fgithub.com\u002Fgoogle\u002Fqkeras)\n* [keras-rl](https:\u002F\u002Fgithub.com\u002Fkeras-rl\u002Fkeras-rl)\n* [Talos](https:\u002F\u002Fgithub.com\u002Fautonomio\u002Ftalos)\n\n#### Visualization Tools\n**[`^        back to top        ^`](#awesome-data-science)**\n\n- [altair](https:\u002F\u002Faltair-viz.github.io\u002F)\n- [amcharts](https:\u002F\u002Fwww.amcharts.com\u002F)\n- [anychart](https:\u002F\u002Fwww.anychart.com\u002F)\n- [bokeh](https:\u002F\u002Fbokeh.org\u002F)\n- [Comet](https:\u002F\u002Fwww.comet.com\u002Fsite\u002Fproducts\u002Fml-experiment-tracking\u002F?utm_source=awesome-datascience)\n- [slemma](https:\u002F\u002Fslemma.com\u002F)\n- [cartodb](https:\u002F\u002Fcartodb.github.io\u002Fodyssey.js\u002F)\n- [Cube](https:\u002F\u002Fsquare.github.io\u002Fcube\u002F)\n- [d3plus](https:\u002F\u002Fd3plus.org\u002F)\n- [Data-Driven Documents(D3js)](https:\u002F\u002Fd3js.org\u002F)\n- [dygraphs](https:\u002F\u002Fdygraphs.com\u002F)\n- [exhibit](https:\u002F\u002Fwww.simile-widgets.org\u002Fexhibit\u002F)\n- [gephi](https:\u002F\u002Fgephi.org\u002F)\n- [ggplot2](https:\u002F\u002Fggplot2.tidyverse.org\u002F)\n- [Glue](http:\u002F\u002Fdocs.glueviz.org\u002Fen\u002Flatest\u002Findex.html)\n- [Google 
Chart Gallery](https:\u002F\u002Fdevelopers.google.com\u002Fchart\u002Finteractive\u002Fdocs\u002Fgallery)\n- [Highcharts](https:\u002F\u002Fwww.highcharts.com\u002F)\n- [import.io](https:\u002F\u002Fwww.import.io\u002F)\n- [Matplotlib](https:\u002F\u002Fmatplotlib.org\u002F)\n- [nvd3](https:\u002F\u002Fnvd3.org\u002F)\n- [Netron](https:\u002F\u002Fgithub.com\u002Flutzroeder\u002Fnetron)\n- [Openrefine](https:\u002F\u002Fopenrefine.org\u002F)\n- [plot.ly](https:\u002F\u002Fplot.ly\u002F)\n- [raw](https:\u002F\u002Frawgraphs.io)\n- [Resseract Lite](https:\u002F\u002Fgithub.com\u002Fabistarun\u002Fresseract-lite)\n- [Seaborn](https:\u002F\u002Fseaborn.pydata.org\u002F)\n- [techanjs](https:\u002F\u002Ftechanjs.org\u002F)\n- [Timeline](https:\u002F\u002Ftimeline.knightlab.com\u002F)\n- [variancecharts](https:\u002F\u002Fvariancecharts.com\u002Findex.html)\n- [vida](https:\u002F\u002Fvida.io\u002F)\n- [vizzu](https:\u002F\u002Fgithub.com\u002Fvizzuhq\u002Fvizzu-lib)\n- [Wrangler](http:\u002F\u002Fvis.stanford.edu\u002Fwrangler\u002F)\n- [r2d3](http:\u002F\u002Fwww.r2d3.us\u002Fvisual-intro-to-machine-learning-part-1\u002F)\n- [NetworkX](https:\u002F\u002Fnetworkx.org\u002F)\n- [Redash](https:\u002F\u002Fredash.io\u002F)\n- [Metabase](https:\u002F\u002Fwww.metabase.com\u002F)\n- [C3](https:\u002F\u002Fc3js.org\u002F)\n- [TensorWatch](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Ftensorwatch)\n- [geomap](https:\u002F\u002Fpypi.org\u002Fproject\u002Fgeomap\u002F)\n- [Dash](https:\u002F\u002Fplotly.com\u002Fdash\u002F)\n- [MetaReview](https:\u002F\u002Fmetareview-8c1.pages.dev\u002F) - Free online meta-analysis platform with 11 interactive D3.js statistical charts (forest plot, funnel plot, Galbraith, L'Abbé, Baujat, etc.), 5 effect size measures, AI literature screening, and publication-ready report export. 
[github.com](https:\u002F\u002Fgithub.com\u002FTerryFYL\u002Fmetareview)\n\n### Miscellaneous Tools\n**[`^        back to top        ^`](#awesome-data-science)**\n\n| Link | Description |\n| --- | --- |\n| [The Data Science Lifecycle Process](https:\u002F\u002Fgithub.com\u002Fdslp\u002Fdslp) | The Data Science Lifecycle Process is a process for taking data science teams from Idea to Value repeatedly and sustainably. The process is documented in this repo  |\n| [Data Science Lifecycle Template Repo](https:\u002F\u002Fgithub.com\u002Fdslp\u002Fdslp-repo-template) | Template repository for data science lifecycle project  |\n| [TabGAN](https:\u002F\u002Fgithub.com\u002FDiyago\u002FTabular-data-generation) | Synthetic tabular data generation using GANs, Diffusion Models, and LLMs with adversarial filtering and privacy metrics. |\n| [RexMex](https:\u002F\u002Fgithub.com\u002FAstraZeneca\u002Frexmex) | A general purpose recommender metrics library for fair evaluation.  |\n| [ChemicalX](https:\u002F\u002Fgithub.com\u002FAstraZeneca\u002Fchemicalx) | A PyTorch based deep learning library for drug pair scoring.  |\n| [FileShot.io](https:\u002F\u002Fgithub.com\u002FFileShot\u002FFileShotZKE) | Secure zero-knowledge encrypted file sharing (AES-256-GCM in-browser). No account required, MIT licensed, self-hostable, optional link expiry. |\n| [CorpusExplorer](http:\u002F\u002Fcorpusexplorer.de\u002F) | Software for corpus linguists and text\u002Fdata mining enthusiasts. Build your own corpora in over 60 languages. Use over 50 tools\u002Fvisualizations.  |\n| [PyTorch Geometric Temporal](https:\u002F\u002Fgithub.com\u002Fbenedekrozemberczki\u002Fpytorch_geometric_temporal) | Representation learning on dynamic graphs.  |\n| [Little Ball of Fur](https:\u002F\u002Fgithub.com\u002Fbenedekrozemberczki\u002Flittleballoffur) | A graph sampling library for NetworkX with a Scikit-Learn like API.  
|\n| [Karate Club](https:\u002F\u002Fgithub.com\u002Fbenedekrozemberczki\u002Fkarateclub) | An unsupervised machine learning extension library for NetworkX with a Scikit-Learn like API. |\n| [ML Workspace](https:\u002F\u002Fgithub.com\u002Fml-tooling\u002Fml-workspace) | All-in-one web-based IDE for machine learning and data science. The workspace is deployed as a Docker container and is preloaded with a variety of popular data science libraries (e.g., Tensorflow, PyTorch) and dev tools (e.g., Jupyter, VS Code) |\n| [xonsh shell](https:\u002F\u002Fgithub.com\u002Fxonsh\u002Fxonsh) | A Python-powered shell that enables integration, management and orchestration of data science libraries mostly written in Python, allowing you to build pipelines, code and command-based workflows. It can also be used as a kernel for Jupyter Notebook.  |\n| [Neptune.ai](https:\u002F\u002Fneptune.ai) | Community-friendly platform supporting data scientists in creating and sharing machine learning models. Neptune facilitates teamwork, infrastructure management, models comparison and reproducibility. |\n| [steppy](https:\u002F\u002Fgithub.com\u002Fminerva-ml\u002Fsteppy) | Lightweight, Python library for fast and reproducible machine learning experimentation. Introduces very simple interface that enables clean machine learning pipeline design. |\n| [steppy-toolkit](https:\u002F\u002Fgithub.com\u002Fminerva-ml\u002Fsteppy-toolkit) | Curated collection of the neural networks, transformers and models that make your machine learning work faster and more effective. |\n| [Datalab from Google](https:\u002F\u002Fcloud.google.com\u002Fdatalab\u002Fdocs\u002F) | easily explore, visualize, analyze, and transform data using familiar languages, such as Python and SQL, interactively. |\n| [Hortonworks Sandbox](https:\u002F\u002Fwww.cloudera.com\u002Fdownloads\u002Fhortonworks-sandbox.html) | is a personal, portable Hadoop environment that comes with a dozen interactive Hadoop tutorials. 
|\n| [R](https:\u002F\u002Fwww.r-project.org\u002F) | is a free software environment for statistical computing and graphics. |\n| [Tidyverse](https:\u002F\u002Fwww.tidyverse.org\u002F) | is an opinionated collection of R packages designed for data science. All packages share an underlying design philosophy, grammar, and data structures. |\n| [RStudio](https:\u002F\u002Fwww.rstudio.com) | IDE – powerful user interface for R. It’s free and open source, and works on Windows, Mac, and Linux. |\n| [Python - Pandas - Anaconda](https:\u002F\u002Fwww.anaconda.com) | Completely free enterprise-ready Python distribution for large-scale data processing, predictive analytics, and scientific computing |\n| [Pandas GUI](https:\u002F\u002Fgithub.com\u002Fadrotog\u002FPandasGUI) | Pandas GUI |\n| [Polars](https:\u002F\u002Fgithub.com\u002Fpola-rs\u002Fpolars) | Fast DataFrame library for Rust and Python, designed as a faster alternative to Pandas |\n| [CiteMe](https:\u002F\u002Fciteme.app) | AI-powered academic citation generator. Searches 11+ scholarly databases (OpenAlex, PubMed, Semantic Scholar, CrossRef, SciELO) and formats references in 40+ citation styles. Available as web app, browser extension, Google Docs add-on, and public API. |\n| [Scikit-Learn](https:\u002F\u002Fscikit-learn.org\u002Fstable\u002F) | Machine Learning in Python |\n| [NumPy](https:\u002F\u002Fnumpy.org\u002F) | NumPy is fundamental for scientific computing with Python. It supports large, multi-dimensional arrays and matrices and includes an assortment of high-level mathematical functions to operate on these arrays. |\n| [Vaex](https:\u002F\u002Fvaex.io\u002F) | Vaex is a Python library that allows you to visualize large datasets and calculate statistics at high speeds. |\n| [SciPy](https:\u002F\u002Fscipy.org\u002F) | SciPy works with NumPy arrays and provides efficient routines for numerical integration and optimization. 
|\n| [Data Science Toolbox](https:\u002F\u002Fwww.coursera.org\u002Flearn\u002Fdata-scientists-tools) | Coursera Course |\n| [Data Science Toolbox](https:\u002F\u002Fdatasciencetoolbox.org\u002F) | Blog |\n| [Wolfram Data Science Platform](https:\u002F\u002Fwww.wolfram.com\u002Fdata-science-platform\u002F) | Take numerical, textual, image, GIS or other data and give it the Wolfram treatment, carrying out a full spectrum of data science analysis and visualization and automatically generate rich interactive reports—all powered by the revolutionary knowledge-based Wolfram Language. |\n| [Datadog](https:\u002F\u002Fwww.datadoghq.com\u002F) | Solutions, code, and devops for high-scale data science. |\n| [Variance](https:\u002F\u002Fvariancecharts.com\u002F) | Build powerful data visualizations for the web without writing JavaScript |\n| [Kite Development Kit](http:\u002F\u002Fkitesdk.org\u002Fdocs\u002Fcurrent\u002Findex.html) | The Kite Software Development Kit (Apache License, Version 2.0), or Kite for short, is a set of libraries, tools, examples, and documentation focused on making it easier to build systems on top of the Hadoop ecosystem. |\n| [Domino Data Labs](https:\u002F\u002Fwww.dominodatalab.com) | Run, scale, share, and deploy your models — without any infrastructure or setup. |\n| [Apache Flink](https:\u002F\u002Fflink.apache.org\u002F) | A platform for efficient, distributed, general-purpose data processing. |\n| [Apache Hama](https:\u002F\u002Fhama.apache.org\u002F) | Apache Hama is an Apache Top-Level open source project, allowing you to do advanced analytics beyond MapReduce. |\n| [Weka](https:\u002F\u002Fml.cms.waikato.ac.nz\u002Fweka\u002Findex.html) | Weka is a collection of machine learning algorithms for data mining tasks. 
|\n| [Octave](https:\u002F\u002Fwww.gnu.org\u002Fsoftware\u002Foctave\u002F) | GNU Octave is a high-level interpreted language, primarily intended for numerical computations.(Free Matlab) |\n| [Apache Spark](https:\u002F\u002Fspark.apache.org\u002F) | Lightning-fast cluster computing |\n| [Hydrosphere Mist](https:\u002F\u002Fgithub.com\u002FHydrospheredata\u002Fmist) | a service for exposing Apache Spark analytics jobs and machine learning models as realtime, batch or reactive web services. |\n| [Data Mechanics](https:\u002F\u002Fwww.datamechanics.co) | A data science and engineering platform making Apache Spark more developer-friendly and cost-effective. |\n| [Caffe](https:\u002F\u002Fcaffe.berkeleyvision.org\u002F) | Deep Learning Framework |\n| [Torch](http:\u002F\u002Ftorch.ch\u002F) | A SCIENTIFIC COMPUTING FRAMEWORK FOR LUAJIT |\n| [Nervana's python based Deep Learning Framework](https:\u002F\u002Fgithub.com\u002FNervanaSystems\u002Fneon) | Intel® Nervana™ reference deep learning framework committed to best performance on all hardware. |\n| [Skale](https:\u002F\u002Fgithub.com\u002Fskale-me\u002Fskale) | High performance distributed data processing in NodeJS |\n| [Aerosolve](https:\u002F\u002Fairbnb.io\u002Faerosolve\u002F) | A machine learning package built for humans. |\n| [Intel framework](https:\u002F\u002Fgithub.com\u002Fintel\u002Fidlf) | Intel® Deep Learning Framework |\n| [Datawrapper](https:\u002F\u002Fwww.datawrapper.de\u002F) | An open source data visualization platform helping everyone to create simple, correct and embeddable charts. 
Also at [github.com](https:\u002F\u002Fgithub.com\u002Fdatawrapper\u002Fdatawrapper) |\n| [Tensor Flow](https:\u002F\u002Fwww.tensorflow.org\u002F) | TensorFlow is an Open Source Software Library for Machine Intelligence |\n| [Natural Language Toolkit](https:\u002F\u002Fwww.nltk.org\u002F) | An introductory yet powerful toolkit for natural language processing and classification |\n| [Annotation Lab](https:\u002F\u002Fwww.johnsnowlabs.com\u002Fannotation-lab\u002F) | Free End-to-End No-Code platform for text annotation and DL model training\u002Ftuning. Out-of-the-box support for Named Entity Recognition, Classification, Relation extraction and Assertion Status Spark NLP models. Unlimited support for users, teams, projects, documents. |\n| [nlp-toolkit for node.js](https:\u002F\u002Fwww.npmjs.com\u002Fpackage\u002Fnlp-toolkit) | This module covers some basic nlp principles and implementations. The main focus is performance. When we deal with sample or training data in nlp, we quickly run out of memory. Therefore every implementation in this module is written as stream to only hold that data in memory that is currently processed at any step. |\n| [Julia](https:\u002F\u002Fjulialang.org) | high-level, high-performance dynamic programming language for technical computing |\n| [IJulia](https:\u002F\u002Fgithub.com\u002FJuliaLang\u002FIJulia.jl) | a Julia-language backend combined with the Jupyter interactive environment |\n| [Apache Zeppelin](https:\u002F\u002Fzeppelin.apache.org\u002F) | Web-based notebook that enables data-driven, interactive data analytics and collaborative documents with SQL, Scala and more  |\n| [Featuretools](https:\u002F\u002Fgithub.com\u002Falteryx\u002Ffeaturetools) | An open source framework for automated feature engineering written in python |\n| [Optimus](https:\u002F\u002Fgithub.com\u002Fhi-primus\u002Foptimus) | Cleansing, pre-processing, feature engineering, exploratory data analysis and easy ML with PySpark backend.  
|\n| [Albumentations](https:\u002F\u002Fgithub.com\u002Falbumentations-team\u002Falbumentations) | A fast and framework-agnostic image augmentation library that implements a diverse set of augmentation techniques. Supports classification, segmentation, and detection out of the box. Was used to win a number of Deep Learning competitions at Kaggle, Topcoder and those that were a part of the CVPR workshops. |\n| [DVC](https:\u002F\u002Fgithub.com\u002Fiterative\u002Fdvc) | An open-source data science version control system. It helps track, organize and make data science projects reproducible. In its very basic scenario it helps version control and share large data and model files. |\n| [Lambdo](https:\u002F\u002Fgithub.com\u002Fasavinov\u002Flambdo) | is a workflow engine that significantly simplifies data analysis by combining in one analysis pipeline (i) feature engineering and machine learning (ii) model training and prediction (iii) table population and column evaluation. |\n| [Feast](https:\u002F\u002Fgithub.com\u002Ffeast-dev\u002Ffeast) | A feature store for the management, discovery, and access of machine learning features. Feast provides a consistent view of feature data for both model training and model serving. |\n| [Polyaxon](https:\u002F\u002Fgithub.com\u002Fpolyaxon\u002Fpolyaxon) | A platform for reproducible and scalable machine learning and deep learning. |\n| [UBIAI](https:\u002F\u002Fubiai.tools) | Easy-to-use text annotation tool for teams with most comprehensive auto-annotation features. Supports NER, relations and document classification as well as OCR annotation for invoice labeling |\n| [Trains](https:\u002F\u002Fgithub.com\u002Fallegroai\u002Fclearml) | Auto-Magical Experiment Manager, Version Control & DevOps for AI |\n| [Hopsworks](https:\u002F\u002Fgithub.com\u002Flogicalclocks\u002Fhopsworks) | Open-source data-intensive machine learning platform with a feature store. 
Ingest and manage features for both online (MySQL Cluster)  and offline (Apache Hive) access, train and serve models at scale. |\n| [MindsDB](https:\u002F\u002Fgithub.com\u002Fmindsdb\u002Fmindsdb) | MindsDB is an Explainable AutoML framework for developers. With MindsDB you can build, train and use state of the art ML models in as simple as one line of code. |\n| [Lightwood](https:\u002F\u002Fgithub.com\u002Fmindsdb\u002Flightwood) | A Pytorch based framework that breaks down machine learning problems into smaller blocks that can be glued together seamlessly with an objective to build predictive models with one line of code. |\n| [AWS Data Wrangler](https:\u002F\u002Fgithub.com\u002Fawslabs\u002Faws-data-wrangler) | An open-source Python package that extends the power of Pandas library to AWS connecting DataFrames and AWS data related services (Amazon Redshift, AWS Glue, Amazon Athena, Amazon EMR, etc). |\n| [Amazon Rekognition](https:\u002F\u002Faws.amazon.com\u002Frekognition\u002F) | AWS Rekognition is a service that lets developers working with Amazon Web Services add image analysis to their applications. Catalog assets, automate workflows, and extract meaning from your media and applications.|\n| [Amazon Textract](https:\u002F\u002Faws.amazon.com\u002Ftextract\u002F) | Automatically extract printed text, handwriting, and data from any document. |\n| [Amazon Lookout for Vision](https:\u002F\u002Faws.amazon.com\u002Flookout-for-vision\u002F) | Spot product defects using computer vision to automate quality inspection. Identify missing product components, vehicle and structure damage, and irregularities for comprehensive quality control.|\n| [Amazon CodeGuru](https:\u002F\u002Faws.amazon.com\u002Fcodeguru\u002F) | Automate code reviews and optimize application performance with ML-powered recommendations.|\n| [CML](https:\u002F\u002Fgithub.com\u002Fiterative\u002Fcml) | An open source toolkit for using continuous integration in data science projects. 
Automatically train and test models in production-like environments with GitHub Actions & GitLab CI, and autogenerate visual reports on pull\u002Fmerge requests. |\n| [Dask](https:\u002F\u002Fdask.org\u002F) | An open source Python library to painlessly transition your analytics code to distributed computing systems (Big Data) |\n| [DuckDB](https:\u002F\u002Fgithub.com\u002Fduckdb\u002Fduckdb) | An in-process SQL OLAP database management system |\n| [Statsmodels](https:\u002F\u002Fwww.statsmodels.org\u002Fstable\u002Findex.html) | A Python-based inferential statistics, hypothesis testing and regression framework |\n| [Gensim](https:\u002F\u002Fradimrehurek.com\u002Fgensim\u002F) | An open-source library for topic modeling of natural language text |\n| [spaCy](https:\u002F\u002Fspacy.io\u002F) | A performant natural language processing toolkit |\n| [Grid Studio](https:\u002F\u002Fgithub.com\u002Fricklamers\u002Fgridstudio) | Grid studio is a web-based spreadsheet application with full integration of the Python programming language. |\n|[Python Data Science Handbook](https:\u002F\u002Fgithub.com\u002Fjakevdp\u002FPythonDataScienceHandbook)|Python Data Science Handbook: full text in Jupyter Notebooks|\n| [Shapley](https:\u002F\u002Fgithub.com\u002Fbenedekrozemberczki\u002Fshapley) | A data-driven framework to quantify the value of classifiers in a machine learning ensemble.  |\n| [DAGsHub](https:\u002F\u002Fdagshub.com) | A platform built on open source tools for data, model and pipeline management.  |\n| [Deepnote](https:\u002F\u002Fdeepnote.com) | A new kind of data science notebook. Jupyter-compatible, with real-time collaboration and running in the cloud. |\n| [Valohai](https:\u002F\u002Fvalohai.com) | An MLOps platform that handles machine orchestration, automatic reproducibility and deployment. 
|\n| [PyMC3](https:\u002F\u002Fdocs.pymc.io\u002F) | A Python Library for Probabilistic Programming (Bayesian Inference and Machine Learning) |\n| [PyStan](https:\u002F\u002Fpypi.org\u002Fproject\u002Fpystan\u002F) | Python interface to Stan (Bayesian inference and modeling) |\n| [hmmlearn](https:\u002F\u002Fpypi.org\u002Fproject\u002Fhmmlearn\u002F) | Unsupervised learning and inference of Hidden Markov Models |\n| [Chaos Genius](https:\u002F\u002Fgithub.com\u002Fchaos-genius\u002Fchaos_genius\u002F) | ML powered analytics engine for outlier\u002Fanomaly detection and root cause analysis |\n| [Nimblebox](https:\u002F\u002Fnimblebox.ai\u002F) | A full-stack MLOps platform designed to help data scientists and machine learning practitioners around the world discover, create, and launch multi-cloud apps from their web browser. |\n| [Towhee](https:\u002F\u002Fgithub.com\u002Ftowhee-io\u002Ftowhee) | A Python library that helps you encode your unstructured data into embeddings. |\n| [LineaPy](https:\u002F\u002Fgithub.com\u002FLineaLabs\u002Flineapy) | Ever been frustrated with cleaning up long, messy Jupyter notebooks? With LineaPy, an open source Python library, it takes as little as two lines of code to transform messy development code into production pipelines. 
|\n| [envd](https:\u002F\u002Fgithub.com\u002Ftensorchord\u002Fenvd) | 🏕️ machine learning development environment for data science and AI\u002FML engineering teams |\n| [Explore Data Science Libraries](https:\u002F\u002Fkandi.openweaver.com\u002Fexplore\u002Fdata-science) | A search engine 🔎 tool to discover & find a curated list of popular & new libraries, top authors, trending project kits, discussions, tutorials & learning resources |\n| [MLEM](https:\u002F\u002Fgithub.com\u002Fiterative\u002Fmlem) | 🐶 Version and deploy your ML models following GitOps principles |\n| [MLflow](https:\u002F\u002Fmlflow.org\u002F) | MLOps framework for managing ML models across their full lifecycle |\n| [cleanlab](https:\u002F\u002Fgithub.com\u002Fcleanlab\u002Fcleanlab) | Python library for data-centric AI and automatically detecting various issues in ML datasets |\n| [AutoGluon](https:\u002F\u002Fgithub.com\u002Fawslabs\u002Fautogluon) | AutoML to easily produce accurate predictions for image, text, tabular, time-series, and multi-modal data |\n| [Arize AI](https:\u002F\u002Farize.com\u002F) | Arize AI community tier observability tool for monitoring machine learning models in production and root-causing issues such as data quality and performance drift. |\n| [Aureo.io](https:\u002F\u002Faureo.io) | Aureo.io is a low-code platform that focuses on building artificial intelligence. It provides users with the capability to create pipelines, automations and integrate them with artificial intelligence models – all with their basic data. |\n| [ERD Lab](https:\u002F\u002Fwww.erdlab.io\u002F) | Free cloud based entity relationship diagram (ERD) tool made for developers. |\n| [Arize-Phoenix](https:\u002F\u002Fdocs.arize.com\u002Fphoenix) | MLOps in a notebook - uncover insights, surface problems, monitor, and fine tune your models. 
|\n| [Comet](https:\u002F\u002Fgithub.com\u002Fcomet-ml\u002Fcomet-examples) | An MLOps platform with experiment tracking, model production management, a model registry, and full data lineage to support your ML workflow from training straight through to production. |\n| [Opik](https:\u002F\u002Fgithub.com\u002Fcomet-ml\u002Fopik) | Evaluate, test, and ship LLM applications across your dev and production lifecycles. |\n| [Synthical](https:\u002F\u002Fsynthical.com) | AI-powered collaborative environment for research. Find relevant papers, create collections to manage bibliography, and summarize content — all in one place |\n| [teeplot](https:\u002F\u002Fgithub.com\u002Fmmore500\u002Fteeplot) | Workflow tool to automatically organize data visualization output |\n| [Streamlit](https:\u002F\u002Fgithub.com\u002Fstreamlit\u002Fstreamlit) | App framework for Machine Learning and Data Science projects |\n| [Gradio](https:\u002F\u002Fgithub.com\u002Fgradio-app\u002Fgradio) | Create customizable UI components around machine learning models |\n| [Weights & Biases](https:\u002F\u002Fgithub.com\u002Fwandb\u002Fwandb) | Experiment tracking, dataset versioning, and model management |\n| [DVC](https:\u002F\u002Fgithub.com\u002Fiterative\u002Fdvc) | Open-source version control system for machine learning projects |\n| [Optuna](https:\u002F\u002Fgithub.com\u002Foptuna\u002Foptuna) | Automatic hyperparameter optimization software framework |\n| [Ray Tune](https:\u002F\u002Fgithub.com\u002Fray-project\u002Fray) | Scalable hyperparameter tuning library |\n| [Apache Airflow](https:\u002F\u002Fgithub.com\u002Fapache\u002Fairflow) | Platform to programmatically author, schedule, and monitor workflows |\n| [Prefect](https:\u002F\u002Fgithub.com\u002FPrefectHQ\u002Fprefect) | Workflow management system for modern data stacks |\n| [Kedro](https:\u002F\u002Fgithub.com\u002Fkedro-org\u002Fkedro) | Open-source Python framework for creating reproducible, maintainable data science code |\n| 
[Hamilton](https:\u002F\u002Fgithub.com\u002Fdagworks-inc\u002Fhamilton) | Lightweight library to author and manage reliable data transformations |\n| [SHAP](https:\u002F\u002Fgithub.com\u002Fslundberg\u002Fshap) | Game theoretic approach to explain the output of any machine learning model |\n| [InterpretML](https:\u002F\u002Fgithub.com\u002Finterpretml\u002Finterpret) | InterpretML implements the Explainable Boosting Machine (EBM), a modern, fully interpretable machine learning model based on Generalized Additive Models (GAMs). This open-source package also provides visualization tools for EBMs, other glass-box models, and black-box explanations |\n| [LIME](https:\u002F\u002Fgithub.com\u002Fmarcotcr\u002Flime) | Explaining the predictions of any machine learning classifier |\n| [flyte](https:\u002F\u002Fgithub.com\u002Fflyteorg\u002Fflyte) | Workflow automation platform for machine learning |\n| [dbt](https:\u002F\u002Fgithub.com\u002Fdbt-labs\u002Fdbt-core) | Data build tool |\n| [zasper](https:\u002F\u002Fgithub.com\u002Fzasper-io\u002Fzasper) | Supercharged IDE for Data Science |\n| [skrub](https:\u002F\u002Fgithub.com\u002Fskrub-data\u002Fskrub\u002F) | A Python library to ease preprocessing and feature engineering for tabular machine learning |\n| [Codeflash](https:\u002F\u002Fwww.codeflash.ai\u002F) | Ship Blazing-Fast Python Code — Every Time |\n| [Hugging Face](https:\u002F\u002Fhuggingface.co\u002F) | Popular open platform for sharing ML models, datasets, and collaborating on NLP and generative AI projects. |\n| [Chinese-Elite](https:\u002F\u002Fgithub.com\u002Fanonym-g\u002FChinese-Elite) | An open-source project that automatically maps relationship networks by parsing public data using LLMs and visualizes it as an interactive graph. 
|\n| [Desbordante](https:\u002F\u002Fgithub.com\u002Fdesbordante\u002Fdesbordante-core\u002F) | An open-source data profiler specifically focused on discovery and validation of complex patterns, such as [numerical association rules](https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002FDesbordante\u002Fdesbordante-core\u002Fblob\u002Fmain\u002Fexamples\u002Fnotebooks\u002FNumerical_Association_Rules.ipynb), [differential dependencies](https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002FDesbordante\u002Fdesbordante-core\u002Fblob\u002Fmain\u002Fexamples\u002Fnotebooks\u002FDifferential_Dependencies.ipynb), [denial constraints](https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002FDesbordante\u002Fdesbordante-core\u002Fblob\u002Fmain\u002Fexamples\u002Fnotebooks\u002FDenial_Constraints.ipynb), and more. |\n| [dna-claude-analysis](https:\u002F\u002Fgithub.com\u002Fshmlkv\u002Fdna-claude-analysis) | Personal genome analysis toolkit with Python scripts analyzing raw DNA data across 17 categories (health risks, ancestry, pharmacogenomics, nutrition, psychology, and more) and generating a terminal-style single-page HTML visualization. |\n| [RunMat](https:\u002F\u002Fgithub.com\u002Frunmat-org\u002Frunmat) | Fast MATLAB-syntax runtime with automatic CPU\u002FGPU execution and fused array kernels. |\n| [Turbostream](https:\u002F\u002Fgithub.com\u002Fturboline-ai\u002Fturbostream) | A terminal UI for experimenting with custom rule engines and selective LLM analysis on real-time data streams, without worrying about streaming infra or backpressure. |\n| [WFGY ProblemMap](https:\u002F\u002Fgithub.com\u002Fonestardao\u002FWFGY\u002Fblob\u002Fmain\u002FProblemMap\u002FREADME.md) | Open source “failure atlas” of 16 recurring issues in LLM and RAG pipelines, with observable symptoms and suggested fixes for data science teams. 
|\n| [Deploybase](https:\u002F\u002Fdeploybase.ai\u002F) | Track real-time GPU and LLM pricing across all cloud and inference providers. |\n| [DeepAnalyze](https:\u002F\u002Fgithub.com\u002Fruc-datalab\u002FDeepAnalyze) | An agentic LLM for autonomous data science, which can autonomously complete a wide range of data science tasks without human intervention. |\n\n\n\n## Literature and Media\n**[`^        back to top        ^`](#awesome-data-science)**\n\nThis section includes some additional reading material, channels to watch, and talks to listen to.\n\n### Books\n**[`^        back to top        ^`](#awesome-data-science)**\n\n- [Data Science From Scratch: First Principles with Python](https:\u002F\u002Fwww.amazon.com\u002FData-Science-Scratch-Principles-Python-dp-1492041130\u002Fdp\u002F1492041130\u002Fref=dp_ob_title_bk)\n- [Artificial Intelligence with Python - Tutorialspoint](https:\u002F\u002Fwww.tutorialspoint.com\u002Fartificial_intelligence_with_python\u002Fartificial_intelligence_with_python_tutorial.pdf)\n- [Machine Learning from Scratch](https:\u002F\u002Fdafriedman97.github.io\u002Fmlbook\u002Fcontent\u002Fintroduction.html)\n- [Probabilistic Machine Learning: An Introduction](https:\u002F\u002Fprobml.github.io\u002Fpml-book\u002Fbook1.html)\n- [How to Lead in Data Science](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fhow-to-lead-in-data-science) - Early Access\n- [Fighting Churn With Data](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Ffighting-churn-with-data)\n- [Data Science at Scale with Python and Dask](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fdata-science-with-python-and-dask)\n- [Python Data Science Handbook](https:\u002F\u002Fjakevdp.github.io\u002FPythonDataScienceHandbook\u002F)\n- [The Data Science Handbook: Advice and Insights from 25 Amazing Data Scientists](https:\u002F\u002Fwww.thedatasciencehandbook.com\u002F)\n- [Think Like a Data 
Scientist](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fthink-like-a-data-scientist)\n- [Introducing Data Science](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fintroducing-data-science)\n- [Practical Data Science with R](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fpractical-data-science-with-r)\n- [Everyday Data Science](https:\u002F\u002Fwww.amazon.com\u002Fdp\u002FB08TZ1MT3W\u002Fref=cm_sw_r_cp_apa_fabc_a0ceGbWECF9A8) & [(cheaper PDF version)](https:\u002F\u002Fgum.co\u002Feverydaydata)\n- [Exploring Data Science](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fexploring-data-science) - free eBook sampler\n- [Exploring the Data Jungle](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fexploring-the-data-jungle) - free eBook sampler\n- [Classic Computer Science Problems in Python](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fclassic-computer-science-problems-in-python)\n- [Math for Programmers](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fmath-for-programmers) Early access\n- [R in Action, Third Edition](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fr-in-action-third-edition) Early Access\n- [Data Science Bookcamp](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fdata-science-bookcamp) Early access\n- [Data Science Thinking: The Next Scientific, Technological and Economic Revolution](https:\u002F\u002Fwww.springer.com\u002Fgp\u002Fbook\u002F9783319950914)\n- [Applied Data Science: Lessons Learned for the Data-Driven Business](https:\u002F\u002Fwww.springer.com\u002Fgp\u002Fbook\u002F9783030118204)\n- [The Data Science Handbook](https:\u002F\u002Fwww.amazon.com\u002FData-Science-Handbook-Field-Cady\u002Fdp\u002F1119092949)\n- [Essential Natural Language Processing](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fgetting-started-with-natural-language-processing) - Early access\n- [Mining Massive Datasets](http:\u002F\u002Fwww.mmds.org\u002F) - free e-book comprehended by an online course\n- [Pandas in 
Action](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fpandas-in-action) - Early access\n- [Genetic Algorithms and Genetic Programming](https:\u002F\u002Fwww.taylorfrancis.com\u002Fbooks\u002F9780429141973)\n- [Advances in Evolutionary Algorithms](https:\u002F\u002Fwww.intechopen.com\u002Fbooks\u002Fadvances_in_evolutionary_algorithms) - Free Download\n- [Genetic Programming: New Approaches and Successful Applications](https:\u002F\u002Fwww.intechopen.com\u002Fbooks\u002Fgenetic-programming-new-approaches-and-successful-applications) - Free Download\n- [Evolutionary Algorithms](https:\u002F\u002Fwww.intechopen.com\u002Fbooks\u002Fevolutionary-algorithms) - Free Download\n- [Advances in Genetic Programming, Vol. 3](http:\u002F\u002Fwww0.cs.ucl.ac.uk\u002Fstaff\u002FW.Langdon\u002Faigp3\u002F) - Free Download\n- [Genetic Algorithms and Evolutionary Computation](https:\u002F\u002Fwww.talkorigins.org\u002Ffaqs\u002Fgenalg\u002Fgenalg.html) - Free Download\n- [Convex Optimization](https:\u002F\u002Fweb.stanford.edu\u002F~boyd\u002Fcvxbook\u002Fbv_cvxbook.pdf) - Convex Optimization book by Stephen Boyd - Free Download\n- [Data Analysis with Python and PySpark](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fdata-analysis-with-python-and-pyspark) - Early Access\n- [R for Data Science](https:\u002F\u002Fr4ds.had.co.nz\u002F)\n- [Build a Career in Data Science](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fbuild-a-career-in-data-science)\n- [Machine Learning Bookcamp](https:\u002F\u002Fmlbookcamp.com\u002F) - Early access\n- [Hands-On Machine Learning with Scikit-Learn, Keras, and TensorFlow, 2nd Edition](https:\u002F\u002Fwww.oreilly.com\u002Flibrary\u002Fview\u002Fhands-on-machine-learning\u002F9781492032632\u002F)\n- [Effective Data Science Infrastructure](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Feffective-data-science-infrastructure)\n- [Practical MLOps: How to Get Ready for Production Models](https:\u002F\u002Fvalohai.com\u002Fmlops-ebook\u002F)\n- 
[Data Analysis with Python and PySpark](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fdata-analysis-with-python-and-pyspark)\n- [Regression, a Friendly guide](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fregression-a-friendly-guide) - Early Access\n- [Streaming Systems: The What, Where, When, and How of Large-Scale Data Processing](https:\u002F\u002Fwww.oreilly.com\u002Flibrary\u002Fview\u002Fstreaming-systems\u002F9781491983867\u002F)\n- [Data Science at the Command Line: Facing the Future with Time-Tested Tools](https:\u002F\u002Fwww.oreilly.com\u002Flibrary\u002Fview\u002Fdata-science-at\u002F9781491947845\u002F)\n- [Machine Learning with Python - Tutorialspoint](https:\u002F\u002Fwww.tutorialspoint.com\u002Fmachine_learning_with_python\u002Fmachine_learning_with_python_tutorial.pdf)\n- [Deep Learning](https:\u002F\u002Fwww.deeplearningbook.org\u002F)\n- [Designing Cloud Data Platforms](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fdesigning-cloud-data-platforms) - Early Access\n- [An Introduction to Statistical Learning with Applications in R](https:\u002F\u002Fwww.statlearning.com\u002F)\n- [The Elements of Statistical Learning: Data Mining, Inference, and Prediction](https:\u002F\u002Fhastie.su.domains\u002FElemStatLearn\u002F)\n- [Deep Learning with PyTorch](https:\u002F\u002Fwww.simonandschuster.com\u002Fbooks\u002FDeep-Learning-with-PyTorch\u002FEli-Stevens\u002F9781617295263)\n- [Neural Networks and Deep Learning](http:\u002F\u002Fneuralnetworksanddeeplearning.com)\n- [Deep Learning Cookbook](https:\u002F\u002Fwww.oreilly.com\u002Flibrary\u002Fview\u002Fdeep-learning-cookbook\u002F9781491995839\u002F)\n- [Introduction to Machine Learning with Python](https:\u002F\u002Fwww.oreilly.com\u002Flibrary\u002Fview\u002Fintroduction-to-machine\u002F9781449369880\u002F)\n- [Artificial Intelligence: Foundations of Computational Agents, 2nd Edition](https:\u002F\u002Fartint.info\u002Findex.html) - Free HTML version\n- [The Quest for Artificial 
Intelligence: A History of Ideas and Achievements](https:\u002F\u002Fai.stanford.edu\u002F~nilsson\u002FQAI\u002Fqai.pdf) - Free Download\n- [Graph Algorithms for Data Science](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fgraph-algorithms-for-data-science) - Early Access\n- [Data Mesh in Action](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fdata-mesh-in-action) - Early Access\n- [Julia for Data Analysis](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fjulia-for-data-analysis) - Early Access\n- [Causal Inference for Data Science](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fcausal-inference-for-data-science) - Early Access\n- [Regular Expression Puzzles and AI Coding Assistants](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fregular-expression-puzzles-and-ai-coding-assistants) by David Mertz\n- [Dive into Deep Learning](https:\u002F\u002Fd2l.ai\u002F)\n- [Data for All](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fdata-for-all)\n- [Interpretable Machine Learning: A Guide for Making Black Box Models Explainable](https:\u002F\u002Fchristophm.github.io\u002Finterpretable-ml-book\u002F) - Free GitHub version\n- [Foundations of Data Science](https:\u002F\u002Fwww.cs.cornell.edu\u002Fjeh\u002Fbook.pdf) Free Download \n- [Comet for DataScience: Enhance your ability to manage and optimize the life cycle of your data science project](https:\u002F\u002Fwww.amazon.com\u002FComet-Data-Science-Enhance-optimize\u002Fdp\u002F1801814430) \n- [Software Engineering for Data Scientists](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fsoftware-engineering-for-data-scientists) - Early Access\n- [Julia for Data Science](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fjulia-for-data-science) - Early Access\n- [An Introduction to Statistical Learning](https:\u002F\u002Fwww.statlearning.com\u002F) - Download Page\n- [Machine Learning For Absolute 
Beginners](https:\u002F\u002Fwww.amazon.in\u002FMachine-Learning-Absolute-Beginners-Introduction-ebook\u002Fdp\u002FB07335JNW1)\n- [Unifying Business, Data, and Code: Designing Data Products with JSON Schema](https:\u002F\u002Flearning.oreilly.com\u002Flibrary\u002Fview\u002Funifying-business-data\u002F9781098144999\u002F)\n- [Grokking Bayes](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fgrokking-bayes)\n- [Machine Learning Q and AI](https:\u002F\u002Fsebastianraschka.com\u002Fbooks\u002Fml-q-and-ai)\n- [JavaScript for Data Science](https:\u002F\u002Fthird-bit.com\u002Fjs4ds\u002F) - Free html page\n- [Angewandte Data Science](https:\u002F\u002Fangewandtedatascience.de\u002F) - German book about applied data science\n- [The Math Behind Artificial Intelligence](https:\u002F\u002Fwww.freecodecamp.org\u002Fnews\u002Fthe-math-behind-artificial-intelligence-book): A free FreeCodeCamp book teaching the math behind AI in plain English from an engineering point of view.\n\n#### Book Deals (Affiliated)\n\n- [eBook sale - Save up to 45% on eBooks!](https:\u002F\u002Fwww.manning.com\u002F?utm_source=mikrobusiness&utm_medium=affiliate&utm_campaign=ebook_sale_8_8_22)\n\n- [Causal Machine Learning](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fcausal-machine-learning?utm_source=mikrobusiness&utm_medium=affiliate&utm_campaign=book_ness_causal_7_26_22&a_aid=mikrobusiness&a_bid=43a2198b\n)\n- [Managing ML Projects](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fmanaging-machine-learning-projects?utm_source=mikrobusiness&utm_medium=affiliate&utm_campaign=book_thompson_managing_6_14_22)\n- [Causal Inference for Data Science](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fcausal-inference-for-data-science?utm_source=mikrobusiness&utm_medium=affiliate&utm_campaign=book_ruizdevilla_causal_6_6_22)\n- [Data for All](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fdata-for-all?utm_source=mikrobusiness&utm_medium=affiliate)\n\n### Journals, Publications and Magazines\n**[`^  
      back to top        ^`](#awesome-data-science)**\n\n- [ICML](https:\u002F\u002Ficml.cc\u002F2015\u002F) - International Conference on Machine Learning\n- [GECCO](https:\u002F\u002Fgecco-2019.sigevo.org\u002Findex.html\u002FHomePage) - The Genetic and Evolutionary Computation Conference (GECCO)\n- [epjdatascience](https:\u002F\u002Fepjdatascience.springeropen.com\u002F)\n- [Journal of Data Science](https:\u002F\u002Fjds-online.org\u002Fjournal\u002FJDS) - an international journal devoted to applications of statistical methods at large\n- [Big Data Research](https:\u002F\u002Fwww.journals.elsevier.com\u002Fbig-data-research)\n- [Journal of Big Data](https:\u002F\u002Fjournalofbigdata.springeropen.com\u002F)\n- [Big Data & Society](https:\u002F\u002Fjournals.sagepub.com\u002Fhome\u002Fbds)\n- [Data Science Journal](https:\u002F\u002Fwww.jstage.jst.go.jp\u002Fbrowse\u002Fdsj)\n- [datatau.com\u002Fnews](https:\u002F\u002Fwww.datatau.com\u002Fnews) - Like Hacker News, but for data\n- [Data Science Trello Board](https:\u002F\u002Ftrello.com\u002Fb\u002FrbpEfMld\u002Fdata-science)\n- [Medium Data Science Topic](https:\u002F\u002Fmedium.com\u002Ftag\u002Fdata-science) - Data Science related publications on medium\n- [Towards Data Science Genetic Algorithm Topic](https:\u002F\u002Ftowardsdatascience.com\u002Fintroduction-to-genetic-algorithms-including-example-code-e396e98d8bf3#:~:text=A%20genetic%20algorithm%20is%20a,offspring%20of%20the%20next%20generation.) -Genetic Algorithm related Publications towards Data Science\n- [Maxim AI](https:\u002F\u002Fgetmaxim.ai). Tool for AI Agent Simulation, Evaluation & Observability. \n\n### Newsletters\n**[`^        back to top        ^`](#awesome-data-science)**\n\n- [AI Weekly](https:\u002F\u002Faiweekly.co) - Curated AI intelligence briefing from industry leaders covering models, funding, policy, and applications. 3x\u002Fweek since 2017, 40K+ subscribers.\n- [DataTalks.Club](https:\u002F\u002Fdatatalks.club). 
A weekly newsletter about data-related things. [Archive](https:\u002F\u002Fus19.campaign-archive.com\u002Fhome\u002F?u=0d7822ab98152f5afc118c176&id=97178021aa).\n- [The Analytics Engineering Roundup](https:\u002F\u002Froundup.getdbt.com\u002Fabout). A newsletter about data science. [Archive](https:\u002F\u002Froundup.getdbt.com\u002Farchive).\n- [Techpresso](https:\u002F\u002Fdupple.com\u002Ftechpresso). A free daily newsletter covering the most impactful developments in AI, ML, and tech. [Archive](https:\u002F\u002Fdupple.com\u002Ftechpresso).\n\n### Mailing lists\n**[`^        back to top        ^`](#awesome-data-science)**\n- [Working Group - Research Software Engineering in the Digital Humanities](https:\u002F\u002Fwww.listserv.dfn.de\u002Fsympa\u002Finfo\u002Fag-dhrse). This is the mailing list for the Research Software Engineering in the Digital Humanities (DH-RSE) working group.\n\n### Bloggers\n**[`^        back to top        ^`](#awesome-data-science)**\n\n- [Wes McKinney](https:\u002F\u002Fwesmckinney.com\u002Farchives.html) - Wes McKinney Archives.\n- [Matthew Russell](https:\u002F\u002Fminingthesocialweb.com\u002F) - Mining The Social Web.\n- [Greg Reda](http:\u002F\u002Fwww.gregreda.com\u002F) - Greg Reda Personal Blog\n- [Julia Evans](https:\u002F\u002Fjvns.ca\u002F) - Recurse Center alumna\n- [Hakan Kardas](https:\u002F\u002Fwww.cse.unr.edu\u002F~hkardes\u002F) - Personal Web Page\n- [Sean J. 
Taylor](https:\u002F\u002Fseanjtaylor.com\u002F) - Personal Web Page\n- [Drew Conway](http:\u002F\u002Fdrewconway.com\u002F) - Personal Web Page\n- [Hilary Mason](https:\u002F\u002Fhilarymason.com\u002F) - Personal Web Page\n- [Noah Iliinsky](http:\u002F\u002Fcomplexdiagrams.com\u002F) - Personal Blog\n- [Matt Harrison](https:\u002F\u002Fhairysun.com\u002F) - Personal Blog\n- [Vamshi Ambati](https:\u002F\u002Fallthingsds.wordpress.com\u002F) - AllThings Data Science\n- [Prash Chan](https:\u002F\u002Fwww.mdmgeek.com\u002F) - Tech Blog on Master Data Management And Every Buzz Surrounding It\n- [Clare Corthell](http:\u002F\u002Fdatasciencemasters.org\u002F) - The Open Source Data Science Masters\n- [Datawrangling](http:\u002F\u002Fwww.datawrangling.org) by Peter Skomoroch. MACHINE LEARNING, DATA MINING, AND MORE\n- [Quora Data Science](https:\u002F\u002Fwww.quora.com\u002Ftopic\u002FData-Science) - Data Science Questions and Answers from experts\n- [Siah](https:\u002F\u002Fopenresearch.wordpress.com\u002F) a PhD student at Berkeley\n- [Louis Dorard](https:\u002F\u002Fwww.ownml.co\u002Fblog\u002F) a technology guy with a penchant for the web and for data, big and small\n- [Machine Learning Mastery](https:\u002F\u002Fmachinelearningmastery.com\u002F) about helping professional programmers confidently apply machine learning algorithms to address complex problems.\n- [Daniel Forsyth](https:\u002F\u002Fwww.danielforsyth.me\u002F) - Personal Blog\n- [Data Science Weekly](https:\u002F\u002Fwww.datascienceweekly.org\u002F) - Weekly News Blog\n- [Revolution Analytics](https:\u002F\u002Fblog.revolutionanalytics.com\u002F) - Data Science Blog\n- [R Bloggers](https:\u002F\u002Fwww.r-bloggers.com\u002F) - R Bloggers\n- [The Practical Quant](https:\u002F\u002Fpracticalquant.blogspot.com\u002F) Big data\n- [Yet Another Data Blog](https:\u002F\u002Fyet-another-data-blog.blogspot.com\u002F) Yet Another Data Blog\n- [KD Nuggets](https:\u002F\u002Fwww.kdnuggets.com\u002F) Data Mining, 
Analytics, Big Data, Data Science - not a blog, a portal\n- [Meta Brown](https:\u002F\u002Fwww.metabrown.com\u002Fblog\u002F) - Personal Blog\n- [Data Scientist](https:\u002F\u002Fdatascientists.com\u002F) is building the data scientist culture.\n- [WhatSTheBigData](https:\u002F\u002Fwhatsthebigdata.com\u002F) is some of, all of, or much more than the above and this blog explores its impact on information technology, the business world, government agencies, and our lives.\n- [Tevfik Kosar](https:\u002F\u002Fmagnus-notitia.blogspot.com\u002F) - Magnus Notitia\n- [New Data Scientist](https:\u002F\u002Fnewdatascientist.blogspot.com\u002F) How a Social Scientist Jumps into the World of Big Data\n- [Harvard Data Science](https:\u002F\u002Fharvarddatascience.com\u002F) - Thoughts on Statistical Computing and Visualization\n- [Data Science 101](https:\u002F\u002Fryanswanstrom.com\u002Fdatascience101\u002F) - Learning To Be A Data Scientist\n- [Kaggle Past Solutions](https:\u002F\u002Fwww.chioka.in\u002Fkaggle-competition-solutions\u002F)\n- [DataScientistJourney](https:\u002F\u002Fdatascientistjourney.wordpress.com\u002Fcategory\u002Fdata-science\u002F)\n- [NYC Taxi Visualization Blog](https:\u002F\u002Fchriswhong.github.io\u002Fnyctaxi\u002F)\n- [Data-Mania](https:\u002F\u002Fwww.data-mania.com\u002F)\n- [Data-Magnum](https:\u002F\u002Fdata-magnum.com\u002F)\n- [datascopeanalytics](https:\u002F\u002Fdatascopeanalytics.com\u002Fblog\u002F)\n- [Digital transformation](https:\u002F\u002Ftarrysingh.com\u002F)\n- [datascientistjourney](https:\u002F\u002Fdatascientistjourney.wordpress.com\u002Fcategory\u002Fdata-science\u002F)\n- [Data Mania Blog](https:\u002F\u002Fwww.data-mania.com\u002Fblog\u002F)\n- [The File Drawer](https:\u002F\u002Fchris-said.io\u002F) - Chris Said's science blog\n- [Emilio Ferrara's web page](http:\u002F\u002Fwww.emilio.ferrara.name\u002F)\n- [DataNews](https:\u002F\u002Fdatanews.tumblr.com\u002F)\n- [Reddit 
TextMining](https:\u002F\u002Fwww.reddit.com\u002Fr\u002Ftextdatamining\u002F)\n- [Periscopic](https:\u002F\u002Fperiscopic.com\u002F#!\u002Fnews)\n- [Hilary Parker](https:\u002F\u002Fhilaryparker.com\u002F)\n- [Data Stories](https:\u002F\u002Fdatastori.es\u002F)\n- [Data Science Lab](https:\u002F\u002Fdatasciencelab.wordpress.com\u002F)\n- [Meaning of](https:\u002F\u002Fwww.kennybastani.com\u002F)\n- [Adventures in Data Land](https:\u002F\u002Fblog.smola.org)\n- [Dataclysm](https:\u002F\u002Ftheblog.okcupid.com\u002F)\n- [FlowingData](https:\u002F\u002Fflowingdata.com\u002F) - Visualization and Statistics\n- [Calculated Risk](https:\u002F\u002Fwww.calculatedriskblog.com\u002F)\n- [O'reilly Learning Blog](https:\u002F\u002Fwww.oreilly.com\u002Fcontent\u002Ftopics\u002Foreilly-learning\u002F)\n- [Dominodatalab](https:\u002F\u002Fblog.dominodatalab.com\u002F)\n- [i am trask](https:\u002F\u002Fiamtrask.github.io\u002F) - A Machine Learning Craftsmanship Blog\n- [Vademecum of Practical Data Science](https:\u002F\u002Fdatasciencevademecum.wordpress.com\u002F) - Handbook and recipes for data-driven solutions of real-world problems\n- [Dataconomy](https:\u002F\u002Fdataconomy.com\u002F) - A blog on the newly emerging data economy\n- [Springboard](https:\u002F\u002Fwww.springboard.com\u002Fblog\u002F) - A blog with resources for data science learners\n- [Analytics Vidhya](https:\u002F\u002Fwww.analyticsvidhya.com\u002F) - A full-fledged website about data science and analytics study material.\n- [Occam's Razor](https:\u002F\u002Fwww.kaushik.net\u002Favinash\u002F) - Focused on Web Analytics.\n- [Data School](https:\u002F\u002Fwww.dataschool.io\u002F) - Data science tutorials for beginners!\n- [Colah's Blog](https:\u002F\u002Fcolah.github.io) - Blog for understanding Neural Networks!\n- [Sebastian's Blog](https:\u002F\u002Fruder.io\u002F#open) - Blog for NLP and transfer learning!\n- [Distill](https:\u002F\u002Fdistill.pub) - Dedicated to clear explanations of machine 
learning!\n- [Chris Albon's Website](https:\u002F\u002Fchrisalbon.com\u002F) - Data Science and AI notes\n- [Andrew Carr](https:\u002F\u002Fandrewnc.github.io\u002Fblog\u002Fblog.html) - Data Science with Esoteric programming languages\n- [floydhub](https:\u002F\u002Fblog.floydhub.com\u002Fintroduction-to-genetic-algorithms\u002F) - Blog for Evolutionary Algorithms\n- [Jingles](https:\u002F\u002Fjinglescode.github.io\u002F) - Review and extract key concepts from academic papers\n- [nbshare](https:\u002F\u002Fwww.nbshare.io\u002Fnotebooks\u002Fdata-science\u002F) - Data Science notebooks\n- [Loic Tetrel](https:\u002F\u002Fltetrel.github.io\u002F) - Data science blog\n- [Chip Huyen's Blog](https:\u002F\u002Fhuyenchip.com\u002Fblog\u002F) - ML Engineering, MLOps, and the use of ML in startups\n- [Maria Khalusova](https:\u002F\u002Fwww.mariakhalusova.com\u002F) - Data science blog\n- [Aditi Rastogi](https:\u002F\u002Fmedium.com\u002F@aditi2507rastogi) - ML,DL,Data Science blog\n- [Santiago Basulto](https:\u002F\u002Fmedium.com\u002F@santiagobasulto) - Data Science with Python\n- [Akhil Soni](https:\u002F\u002Fmedium.com\u002F@akhil0435) - ML, DL and Data Science\n- [Akhil Soni](https:\u002F\u002Fakhilworld.hashnode.dev\u002F) - ML, DL and Data Science \n- [Applied AI Blogs](https:\u002F\u002Fwww.appliedaicourse.com\u002Fblog\u002F) - In-depth articles on AI, machine learning, and data science concepts with practical applications.\n- [Scaler Blogs](https:\u002F\u002Fwww.scaler.com\u002Fblog\u002F) - Educational content on software development, AI, and career growth in tech.\n- [Mlu github](https:\u002F\u002Fmlu-explain.github.io\u002F) - MLU is developed by Amazon to help people in the ML space; you can learn everything from the basics here with live diagrams\n- [Jan Oliver Rüdiger](https:\u002F\u002Fnotesjor.de\u002F) - ML, DL and Data Science - with a focus on text-\u002Fdata-mining\n\n### Presentations\n**[`^        back to top        ^`](#awesome-data-science)**\n\n- [How to 
Become a Data Scientist](https:\u002F\u002Fwww.slideshare.net\u002Fryanorban\u002Fhow-to-become-a-data-scientist)\n- [Introduction to Data Science](https:\u002F\u002Fwww.slideshare.net\u002FNikoVuokko\u002Fintroduction-to-data-science-25391618)\n- [Intro to Data Science for Enterprise Big Data](https:\u002F\u002Fwww.slideshare.net\u002Fpacoid\u002Fintro-to-data-science-for-enterprise-big-data)\n- [How to Interview a Data Scientist](https:\u002F\u002Fwww.slideshare.net\u002Fdtunkelang\u002Fhow-to-interview-a-data-scientist)\n- [How to Share Data with a Statistician](https:\u002F\u002Fgithub.com\u002Fjtleek\u002Fdatasharing)\n- [The Science of a Great Career in Data Science](https:\u002F\u002Fwww.slideshare.net\u002Fkatemats\u002Fthe-science-of-a-great-career-in-data-science)\n- [What Does a Data Scientist Do?](https:\u002F\u002Fwww.slideshare.net\u002Fdatasciencelondon\u002Fbig-data-sorry-data-science-what-does-a-data-scientist-do)\n- [Building Data Start-Ups: Fast, Big, and Focused](https:\u002F\u002Fwww.slideshare.net\u002Fmedriscoll\u002Fdriscoll-strata-buildingdatastartups25may2011clean)\n- [How to win data science competitions with Deep Learning](https:\u002F\u002Fwww.slideshare.net\u002F0xdata\u002Fhow-to-win-data-science-competitions-with-deep-learning)\n- [Full-Stack Data Scientist](https:\u002F\u002Fwww.slideshare.net\u002FAlexeyGrigorev\u002Ffullstack-data-scientist)\n\n### Podcasts\n**[`^        back to top        ^`](#awesome-data-science)**\n\n- [AI at Home](https:\u002F\u002Fpodcasts.apple.com\u002Fus\u002Fpodcast\u002Fdata-science-at-home\u002Fid1069871378)\n- [AI Today](https:\u002F\u002Fwww.cognilytica.com\u002Faitoday\u002F)\n- [Adversarial Learning](https:\u002F\u002Fadversariallearning.com\u002F)\n- [Chai time Data Science](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PLLvvXm0q8zUbiNdoIazGzlENMXvZ9bd3x)\n- [Chain of Thought](https:\u002F\u002Fwww.chainofthought.show\u002F)\n- [Data Engineering 
Podcast](https:\u002F\u002Fwww.dataengineeringpodcast.com\u002F)\n- [Data Science at Home](https:\u002F\u002Fdatascienceathome.com\u002F)\n- [Data Science Mixer](https:\u002F\u002Fcommunity.alteryx.com\u002Ft5\u002FData-Science-Mixer\u002Fbg-p\u002Fmixer)\n- [Data Skeptic](https:\u002F\u002Fdataskeptic.com\u002F)\n- [Data Stories](https:\u002F\u002Fdatastori.es\u002F)\n- [Datacast](https:\u002F\u002Fjameskle.com\u002Fwrites\u002Fcategory\u002FDatacast)\n- [DataFramed](https:\u002F\u002Fwww.datacamp.com\u002Fcommunity\u002Fpodcast)\n- [DataTalks.Club](https:\u002F\u002Fanchor.fm\u002Fdatatalksclub)\n- [Gradient Descent](https:\u002F\u002Fwandb.ai\u002Ffully-connected\u002Fgradient-descent)\n- [Learning Machines 101](https:\u002F\u002Fwww.learningmachines101.com\u002F)\n- [Let's Data (Brazil)](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PLn_z5E4dh_Lj5eogejMxfOiNX3nOhmhmM)\n- [Linear Digressions](https:\u002F\u002Flineardigressions.com\u002F)\n- [Not So Standard Deviations](https:\u002F\u002Fnssdeviations.com\u002F)\n- [O'Reilly Data Show Podcast](https:\u002F\u002Fwww.oreilly.com\u002Fradar\u002Ftopics\u002Foreilly-data-show-podcast\u002F)\n- [Partially Derivative](http:\u002F\u002Fpartiallyderivative.com\u002F)\n- [Superdatascience](https:\u002F\u002Fwww.superdatascience.com\u002Fpodcast\u002F)\n- [The Data Engineering Show](https:\u002F\u002Fwww.dataengineeringshow.com\u002F)\n- [The Radical AI Podcast](https:\u002F\u002Fwww.radicalai.org\u002F)\n- [What's The Point](https:\u002F\u002Ffivethirtyeight.com\u002Ftag\u002Fwhats-the-point\u002F)\n- [The Analytics Engineering Podcast](https:\u002F\u002Froundup.getdbt.com\u002Fs\u002Fthe-analytics-engineering-podcast)\n\n### YouTube Videos & Channels\n**[`^        back to top        ^`](#awesome-data-science)**\n\n- [What is machine learning?](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=WXHM_i-fgGo)\n- [Andrew Ng: Deep Learning, Self-Taught Learning and Unsupervised Feature 
Learning](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=n1ViNeWhC24)\n- [Data36 - Data Science for Beginners by Tomi Mester](https:\u002F\u002Fwww.youtube.com\u002Fc\u002FTomiMesterData36comDataScienceForBeginners)\n- [Deep Learning: Intelligence from Big Data](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=czLI3oLDe8M)\n- [Interview with Google's AI and Deep Learning 'Godfather' Geoffrey Hinton](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=1Wp3IIpssEc)\n- [Introduction to Deep Learning with Python](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=S75EdAcXHKk)\n- [What is machine learning, and how does it work?](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=elojMnjn4kk)\n- [CampusX](https:\u002F\u002Fwww.youtube.com\u002F@campusx-official)\n- [Data School](https:\u002F\u002Fwww.youtube.com\u002Fchannel\u002FUCnVzApLJE2ljPZSeQylSEyg) - Data Science Education\n- [Neural Nets for Newbies by Melanie Warrick (May 2015)](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=Cu6A96TUy_o)\n- [Neural Networks video series by Hugo Larochelle](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PL6Xpj9I5qXYEcOhn7TqghAJ6NAPrNmUBH)\n- [Google DeepMind co-founder Shane Legg - Machine Super Intelligence](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=evNCyRL3DOU)\n- [Data Science Primer](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=cHzvYxBN9Ls&list=PLPqVjP3T4RIRsjaW07zoGzH-Z4dBACpxY)\n- [Data Science with Genetic Algorithms](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=lpD38NxTOnk)\n- [Data Science for Beginners](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PL2zq7klxX5ATMsmyRazei7ZXkP1GHt-vs)\n- [DataTalks.Club](https:\u002F\u002Fwww.youtube.com\u002Fchannel\u002FUCDvErgK0j5ur3aLgn6U-LqQ)\n- [Mildlyoverfitted - Tutorials on intermediate ML\u002FDL topics](https:\u002F\u002Fwww.youtube.com\u002Fchannel\u002FUCYBSjwkGTK06NnDnFsOcR7g)\n- [mlops.community - Interviews of industry experts about production 
ML](https:\u002F\u002Fwww.youtube.com\u002Fchannel\u002FUCYBSjwkGTK06NnDnFsOcR7g)\n- [ML Street Talk - Unabashedly technical and non-commercial, so you will hear no annoying pitches.](https:\u002F\u002Fwww.youtube.com\u002Fc\u002Fmachinelearningstreettalk)\n- [Neural networks by 3Blue1Brown ](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PLZHQObOWTQDNU6R1_67000Dx_ZCJB-3pi)\n- [Neural networks from scratch by Sentdex](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PLQVvvaa0QuDcjD5BAw2DxE6OF2tius3V3)\n- [Manning Publications YouTube channel](https:\u002F\u002Fwww.youtube.com\u002Fc\u002FManningPublications\u002Ffeatured)\n- [Ask Dr Chong: How to Lead in Data Science - Part 1](https:\u002F\u002Fyoutu.be\u002FJYuQZii5o58)\n- [Ask Dr Chong: How to Lead in Data Science - Part 2](https:\u002F\u002Fyoutu.be\u002FSzqIXV-O-ko)\n- [Ask Dr Chong: How to Lead in Data Science - Part 3](https:\u002F\u002Fyoutu.be\u002FOgwm7k_smTA)\n- [Ask Dr Chong: How to Lead in Data Science - Part 4](https:\u002F\u002Fyoutu.be\u002Fa9usjdzTxTU)\n- [Ask Dr Chong: How to Lead in Data Science - Part 5](https:\u002F\u002Fyoutu.be\u002FMYdQq-F3Ws0)\n- [Ask Dr Chong: How to Lead in Data Science - Part 6](https:\u002F\u002Fyoutu.be\u002FLOOt4OVC3hY)\n- [Regression Models: Applying simple Poisson regression](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=9Hk8K8jhiOo)\n- [Deep Learning Architectures](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PLv8Cp2NvcY8DpVcsmOT71kymgMmcr59Mf)\n- [Time Series Modelling and Analysis](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PL3N9eeOlCrP5cK0QRQxeJd6GrQvhAtpBK)\n- [Serrano.Academy](https:\u002F\u002Fwww.youtube.com\u002F@SerranoAcademy)\n- [End to End Data Science Playlist](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=S_F_c9e2bz4&list=PLZoTAELRMXVPS-dOaVbAux22vzqdgoGhG)\n- [Introduction to Data Science - 
Linkedin](https:\u002F\u002Fwww.linkedin.com\u002Flearning\u002Fintroduction-to-data-science-22668235\u002Fbeginning-your-data-science-exploration?u=42458916)\n\n## Socialize\n**[`^        back to top        ^`](#awesome-data-science)**\n\nBelow are some Social Media links. Connect with other data scientists!\n\n- [Facebook Accounts](#facebook-accounts)\n- [Twitter Accounts](#twitter-accounts)\n- [Telegram Channels](#telegram-channels)\n- [Slack Communities](#slack-communities)\n- [GitHub Groups](#github-groups)\n- [Data Science Competitions](#data-science-competitions)\n\n\n### Facebook Accounts\n**[`^        back to top        ^`](#awesome-data-science)**\n\n- [Data](https:\u002F\u002Fwww.facebook.com\u002Fdata)\n- [Big Data Scientist](https:\u002F\u002Fwww.facebook.com\u002FBigdatascientist)\n- [Data Science Day](https:\u002F\u002Fwww.facebook.com\u002Fdatascienceday\u002F)\n- [Data Science Academy](https:\u002F\u002Fwww.facebook.com\u002Fnycdatascience)\n- [Facebook Data Science Page](https:\u002F\u002Fwww.facebook.com\u002Fpages\u002FData-science\u002F431299473579193?ref=br_rs)\n- [Data Science London](https:\u002F\u002Fwww.facebook.com\u002Fpages\u002FData-Science-London\u002F226174337471513)\n- [Data Science Technology and Corporation](https:\u002F\u002Fwww.facebook.com\u002FDataScienceTechnologyCorporation?ref=br_rs)\n- [Data Science - Closed Group](https:\u002F\u002Fwww.facebook.com\u002Fgroups\u002F1394010454157077\u002F?ref=br_rs)\n- [Center for Data Science](https:\u002F\u002Fwww.facebook.com\u002Fcenterdatasciences?ref=br_rs)\n- [Big data hadoop NOSQL Hive Hbase](https:\u002F\u002Fwww.facebook.com\u002Fgroups\u002Fbigdatahadoop\u002F)\n- [Analytics, Data Mining, Predictive Modeling, Artificial Intelligence](https:\u002F\u002Fwww.facebook.com\u002Fgroups\u002Fdata.analytics\u002F)\n- [Big Data Analytics using R](https:\u002F\u002Fwww.facebook.com\u002Fgroups\u002F434352233255448\u002F)\n- [Big Data Analytics with R and 
Hadoop](https:\u002F\u002Fwww.facebook.com\u002Fgroups\u002Frhadoop\u002F)\n- [Big Data Learnings](https:\u002F\u002Fwww.facebook.com\u002Fgroups\u002Fbigdatalearnings\u002F)\n- [Big Data, Data Science, Data Mining & Statistics](https:\u002F\u002Fwww.facebook.com\u002Fgroups\u002Fbigdatastatistics\u002F)\n- [BigData\u002FHadoop Expert](https:\u002F\u002Fwww.facebook.com\u002Fgroups\u002FBigDataExpert\u002F)\n- [Data Mining \u002F Machine Learning \u002F AI](https:\u002F\u002Fwww.facebook.com\u002Fgroups\u002Fmachinelearningforum\u002F)\n- [Data Mining\u002FBig Data - Social Network Ana](https:\u002F\u002Fwww.facebook.com\u002Fgroups\u002Fdataminingsocialnetworks\u002F)\n- [Vademecum of Practical Data Science](https:\u002F\u002Fwww.facebook.com\u002Fdatasciencevademecum)\n- [Veri Bilimi Istanbul](https:\u002F\u002Fwww.facebook.com\u002Fgroups\u002Fveribilimiistanbul\u002F)\n- [The Data Science Blog](https:\u002F\u002Fwww.facebook.com\u002FtheDataScienceBlog\u002F)\n\n\n### Twitter Accounts\n**[`^        back to top        ^`](#awesome-data-science)**\n\n| Twitter | Description |\n| --- | --- |\n| [Big Data Combine](https:\u002F\u002Ftwitter.com\u002FBigDataCombine) | Rapid-fire, live tryouts for data scientists seeking to monetize their models as trading strategies |\n| Big Data Mania | Data Viz Wiz, Data Journalist, Growth Hacker, Author of Data Science for Dummies (2015) |\n| [Big Data Science](https:\u002F\u002Ftwitter.com\u002Fanalyticbridge) | Big Data, Data Science, Predictive Modeling, Business Analytics, Hadoop, Decision and Operations Research. |\n| Charlie Greenbacker | Director of Data Science at @ExploreAltamira |\n| [Chris Said](https:\u002F\u002Ftwitter.com\u002FChris_Said) | Data scientist at Twitter |\n| [Clare Corthell](https:\u002F\u002Ftwitter.com\u002Fclarecorthell) | Dev, Design, Data Science @mattermark #hackerei |\n| [DADI Charles-Abner](https:\u002F\u002Ftwitter.com\u002FDadiCharles) | #datascientist @Ekimetrics. 
, #machinelearning #dataviz #DynamicCharts #Hadoop #R #Python #NLP #Bitcoin #dataenthousiast |\n| [Data Science Central](https:\u002F\u002Ftwitter.com\u002FDataScienceCtrl) | Data Science Central is the industry's single resource for Big Data practitioners. |\n| [Data Science London](https:\u002F\u002Ftwitter.com\u002Fds_ldn)  | Data Science. Big Data. Data Hacks. Data Junkies. Data Startups. Open Data |\n| [Data Science Renee](https:\u002F\u002Ftwitter.com\u002FBecomingDataSci) | Documenting my path from SQL Data Analyst pursuing an Engineering Master's Degree to Data Scientist |\n| [Data Science Report](https:\u002F\u002Ftwitter.com\u002FTedOBrien93) | Mission is to help guide & advance careers in Data Science & Analytics |\n| [Data Science Tips](https:\u002F\u002Ftwitter.com\u002Fdatasciencetips) | Tips and Tricks for Data Scientists around the world! #datascience #bigdata |\n| [Data Vizzard](https:\u002F\u002Ftwitter.com\u002FDataVisualizati) | DataViz, Security, Military |\n| [DataScienceX](https:\u002F\u002Ftwitter.com\u002FDataScienceX) |  |\n| deeplearning4j | |\n| [DJ Patil](https:\u002F\u002Ftwitter.com\u002Fdpatil) | White House Data Chief, VP @ RelateIQ. |\n| [Domino Data Lab](https:\u002F\u002Ftwitter.com\u002FDominoDataLab) | |\n| [Drew Conway](https:\u002F\u002Ftwitter.com\u002Fdrewconway) | Data nerd, hacker, student of conflict. |\n| Emilio Ferrara | #Networks, #MachineLearning and #DataScience. I work on #Social Media. Postdoc at @IndianaUniv |\n| [Erin Bartolo](https:\u002F\u002Ftwitter.com\u002Ferinbartolo) | Running with #BigData--enjoying a love\u002Fhate relationship with its hype. @iSchoolSU #DataScience Program Mgr. 
|\n| [Greg Reda](https:\u002F\u002Ftwitter.com\u002Fgjreda)  | Working @ _GrubHub_ about data and pandas |\n| [Gregory Piatetsky](https:\u002F\u002Ftwitter.com\u002Fkdnuggets) |  KDnuggets President, Analytics\u002FBig Data\u002FData Mining\u002FData Science expert, KDD & SIGKDD co-founder, was Chief Scientist at 2 startups, part-time philosopher. |\n| [Hadley Wickham](https:\u002F\u002Ftwitter.com\u002Fhadleywickham) |  Chief Scientist at RStudio, and an Adjunct Professor of Statistics at the University of Auckland, Stanford University, and Rice University. |\n| [Hakan Kardas](https:\u002F\u002Ftwitter.com\u002Fhakan_kardes) | Data Scientist |\n| [Hilary Mason](https:\u002F\u002Ftwitter.com\u002Fhmason) | Data Scientist in Residence at @accel. |\n| [Jeff Hammerbacher](https:\u002F\u002Ftwitter.com\u002Fhackingdata)  | ReTweeting about data science |\n| [John Myles White](https:\u002F\u002Ftwitter.com\u002Fjohnmyleswhite)  | Scientist at Facebook and Julia developer. Author of Machine Learning for Hackers and Bandit Algorithms for Website Optimization. Tweets reflect my views only. |\n| [Juan Miguel Lavista](https:\u002F\u002Ftwitter.com\u002FBDataScientist) | Principal Data Scientist @ Microsoft Data Science Team |\n| [Julia Evans](https:\u002F\u002Ftwitter.com\u002Fb0rk) | Hacker - Pandas - Data Analyze |\n| [Kenneth Cukier](https:\u002F\u002Ftwitter.com\u002Fkncukier) | The Economist's Data Editor and co-author of Big Data (http:\u002F\u002Fwww.big-data-book.com\u002F). |\n| Kevin Davenport | Organizer of https:\u002F\u002Fwww.meetup.com\u002FSan-Diego-Data-Science-R-Users-Group\u002F |\n| [Kevin Markham](https:\u002F\u002Ftwitter.com\u002Fjustmarkham) | Data science instructor, and founder of [Data School](https:\u002F\u002Fwww.dataschool.io\u002F) |\n| [Kim Rees](https:\u002F\u002Ftwitter.com\u002Fkrees) | Interactive data visualization and tools. Data flaneur. 
|\n| [Kirk Borne](https:\u002F\u002Ftwitter.com\u002FKirkDBorne) | DataScientist, PhD Astrophysicist, Top #BigData Influencer. |\n| Linda Regber | Data storyteller, visualizations. |\n| [Luis Rei](https:\u002F\u002Ftwitter.com\u002Flmrei) | PhD Student. Programming, Mobile, Web. Artificial Intelligence, Intelligent Robotics Machine Learning, Data Mining, Natural Language Processing, Data Science. |\n| Mark Stevenson | Data Analytics Recruitment Specialist at Salt (@SaltJobs)  Analytics - Insight - Big Data - Data science |\n| [Matt Harrison](https:\u002F\u002Ftwitter.com\u002F__mharrison__) | Opinions of full-stack Python guy, author, instructor, currently playing Data Scientist. Occasional fathering, husbanding, organic gardening. |\n| [Matthew Russell](https:\u002F\u002Ftwitter.com\u002Fptwobrussell) | Mining the Social Web. |\n| [Mert Nuhoğlu](https:\u002F\u002Ftwitter.com\u002Fmertnuhoglu)  | Data Scientist at BizQualify, Developer |\n| [Monica Rogati](https:\u002F\u002Ftwitter.com\u002Fmrogati) | Data @ Jawbone. Turned data into stories & products at LinkedIn. Text mining, applied machine learning, recommender systems. Ex-gamer, ex-machine coder; namer. |\n| [Noah Iliinsky](https:\u002F\u002Ftwitter.com\u002Fnoahi) | Visualization & interaction designer. Practical cyclist. Author of vis books: https:\u002F\u002Fwww.oreilly.com\u002Fpub\u002Fau\u002F4419 |\n| [Paul Miller](https:\u002F\u002Ftwitter.com\u002FPaulMiller) | Cloud Computing\u002F Big Data\u002F Open Data Analyst & Consultant. Writer, Speaker & Moderator. Gigaom Research Analyst. |\n| [Peter Skomoroch](https:\u002F\u002Ftwitter.com\u002Fpeteskomoroch) | Creating intelligent systems to automate tasks & improve decisions. Entrepreneur, ex-Principal Data Scientist @LinkedIn. Machine Learning, ProductRei, Networks |\n| [Prash Chan](https:\u002F\u002Ftwitter.com\u002FMDMGeek) | Solution Architect @ IBM, Master Data Management, Data Quality & Data Governance Blogger. 
Data Science, Hadoop, Big Data & Cloud. |\n| [Quora Data Science](https:\u002F\u002Ftwitter.com\u002Fq_datascience)  | Quora's data science topic |\n| [R-Bloggers](https:\u002F\u002Ftwitter.com\u002FRbloggers) | Tweet blog posts from the R blogosphere, data science conferences, and (!) open jobs for data scientists. |\n| [Rand Hindi](https:\u002F\u002Ftwitter.com\u002Frandhindi) |  |\n| [Randy Olson](https:\u002F\u002Ftwitter.com\u002Frandal_olson) | Computer scientist researching artificial intelligence. Data tinkerer. Community leader for @DataIsBeautiful. #OpenScience advocate. |\n| [Recep Erol](https:\u002F\u002Ftwitter.com\u002FEROLRecep) | Data Science geek @ UALR |\n| [Ryan Orban](https:\u002F\u002Ftwitter.com\u002Fryanorban) | Data scientist, genetic origamist, hardware aficionado |\n| [Sean J. Taylor](https:\u002F\u002Ftwitter.com\u002Fseanjtaylor) | Social Scientist. Hacker. Facebook Data Science Team. Keywords: Experiments, Causal Inference, Statistics, Machine Learning, Economics. |\n| [Silvia K. Spiva](https:\u002F\u002Ftwitter.com\u002Fsilviakspiva) | #DataScience at Cisco |\n| [Harsh B. Gupta](https:\u002F\u002Ftwitter.com\u002Fharshbg) | Data Scientist at BBVA Compass |\n| [Spencer Nelson](https:\u002F\u002Ftwitter.com\u002Fspenczar_n) | Data nerd |\n| [Talha Oz](https:\u002F\u002Ftwitter.com\u002FtozCSS) | Enjoys ABM, SNA, DM, ML, NLP, HI, Python, Java. Top percentile Kaggler\u002Fdata scientist |\n| [Tasos Skarlatidis](https:\u002F\u002Ftwitter.com\u002Fanskarl) | Complex Event Processing, Big Data, Artificial Intelligence and Machine Learning. Passionate about programming and open-source. |\n| [Terry Timko](https:\u002F\u002Ftwitter.com\u002FTerry_Timko) | InfoGov; Bigdata; Data as a Service; Data Science; Open, Social & Business Data Convergence |\n| [Tony Baer](https:\u002F\u002Ftwitter.com\u002FTonyBaer) | IT analyst with Ovum covering Big Data & data management with some systems engineering thrown in. 
|\n| [Tony Ojeda](https:\u002F\u002Ftwitter.com\u002Ftonyojeda3) | Data Scientist , Author , Entrepreneur. Co-founder @DataCommunityDC. Founder @DistrictDataLab. #DataScience #BigData #DataDC |\n| [Vamshi Ambati](https:\u002F\u002Ftwitter.com\u002Fvambati) | Data Science @ PayPal. #NLP, #machinelearning; PhD, Carnegie Mellon alumni (Blog: https:\u002F\u002Fallthingsds.wordpress.com ) |\n| [Wes McKinney](https:\u002F\u002Ftwitter.com\u002Fwesmckinn) | Pandas (Python Data Analysis library). |\n| [WileyEd](https:\u002F\u002Ftwitter.com\u002FWileyEd) | Senior Manager - @Seagate Big Data Analytics @McKinsey Alum #BigData + #Analytics Evangelist #Hadoop, #Cloud, #Digital, & #R Enthusiast |\n| [WNYC Data News Team](https:\u002F\u002Ftwitter.com\u002Fdatanews) | The data news crew at @WNYC. Practicing data-driven journalism, making it visual, and showing our work. |\n| [Alexey Grigorev](https:\u002F\u002Ftwitter.com\u002FAl_Grigor) | Data science author |\n| [İlker Arslan](https:\u002F\u002Ftwitter.com\u002Filkerarslan_35) | Data science author. Shares mostly about Julia programming |\n| [INEVITABLE](https:\u002F\u002Ftwitter.com\u002FWeAreInevitable) | AI & Data Science Start-up Company based in England, UK |\n| [Jan Oliver Rüdiger](https:\u002F\u002Fx.com\u002FnotesJOR) | ML, DL and Data Science - with a focus on text-\u002Fdata-mining |\n\n### Telegram Channels\n**[`^        back to top        ^`](#awesome-data-science)**\n\n- [Open Data Science](https:\u002F\u002Ft.me\u002Fopendatascience) – First Telegram Data Science channel. 
Covering all technical and popular staff about anything related to Data Science: AI, Big Data, Machine Learning, Statistics, general Math and the applications of former.\n- [Loss function porn](https:\u002F\u002Ft.me\u002Floss_function_porn) — Beautiful posts on DS\u002FML theme with video or graphic visualization.\n- [Machinelearning](https:\u002F\u002Ft.me\u002Fai_machinelearning_big_data) – Daily ML news.\n\n\n### Slack Communities\n[top](#awesome-data-science)\n\n- [DataTalks.Club](https:\u002F\u002Fdatatalks.club)\n\n### GitHub Groups\n- [Berkeley Institute for Data Science](https:\u002F\u002Fgithub.com\u002FBIDS)\n\n### Data Science Competitions\n\nSome data mining competition platforms\n\n- [Kaggle](https:\u002F\u002Fwww.kaggle.com\u002F)\n- [DrivenData](https:\u002F\u002Fwww.drivendata.org\u002F)\n- [Analytics Vidhya](https:\u002F\u002Fdatahack.analyticsvidhya.com\u002F)\n- [InnoCentive](https:\u002F\u002Fwww.innocentive.com\u002F)\n- [Microprediction](https:\u002F\u002Fwww.microprediction.com\u002Fpython-1)\n\n## Fun\n\n- [Infographic](#infographics)\n- [Datasets](#datasets)\n- [Comics](#comics)\n\n\n### Infographics\n**[`^        back to top        ^`](#awesome-data-science)**\n\n| Preview                                                                                                                                                                                                                                     | Description                                                                                                                                                                                                                                                  |\n| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |\n| [\u003Cimg src=\"https:\u002F\u002Fi.imgur.com\u002F0OoLaa5.png\" width=\"150\" \u002F>](https:\u002F\u002Fi.imgur.com\u002F0OoLaa5.png)                                                                                                                                                | [Key differences of a data scientist vs. data engineer](https:\u002F\u002Fsearchbusinessanalytics.techtarget.com\u002Ffeature\u002FKey-differences-of-a-data-scientist-vs-data-engineer)                                                                                         |\n| [\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Facademic_awesome-datascience_readme_8865c0342320.png\" width=\"150\" \u002F>](https:\u002F\u002Fs3.amazonaws.com\u002Fassets.datacamp.com\u002Fblog_assets\u002FDataScienceEightSteps_Full.png)                    | A visual guide to Becoming a Data Scientist in 8 Steps by [DataCamp](https:\u002F\u002Fwww.datacamp.com) [(img)](https:\u002F\u002Fs3.amazonaws.com\u002Fassets.datacamp.com\u002Fblog_assets\u002FDataScienceEightSteps_Full.png)                                                              |\n| [\u003Cimg src=\"https:\u002F\u002Fi.imgur.com\u002FW2t2Roz.png\" width=\"150\" \u002F>](https:\u002F\u002Fi.imgur.com\u002FFxsL3b8.png)                                                                                                                                                | Mindmap on required skills ([img](https:\u002F\u002Fi.imgur.com\u002FFxsL3b8.png))                                                                                                                                                                                          |\n| [\u003Cimg 
src=\"https:\u002F\u002Fi.imgur.com\u002Frb9ruaa.png\" width=\"150\" \u002F>](https:\u002F\u002Fnirvacana.com\u002Fthoughts\u002Fwp-content\u002Fuploads\u002F2013\u002F07\u002FRoadToDataScientist1.png)                                                                                              | Swami Chandrasekaran made a [Curriculum via Metro map](http:\u002F\u002Fnirvacana.com\u002Fthoughts\u002F2013\u002F07\u002F08\u002Fbecoming-a-data-scientist\u002F).                                                                                                                                            |\n| [\u003Cimg src=\"https:\u002F\u002Fi.imgur.com\u002FXBgKF2l.png\" width=\"150\" \u002F>](https:\u002F\u002Fi.imgur.com\u002F4ZBBvb0.png)                                                                                                                                                | by [@kzawadz](https:\u002F\u002Ftwitter.com\u002Fkzawadz) via [twitter](https:\u002F\u002Ftwitter.com\u002FMktngDistillery\u002Fstatus\u002F538671811991715840)                                                                                                                                      |\n| [\u003Cimg src=\"https:\u002F\u002Fi.imgur.com\u002Fl9ZGtal.jpg\" width=\"150\" \u002F>](https:\u002F\u002Fi.imgur.com\u002FxLY3XZn.jpg)                                                                                                                                                | By [Data Science Central](https:\u002F\u002Fwww.datasciencecentral.com\u002F)                                                                                                                                                                                                |\n| [\u003Cimg src=\"https:\u002F\u002Fi.imgur.com\u002FTWkB4X6.png\" width=\"150\" \u002F>](https:\u002F\u002Fi.imgur.com\u002F0TydZ4M.png)                                                                                                                          
                      | Data Science Wars: R vs Python                                                                                                                                                                                                                               |\n| [\u003Cimg src=\"https:\u002F\u002Fi.imgur.com\u002FgtTlW5I.png\" width=\"150\" \u002F>](https:\u002F\u002Fi.imgur.com\u002FHnRwlce.png)                                                                                                                                                | How to select statistical or machine learning techniques                                                                                                                                                                                                     |\n| [\u003Cimg src=\"https:\u002F\u002Fscikit-learn.org\u002F1.5\u002F_downloads\u002Fb82bf6cd7438a351f19fac60fbc0d927\u002Fml_map.svg\" width=\"150\" \u002F>](https:\u002F\u002Fscikit-learn.org\u002F1.5\u002F_downloads\u002Fb82bf6cd7438a351f19fac60fbc0d927\u002Fml_map.svg)                                                                                                           | [Choosing the Right Estimator](https:\u002F\u002Fscikit-learn.org\u002F1.5\u002Fmachine_learning_map.html#choosing-the-right-estimator)                                                                                                                                                                                                                                 |\n| [\u003Cimg src=\"https:\u002F\u002Fi.imgur.com\u002F3JSyUq1.png\" width=\"150\" \u002F>](https:\u002F\u002Fi.imgur.com\u002FuEqMwZa.png)                                                                                                                                                | The Data Science Industry: Who Does What                                                                                                                
                                                                                                     |\n| [\u003Cimg src=\"https:\u002F\u002Fi.imgur.com\u002FDQqFwwy.png\" width=\"150\" \u002F>](https:\u002F\u002Fi.imgur.com\u002FRsHqY84.png)                                                                                                                                                | Data Science ~~Venn~~ Euler Diagram                                                                                                                                                                                                                          |\n| [\u003Cimg src=\"https:\u002F\u002Fwww.springboard.com\u002Fblog\u002Fwp-content\u002Fuploads\u002F2016\u002F03\u002F20160324_springboard_vennDiagram.png\" width=\"150\" height=\"150\" \u002F>](https:\u002F\u002Fwww.springboard.com\u002Fblog\u002Fwp-content\u002Fuploads\u002F2016\u002F03\u002F20160324_springboard_vennDiagram.png) | Different Data Science Skills and Roles from [Springboard](https:\u002F\u002Fwww.springboard.com)                                                                                       |\n| [\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Facademic_awesome-datascience_readme_dc617a4aeade.jpg\" width=\"150\" alt=\"Data Fallacies To Avoid\" \u002F>](https:\u002F\u002Fdata-literacy.geckoboard.com\u002Fposter\u002F)                                                 | A simple and friendly way of teaching your non-data scientist\u002Fnon-statistician colleagues [how to avoid mistakes with data](https:\u002F\u002Fdata-literacy.geckoboard.com\u002Fposter\u002F). From Geckoboard's [Data Literacy Lessons](https:\u002F\u002Fdata-literacy.geckoboard.com\u002F). 
|\n\n### Datasets\n**[`^        back to top        ^`](#awesome-data-science)**\n\n- [Academic Torrents](https:\u002F\u002Facademictorrents.com\u002F)\n- [ADS-B Exchange](https:\u002F\u002Fwww.adsbexchange.com\u002Fdata-samples\u002F) - Specific datasets for aircraft and Automatic Dependent Surveillance-Broadcast (ADS-B) sources.\n- [AI Displacement Tracker](https:\u002F\u002Fgithub.com\u002Fnoahaust2\u002Fai-displacement-tracker) - Structured dataset tracking 92 AI-attributed workforce reduction events affecting 453,748 workers across 12 countries and 11 sectors. JSON and CSV formats. CC-BY-4.0 licensed.\n- [hadoopilluminated.com](https:\u002F\u002Fhadoopilluminated.com\u002Fhadoop_illuminated\u002FPublic_Bigdata_Sets.html)\n- [data.gov](https:\u002F\u002Fcatalog.data.gov\u002Fdataset) - The home of the U.S. Government's open data\n- [United States Census Bureau](https:\u002F\u002Fwww.census.gov\u002F)\n- [enigma.com](https:\u002F\u002Fenigma.com\u002F) - Navigate the world of public data - Quickly search and analyze billions of public records published by governments, companies and organizations.\n- [datahub.io](https:\u002F\u002Fdatahub.io\u002F)\n- [aws.amazon.com\u002Fdatasets](https:\u002F\u002Faws.amazon.com\u002Fdatasets\u002F)\n- [datacite.org](https:\u002F\u002Fdatacite.org\u002F)\n- [The official portal for European data](https:\u002F\u002Fdata.europa.eu\u002Fen)\n- [NASDAQ:DATA](https:\u002F\u002Fdata.nasdaq.com\u002F) - Nasdaq Data Link A premier source for financial, economic and alternative datasets.\n- [Congressional Stock Brain](https:\u002F\u002Fcongressionalstockbrain.com) - Free AI-powered tool that scores U.S. congressional STOCK Act trade disclosures by significance. 
Machine-scored signals from 537 lawmakers' public trade filings.\n- [figshare.com](https:\u002F\u002Ffigshare.com\u002F)\n- [GeoLite Legacy Downloadable Databases](https:\u002F\u002Fdev.maxmind.com\u002Fgeoip)\n- [Hugging Face Datasets](https:\u002F\u002Fhuggingface.co\u002Fdatasets)\n- [Quora's Big Datasets Answer](https:\u002F\u002Fwww.quora.com\u002FWhere-can-I-find-large-datasets-open-to-the-public)\n- [Public Big Data Sets](https:\u002F\u002Fhadoopilluminated.com\u002Fhadoop_illuminated\u002FPublic_Bigdata_Sets.html)\n- [Kaggle Datasets](https:\u002F\u002Fwww.kaggle.com\u002Fdatasets)\n- [A Deep Catalog of Human Genetic Variation](https:\u002F\u002Fwww.internationalgenome.org\u002Fdata)\n- [A community-curated database of well-known people, places, and things](https:\u002F\u002Fdevelopers.google.com\u002Ffreebase\u002F)\n- [Google Public Data](https:\u002F\u002Fwww.google.com\u002Fpublicdata\u002Fdirectory)\n- [World Bank Data](https:\u002F\u002Fdata.worldbank.org\u002F)\n- [NYC Taxi data](https:\u002F\u002Fchriswhong.github.io\u002Fnyctaxi\u002F)\n- [Open Data Philly](https:\u002F\u002Fwww.opendataphilly.org\u002F) Connecting people with data for Philadelphia\n- [grouplens.org](https:\u002F\u002Fgrouplens.org\u002Fdatasets\u002F) Sample movie (with ratings), book and wiki datasets\n- [UC Irvine Machine Learning Repository](https:\u002F\u002Farchive.ics.uci.edu\u002Fml\u002F) - contains data sets good for machine learning\n- [research-quality data sets](https:\u002F\u002Fweb.archive.org\u002Fweb\u002F20150320022752\u002Fhttps:\u002F\u002Fbitly.com\u002Fbundles\u002Fhmason\u002F1) by [Hilary Mason](https:\u002F\u002Fweb.archive.org\u002Fweb\u002F20150501033715\u002Fhttps:\u002F\u002Fbitly.com\u002Fu\u002Fhmason\u002Fbundles)\n- [National Centers for Environmental Information](https:\u002F\u002Fwww.ncei.noaa.gov\u002F)\n- [ClimateData.us](https:\u002F\u002Fwww.climatedata.us\u002F) (related: [U.S. 
Climate Resilience Toolkit](https:\u002F\u002Ftoolkit.climate.gov\u002F))\n- [r\u002Fdatasets](https:\u002F\u002Fwww.reddit.com\u002Fr\u002Fdatasets\u002F)\n- [MapLight](https:\u002F\u002Fwww.maplight.org\u002Fdata-series) - provides a variety of data free of charge for uses that are freely available to the general public. Click on a data set below to learn more\n- [GHDx](https:\u002F\u002Fghdx.healthdata.org\u002F) - Institute for Health Metrics and Evaluation - a catalog of health and demographic datasets from around the world and including IHME results\n- [St. Louis Federal Reserve Economic Data - FRED](https:\u002F\u002Ffred.stlouisfed.org\u002F)\n- [New Zealand Institute of Economic Research – Data1850](https:\u002F\u002Fdata1850.nz\u002F)\n- [Open Data Sources](https:\u002F\u002Fgithub.com\u002Fdatasciencemasters\u002Fdata)\n- [UNICEF Data](https:\u002F\u002Fdata.unicef.org\u002F)\n- [undata](https:\u002F\u002Fdata.un.org\u002F)\n- [NASA SocioEconomic Data and Applications Center - SEDAC](https:\u002F\u002Fearthdata.nasa.gov\u002Fcenters\u002Fsedac-daac)\n- [The GDELT Project](https:\u002F\u002Fwww.gdeltproject.org\u002F)\n- [Sweden, Statistics](https:\u002F\u002Fwww.scb.se\u002Fen\u002F)\n- [StackExchange Data Explorer](https:\u002F\u002Fdata.stackexchange.com) - an open source tool for running arbitrary queries against public data from the Stack Exchange network.\n- [San Francisco Government Open Data](https:\u002F\u002Fdatasf.org\u002Fopendata\u002F)\n- [IBM Asset Dataset](https:\u002F\u002Fdeveloper.ibm.com\u002Fexchanges\u002Fdata\u002F)\n- [Open data Index](http:\u002F\u002Findex.okfn.org\u002F)\n- [Public Git Archive](https:\u002F\u002Fgithub.com\u002Fsrc-d\u002Fdatasets\u002Ftree\u002Fmaster\u002FPublicGitArchive)\n- [GHTorrent](https:\u002F\u002Fghtorrent.org\u002F)\n- [Microsoft Research Open Data](https:\u002F\u002Fmsropendata.com\u002F)\n- [Open Government Data Platform India](https:\u002F\u002Fdata.gov.in\u002F)\n- [Google Dataset Search 
(beta)](https:\u002F\u002Fdatasetsearch.research.google.com\u002F)\n- [NAYN.CO Turkish News with categories](https:\u002F\u002Fgithub.com\u002Fnaynco\u002Fnayn.data)\n- [Covid-19](https:\u002F\u002Fgithub.com\u002Fdatasets\u002Fcovid-19)\n- [Covid-19 Google](https:\u002F\u002Fgithub.com\u002Fgoogle-research\u002Fopen-covid-19-data)\n- [Enron Email Dataset](https:\u002F\u002Fwww.cs.cmu.edu\u002F~.\u002Fenron\u002F)\n- [5000 Images of Clothes](https:\u002F\u002Fgithub.com\u002Falexeygrigorev\u002Fclothing-dataset)\n- [IBB Open Portal](https:\u002F\u002Fdata.ibb.gov.tr\u002Fen\u002F)\n- [The Humanitarian Data Exchange](https:\u002F\u002Fdata.humdata.org\u002F)\n- [250k+ Job Postings](https:\u002F\u002Faws.amazon.com\u002Fmarketplace\u002Fpp\u002Fprodview-p2554p3tczbes) - An expanding dataset of historical job postings from Luxembourg from 2020 to today. Free with 250k+ job postings hosted on AWS Data Exchange.\n- [FinancialData.Net](https:\u002F\u002Ffinancialdata.net\u002Fdocumentation) - Financial datasets (stock market data, financial statements, sustainability data, and more).\n- [Google Dataset Search](https:\u002F\u002Fdatasetsearch.research.google.com\u002F) – Find datasets across the web.\n- [notesjor corpus-collection](https:\u002F\u002Fnotes.jan-oliver-ruediger.de\u002Fkorpora\u002F) - Free corpora (over 6 billion tokens) mostly German (both historically and in contemporary German).\n- [CLARIN-Repository](https:\u002F\u002Flindat.mff.cuni.cz\u002Frepository\u002Fhome) - CLARIN is a European repository for scientific datasets.\n- [GBIF](https:\u002F\u002Fwww.gbif.org\u002F) - Global Biodiversity Information Facility: 2.4B+ species occurrence records. Free, open API for ecological modeling and ML research.\n- [FAOSTAT](https:\u002F\u002Fwww.fao.org\u002Ffaostat\u002Fen\u002F) - UN FAO statistics on food production, trade, land use, and emissions for 245+ countries. 
Free API and bulk download.\n- [FirstData](https:\u002F\u002Fgithub.com\u002FMLT-OSS\u002FFirstData) - The world's most comprehensive authoritative data source knowledge base. 210+ curated sources from governments, international organizations, and research institutions. MCP integration for AI agents. MIT licensed.\n- [latamdata-py](https:\u002F\u002Fgithub.com\u002Fjuanmoisesd\u002Flatamdata-py) - Python package for one-line access to 38 open research datasets from Latin America (health, neuroscience, mental health, economics). pip install latamdata-py.\n- [ZipCheckup](https:\u002F\u002Fgithub.com\u002Fartakulov\u002Fus-water-quality-data) - Free ZIP-level environmental safety data for 42,000+ US ZIP codes: water quality, air quality, PFAS contamination, radon, lead, flood risk, and 11 more verticals. Public REST API, npm\u002FPyPI packages, CC BY 4.0.\n\n\n### Comics\n**[`^        back to top        ^`](#awesome-data-science)**\n\n- [Comic compilation](https:\u002F\u002Fmedium.com\u002F@nikhil_garg\u002Fa-compilation-of-comics-explaining-statistics-data-science-and-machine-learning-eeefbae91277)\n- [Cartoons](https:\u002F\u002Fwww.kdnuggets.com\u002Fwebsites\u002Fcartoons.html)\n- [Data Science Cartoons](https:\u002F\u002Fwww.cartoonstock.com\u002Fdirectory\u002Fd\u002Fdata_science.asp)\n- [Data Science: The XKCD Edition](https:\u002F\u002Fdavidlindelof.com\u002Fdata-science-the-xkcd-edition\u002F)\n\n## Other Awesome Lists\n\n- Other amazingly awesome lists can be found in the [awesome-awesomeness](https:\u002F\u002Fgithub.com\u002Fbayandin\u002Fawesome-awesomeness)\n- [Awesome Machine Learning](https:\u002F\u002Fgithub.com\u002Fjosephmisiti\u002Fawesome-machine-learning)\n- [lists](https:\u002F\u002Fgithub.com\u002Fjnv\u002Flists)\n- [awesome-dataviz](https:\u002F\u002Fgithub.com\u002Fjavierluraschi\u002Fawesome-dataviz)\n- [awesome-python](https:\u002F\u002Fgithub.com\u002Fvinta\u002Fawesome-python)\n- [Data Science IPython 
Notebooks.](https:\u002F\u002Fgithub.com\u002Fdonnemartin\u002Fdata-science-ipython-notebooks)\n- [awesome-r](https:\u002F\u002Fgithub.com\u002Fqinwf\u002Fawesome-R)\n- [awesome-datasets](https:\u002F\u002Fgithub.com\u002Fawesomedata\u002Fawesome-public-datasets)\n- [awesome-Machine Learning & Deep Learning Tutorials](https:\u002F\u002Fgithub.com\u002Fujjwalkarn\u002FMachine-Learning-Tutorials\u002Fblob\u002Fmaster\u002FREADME.md)\n- [Awesome Data Science Ideas](https:\u002F\u002Fgithub.com\u002FJosPolfliet\u002Fawesome-ai-usecases)\n- [Machine Learning for Software Engineers](https:\u002F\u002Fgithub.com\u002FZuzooVn\u002Fmachine-learning-for-software-engineers)\n- [Community Curated Data Science Resources](https:\u002F\u002Fhackr.io\u002Ftutorials\u002Flearn-data-science)\n- [Awesome Machine Learning On Source Code](https:\u002F\u002Fgithub.com\u002Fsrc-d\u002Fawesome-machine-learning-on-source-code)\n- [Awesome Community Detection](https:\u002F\u002Fgithub.com\u002Fbenedekrozemberczki\u002Fawesome-community-detection)\n- [Awesome Graph Classification](https:\u002F\u002Fgithub.com\u002Fbenedekrozemberczki\u002Fawesome-graph-classification)\n- [Awesome Decision Tree Papers](https:\u002F\u002Fgithub.com\u002Fbenedekrozemberczki\u002Fawesome-decision-tree-papers)\n- [Awesome Fraud Detection Papers](https:\u002F\u002Fgithub.com\u002Fbenedekrozemberczki\u002Fawesome-fraud-detection-papers)\n- [Awesome Gradient Boosting Papers](https:\u002F\u002Fgithub.com\u002Fbenedekrozemberczki\u002Fawesome-gradient-boosting-papers)\n- [Awesome Computer Vision Models](https:\u002F\u002Fgithub.com\u002Fnerox8664\u002Fawesome-computer-vision-models)\n- [Awesome Monte Carlo Tree Search](https:\u002F\u002Fgithub.com\u002Fbenedekrozemberczki\u002Fawesome-monte-carlo-tree-search-papers)\n- [Glossary of common statistics and ML terms](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fglossary-of-common-statistics-and-machine-learning-terms\u002F)\n- [100 NLP 
Papers](https:\u002F\u002Fgithub.com\u002Fmhagiwara\u002F100-nlp-papers)\n- [Awesome Game Datasets](https:\u002F\u002Fgithub.com\u002Fleomaurodesenv\u002Fgame-datasets#readme)\n- [Data Science Interviews Questions](https:\u002F\u002Fgithub.com\u002Falexeygrigorev\u002Fdata-science-interviews)\n- [Awesome Explainable Graph Reasoning](https:\u002F\u002Fgithub.com\u002FAstraZeneca\u002Fawesome-explainable-graph-reasoning)\n- [Top Data Science Interview Questions](https:\u002F\u002Fwww.interviewbit.com\u002Fdata-science-interview-questions\u002F)\n- [Awesome Drug Synergy, Interaction and Polypharmacy Prediction](https:\u002F\u002Fgithub.com\u002FAstraZeneca\u002Fawesome-drug-pair-scoring)\n- [Deep Learning Interview Questions](https:\u002F\u002Fwww.adaface.com\u002Fblog\u002Fdeep-learning-interview-questions\u002F)\n- [Top Future Trends in Data Science in 2023](https:\u002F\u002Fmedium.com\u002Fthe-modern-scientist\u002Ftop-future-trends-in-data-science-in-2023-3e616c8998b8)\n- [How Generative AI Is Changing Creative Work](https:\u002F\u002Fhbr.org\u002F2022\u002F11\u002Fhow-generative-ai-is-changing-creative-work)\n- [What is generative AI?](https:\u002F\u002Fwww.techtarget.com\u002Fsearchenterpriseai\u002Fdefinition\u002Fgenerative-AI)\n- [Top 100+ Machine Learning Interview Questions (Beginner to Advanced)](https:\u002F\u002Fwww.appliedaicourse.com\u002Fblog\u002Fmachine-learning-interview-questions\u002F)\n- [Data Science Projects](https:\u002F\u002Fgithub.com\u002Fveb-101\u002FData-Science-Projects)\n- [Is Data Science a Good Career?](https:\u002F\u002Fwww.scaler.com\u002Fblog\u002Fis-data-science-a-good-career\u002F)\n- [The Future of Data Science: Predictions and Trends](https:\u002F\u002Fwww.appliedaicourse.com\u002Fblog\u002Ffuture-of-data-science\u002F)\n- [Data Science and Machine Learning: What’s The Difference?](https:\u002F\u002Fwww.appliedaicourse.com\u002Fblog\u002Fdata-science-and-machine-learning-whats-the-difference\u002F)\n- [AI in Data Science: 
Uses, Roles, and Tools](https:\u002F\u002Fwww.scaler.com\u002Fblog\u002Fai-in-data-science\u002F)\n- [Top 13 Data Science Programming Languages](https:\u002F\u002Fwww.appliedaicourse.com\u002Fblog\u002Fdata-science-programming-languages\u002F)\n- [40+ Data Analytics Projects Ideas](https:\u002F\u002Fwww.appliedaicourse.com\u002Fblog\u002Fdata-analytics-projects-ideas\u002F)\n- [Best Data Science Courses with Certificates](https:\u002F\u002Fwww.appliedaicourse.com\u002Fblog\u002Fbest-data-science-courses\u002F)\n- [Generative AI Models](https:\u002F\u002Fwww.appliedaicourse.com\u002Fblog\u002Fgenerative-ai-models\u002F)\n- [Awesome Data Analysis](https:\u002F\u002Fgithub.com\u002FPavelGrigoryevDS\u002Fawesome-data-analysis) -  A curated list of data analysis tools, libraries and resources.\n- [Awesome Evidence Synthesis](https:\u002F\u002Fgithub.com\u002Fevidencesynthesis-tools\u002Fawesome-evidence-synthesis) - A curated list of open-source tools for systematic reviews, meta-analysis, and evidence synthesis.\n\n\n### Hobby\n- [Awesome Music Production](https:\u002F\u002Fgithub.com\u002Fad-si\u002Fawesome-music-production)\n","\u003Cdiv align=\"center\" markdown=\"1\">\n   \u003Csup>特别感谢赞助商：\u003C\u002Fsup>\n   \u003Cbr \u002F>\n   \u003Cbr \u002F>\n   \u003Ca href=\"https:\u002F\u002Frequestly.com\u002Fawesomedatascience\">\n      \u003Cimg alt=\"Requestly 赞助\" width=\"400\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Facademic_awesome-datascience_readme_0f75b519af94.png\">\n   \u003C\u002Fa>\n   \u003Cbr>\n   \n   ### [Requestly - 免费且开源的 Postman 替代工具](https:\u002F\u002Frequestly.com\u002Fawesomedatascience)\n   [一体化平台，用于测试、模拟和拦截 API](https:\u002F\u002Frequestly.com\u002Fawesomedatascience)\n   \u003Cbr>\n\u003C\u002Fdiv>\n\n\u003Chr>\n\n\u003Cdiv align=\"center\">\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Facademic_awesome-datascience_readme_3237deef3f14.jpg\">\u003C\u002Fdiv>\n\n# 
令人惊叹的数据科学\n\n[![Awesome](https:\u002F\u002Fcdn.jsdelivr.net\u002Fgh\u002Fsindresorhus\u002Fawesome@d7305f38d29fed78fa85652e3a63e154dd8e8829\u002Fmedia\u002Fbadge.svg)](https:\u002F\u002Fgithub.com\u002Fsindresorhus\u002Fawesome) \n\n欢迎贡献内容，请参阅 [`CONTRIBUTING.md`](CONTRIBUTING.md)。\n\n**一个开源的数据科学资源库，用于学习并应用相关概念以解决现实世界的问题。**\n\n这是开始学习 **数据科学** 的捷径。只需按照步骤回答以下问题：“什么是数据科学？我应该学习哪些内容才能掌握数据科学？”\n\n\n\u003Cbr>\n\n\n## 赞助商\n\n| 赞助商 | 简介 |\n| --- | --- |\n| --- | 成为首位赞助商吧！`github@academic.io` |\n\n\n\n## 目录\n\n- [什么是数据科学？](#what-is-data-science)\n- [从哪里开始？](#where-do-i-start)\n- [代理](#agents)\n- [学习资源](#training-resources)\n  - [教程](#tutorials)\n  - [免费课程](#free-courses)\n  - [大规模开放在线课程](#moocs)\n  - [强化培训项目](#intensive-programs)\n  - [高校](#colleges)\n- [数据科学工具箱](#the-data-science-toolbox)\n\n  - [算法](#algorithms)\n    - [监督学习](#supervised-learning)\n    - [无监督学习](#unsupervised-learning)\n    - [半监督学习](#semi-supervised-learning)\n    - [强化学习](#reinforcement-learning)\n    - [数据挖掘算法](#data-mining-algorithms)\n    - [深度学习架构](#deep-learning-architectures)\n  - [通用机器学习库](#general-machine-learning-packages)\n  - [模型评估与监控](#model-evaluation--monitoring)\n    - [Evidently AI](#evidently-ai)\n  - [深度学习框架](#deep-learning-packages)\n    - [PyTorch 生态系统](#pytorch-ecosystem)\n    - [TensorFlow 生态系统](#tensorflow-ecosystem)\n    - [Keras 生态系统](#keras-ecosystem)\n  - [可视化工具](#visualization-tools)\n  - [其他工具](#miscellaneous-tools)\n- [文献与媒体](#literature-and-media)\n  - [书籍](#books)\n    - [图书优惠（联盟链接）](#book-deals-affiliated)\n  - [期刊、出版物和杂志](#journals-publications-and-magazines)\n  - [新闻通讯](#newsletters)\n  - [博主](#bloggers)\n  - [演讲](#presentations)\n  - [播客](#podcasts)\n  - [YouTube 视频与频道](#youtube-videos--channels)\n- [社交互动](#socialize)\n  - [Facebook 账号](#facebook-accounts)\n  - [Twitter 账号](#twitter-accounts)\n  - [Telegram 频道](#telegram-channels)\n  - [Slack 社区](#slack-communities)\n  - [GitHub 群组](#github-groups)\n  - [数据科学竞赛](#data-science-competitions)\n- [趣味内容](#fun)\n  - 
[信息图表](#infographics)\n  - [数据集](#datasets)\n  - [漫画](#comics)\n- [其他优秀列表](#other-awesome-lists)\n  - [兴趣爱好](#hobby)\n\n## 什么是数据科学？\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n数据科学如今是计算机和互联网领域最热门的话题之一。人们从各种应用和系统中积累了海量数据，而现在正是对这些数据进行分析的时机。接下来的步骤是从数据中提炼出有价值的见解，并对未来做出预测。[在这里](https:\u002F\u002Fwww.quora.com\u002FData-Science\u002FWhat-is-data-science)你可以找到关于“数据科学”这一主题的最大问题，以及来自专家的数百条回答。\n\n\n| 链接 | 预览 |\n| --- | --- |\n| [面向初学者的数据科学](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FData-Science-For-Beginners) | 微软很高兴推出一门为期10周、包含20节课的数据科学课程。 |\n| [O’Reilly：什么是数据科学](https:\u002F\u002Fwww.oreilly.com\u002Fideas\u002Fwhat-is-data-science) | _数据科学家将创业精神与耐心相结合，愿意逐步构建数据产品，具备探索能力，并能不断迭代解决方案。他们本质上是跨学科的。他们能够处理问题的各个方面，从最初的数据收集和数据清洗，到得出结论。他们善于跳出固有思维模式，提出看待问题的新方法，或者应对范围极为宽泛的问题：“这里有很多数据，你能从中创造出什么？”_ |\n| [Quora：什么是数据科学](https:\u002F\u002Fwww.quora.com\u002FData-Science\u002FWhat-is-data-science) | 数据科学是技术、算法开发和数据推理等多个领域的结合，旨在研究和分析数据，从而为复杂问题找到创新性解决方案。简而言之，数据科学的核心在于通过分析数据，寻找创造性的方法来推动业务增长。 |\n| [21世纪最性感的职业](https:\u002F\u002Fhbr.org\u002F2012\u002F10\u002Fdata-scientist-the-sexiest-job-of-the-21st-century) | _如今的数据科学家类似于20世纪八九十年代华尔街的“量化分析师”。那时，拥有物理和数学背景的人纷纷涌入投资银行和对冲基金，在那里他们可以设计全新的算法和数据策略。随后，多所大学开设了金融工程硕士项目，培养出第二代人才，这些人才更容易被主流企业所接纳。这一模式在20世纪90年代再次上演，当时搜索引擎工程师凭借其稀缺的专业技能，很快便成为计算机科学专业教学的重点内容。_ |\n| [维基百科](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FData_science) | _数据科学是一门跨学科领域，它运用科学方法、流程、算法和系统，从结构化与非结构化数据中提取知识和洞见。数据科学与数据挖掘、机器学习和大数据密切相关。_ |\n| [如何成为一名数据科学家](https:\u002F\u002Fwww.mastersindatascience.org\u002Fcareers\u002Fdata-scientist\u002F) | _数据科学家是大数据的驾驭者，负责收集和分析大量结构化与非结构化数据。数据科学家的工作融合了计算机科学、统计学和数学。他们对数据进行分析、处理和建模，然后解读结果，为企业和其他组织制定可执行的行动计划。_ |\n| [数据科学的简短历史](https:\u002F\u002Fwww.forbes.com\u002Fsites\u002Fgilpress\u002F2013\u002F05\u002F28\u002Fa-very-short-history-of-data-science\u002F) | 
_数据科学家之所以变得炙手可热，很大程度上是因为成熟的统计学与年轻的计算机科学相结合。术语“数据科学”直到近期才出现，专门用来指代一种新兴职业——即能够从海量大数据中提炼出价值的职业。然而，对数据的理解由来已久，科学家、统计学家、图书馆员、计算机科学家等各界人士多年来一直在探讨这一话题。以下时间线梳理了“数据科学”这一术语的演变及其使用情况、对其定义的各种尝试，以及相关概念。_ |\n|[数据科学家的软件开发资源](https:\u002F\u002Fwww.rstudio.com\u002Fblog\u002Fsoftware-development-resources-for-data-scientists\u002F)|_数据科学家专注于通过探索性分析、统计学和模型来理解数据。而软件开发者则运用一套不同的知识体系和工具。尽管两者的工作重点看似不同，但数据科学团队仍可受益于采纳软件开发的最佳实践。版本控制、自动化测试等开发技能有助于创建可复现且适合生产的代码和工具。_|\n|[数据科学家职业发展路线图](https:\u002F\u002Fwww.scaler.com\u002Fblog\u002Fhow-to-become-a-data-scientist\u002F)|_在当今这个数据驱动的世界里，每天约产生3.2877亿TB的数据，数据科学无疑是一个极佳的职业选择。而且这一数字还在与日俱增，随之而来的是对能够利用这些数据推动业务增长的熟练数据科学家的需求也在迅速上升。_|\n|[规划你的数据科学家之路](https:\u002F\u002Fwww.appliedaicourse.com\u002Fblog\u002Fhow-to-become-a-data-scientist\u002F)|_数据科学是当前需求量最大的职业之一。随着企业越来越依赖数据来做决策，对专业数据科学家的需求也急剧增加。无论是科技公司、医疗机构，还是政府机构，数据科学家都在将原始数据转化为有价值洞察方面发挥着关键作用。那么，对于刚刚起步的人来说，究竟该如何成为一名数据科学家呢？_|\n\n## 我从哪里开始？\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n虽然不是绝对必要，但掌握一门编程语言对于成为一名高效的数据科学家至关重要。目前最受欢迎的语言是 _Python_，紧随其后的是 _R_。Python 是一种通用脚本语言，在众多领域都有广泛应用。而 R 则是一种专门用于统计分析的领域特定语言，内置了大量常用的统计工具。\n\n[Python](https:\u002F\u002Fpython.org\u002F) 无疑是科学界最流行的语言，这在很大程度上归功于其易用性以及由社区驱动的丰富软件包生态系统。安装软件包主要有两种方式：Pip（通过 `pip install` 调用），它是 Python 自带的包管理器；以及 [Anaconda](https:\u002F\u002Fwww.anaconda.com)（通过 `conda install` 调用），它是一个功能强大的包管理工具，不仅可以为 Python 和 R 安装软件包，还能下载 Git 等可执行文件。\n\n与 R 不同，Python 并非从一开始就专为数据科学设计，但有许多第三方库可以弥补这一不足。本文档后面会提供更为详尽的软件包列表，不过以下四个库是开启数据科学之旅的良好起点：[Scikit-Learn](https:\u002F\u002Fscikit-learn.org\u002Fstable\u002Findex.html) 是一个通用的数据科学库，实现了最流行的算法，并配有丰富的文档、教程和示例。即使你倾向于自己编写实现代码，Scikit-Learn 仍然是了解许多常见算法底层原理的重要参考。借助 [Pandas](https:\u002F\u002Fpandas.pydata.org\u002F)，你可以将数据整理并分析成方便的表格格式。[Numpy](https:\u002F\u002Fnumpy.org\u002F) 提供了高效的数学运算工具，尤其擅长处理向量和矩阵操作。而基于 [Matplotlib](https:\u002F\u002Fmatplotlib.org\u002F) 的 [Seaborn](https:\u002F\u002Fseaborn.pydata.org\u002F) 则能快速生成美观的数据可视化图表，自带许多实用的默认设置，并提供图库展示如何制作常见的数据可视化效果。\n\n在踏上数据科学家之路时，选择哪种语言并不特别重要，Python 和 R 
各有优劣。挑选一门你喜欢的语言，并查看我们下面列出的其中一门 [免费课程](#free-courses) 吧！\n\n### 初学者路线图\n如果你刚刚起步，这里有一条简单的推荐路径：\n\n1. **学习 Python** – 从基础开始：变量、循环、函数\n2. **学习核心库** – Pandas、NumPy、Matplotlib、Scikit-Learn\n3. **通过初级项目练习** – 尝试在 Kaggle 上完成泰坦尼克号生存预测或房价预测项目\n4. **学习数学基础** – 统计学、线性代数、概率论\n5. **进入机器学习领域** – 监督学习 → 非监督学习 → 深度学习\n\n## 代理\n\n本节包含对数据科学工作流有用的代理框架和工具。\n\n### 框架\n- [ADK-Rust](https:\u002F\u002Fgithub.com\u002Fzavora-ai\u002Fadk-rust) - 基于 Rust 的生产级 AI 代理开发工具包，采用模型无关的设计（支持 Gemini、OpenAI、Anthropic 等），支持多种代理类型（LLM、图谱、工作流），并具备 MCP 支持和内置遥测功能。\n\n### 工具\n- [Frostbyte MCP](https:\u002F\u002Fgithub.com\u002FOzorOwn\u002Ffrostbyte-mcp) - 一款 MCP 服务器，为 AI 代理提供 13 种数据工具：实时加密货币价格、IP 地理定位、DNS 查询、网页抓取转 Markdown、代码执行和截图等。只需一个 API 密钥即可使用 40 多项服务。\n- [Arch Tools](https:\u002F\u002Farchtools.dev) - 61 款适用于数据科学工作流的生产级 AI API 工具：代码分析、网页抓取、自然语言处理、图像生成、加密货币数据和搜索等。支持 REST API 和 MCP 协议。[GitHub](https:\u002F\u002Fgithub.com\u002FDeesmo\u002FArch-AI-Tools)\n\n### 研究与知识检索\n- [BGPT MCP](https:\u002F\u002Fbgpt.pro\u002Fmcp) - 一个 MCP 服务器，为 AI 代理提供基于原始实验数据构建的科学论文数据库，这些数据是从全文研究中提取的。每篇论文返回 25 余个结构化字段，包括方法、结果、样本量和质量评分。[GitHub](https:\u002F\u002Fgithub.com\u002Fconnerlambden\u002Fbgpt-mcp)\n\n### 工作流\n**[`^        返回顶部        ^`](#awesome-data-science)**\n- [sim](https:\u002F\u002Fsim.ai) - Sim Studio 的界面轻量且直观，能够快速构建和部署与您常用工具连接的 LLM。\n\n## 学习资源\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n怎样学习数据科学呢？当然是通过实践数据科学！好吧，好吧——但这对你刚开始学习时可能帮助不大。在这一部分，我们按投入程度由低到高的顺序列出了几种学习资源：[教程](#tutorials)、[大规模开放在线课程 (MOOCs)](#moocs)、[强化课程](#intensive-programs) 和 [高校](#colleges)。\n\n### 教程\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n- [1000个数据科学项目](https:\u002F\u002Fcloud.blobcity.com\u002F#\u002Fps\u002Fexplore)，你可以在浏览器中使用IPython运行。\n- [#tidytuesday](https:\u002F\u002Fgithub.com\u002Frfordatascience\u002Ftidytuesday) - 一个面向R生态系统的每周数据项目。\n- [以你的方式进行数据科学](https:\u002F\u002Fgithub.com\u002Fjadianes\u002Fdata-science-your-way)\n- [DataCamp速查表](https:\u002F\u002Fwww.datacamp.com\u002Fcheat-sheet) 数据科学速查表。\n- 
[PySpark速查表](https:\u002F\u002Fgithub.com\u002Fkevinschaich\u002Fpyspark-cheatsheet)\n- [使用Python的机器学习、数据科学和深度学习](https:\u002F\u002Fwww.manning.com\u002Flivevideo\u002Fmachine-learning-data-science-and-deep-learning-with-python)\n- [潜狄利克雷分配指南](https:\u002F\u002Fmedium.com\u002F@lettier\u002Fhow-does-lda-work-ill-explain-using-emoji-108abf40fa7d)\n- [克林顿·谢泼德所著《Python遗传算法》一书中的源代码教程](https:\u002F\u002Fgithub.com\u002Fhandcraftsman\u002FGeneticAlgorithmsWithPython)\n- [用于机器学习信号处理的入门教程](https:\u002F\u002Fgithub.com\u002Fjinglescode\u002Fpython-signal-processing)\n- [实时部署](https:\u002F\u002Fwww.microprediction.com\u002Fpython-1) Python时间序列模型部署教程。\n- [数据科学中的Python：初学者指南](https:\u002F\u002Flearntocodewith.me\u002Fposts\u002Fpython-for-data-science\u002F)\n- [机器学习面试的最小可行学习计划](https:\u002F\u002Fgithub.com\u002Fkhangich\u002Fmachine-learning-interview)\n- [通过构建扎实的项目来理解并掌握机器学习工程](http:\u002F\u002Fmlzoomcamp.com\u002F)\n- [12个免费的数据科学项目，用于练习Python和Pandas](https:\u002F\u002Fwww.datawars.io\u002Farticles\u002F12-free-data-science-projects-to-practice-python-and-pandas)\n- [数据科学新手的最佳简历](https:\u002F\u002Fenhancv.com\u002Fresume-examples\u002Fdata-scientist\u002F)\n- [用Java理解数据科学课程](https:\u002F\u002Fwww.alter-solutions.com\u002Farticles\u002Fjava-data-science)\n- [数据分析面试题（从初级到高级）](https:\u002F\u002Fwww.appliedaicourse.com\u002Fblog\u002Fdata-analytics-interview-questions\u002F)\n- [顶级100+数据科学面试题及答案](https:\u002F\u002Fwww.appliedaicourse.com\u002Fblog\u002Fdata-science-interview-questions\u002F)\n- [DataDriven - SQL、Python和数据建模面试题](https:\u002F\u002Fwww.datadriven.io\u002F)\n\n### 免费课程\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n- [使用R的数据科学家](https:\u002F\u002Fwww.datacamp.com\u002Ftracks\u002Fdata-scientist-with-r)\n- [使用Python的数据科学家](https:\u002F\u002Fwww.datacamp.com\u002Ftracks\u002Fdata-scientist-with-python)\n- 
[遗传算法OCW课程](https:\u002F\u002Focw.mit.edu\u002Fcourses\u002Felectrical-engineering-and-computer-science\u002F6-034-artificial-intelligence-fall-2010\u002Flecture-videos\u002Flecture-1-introduction-and-scope\u002F)\n- [AI专家路线图](https:\u002F\u002Fgithub.com\u002FAMAI-GmbH\u002FAI-Expert-Roadmap) - 成为人工智能专家的路线图\n- [凸优化](https:\u002F\u002Fwww.edx.org\u002Fcourse\u002Fconvex-optimization) - 凸优化（凸分析基础；最小二乘法、线性和二次规划、半定规划、极小极大问题、极值体积等问题；最优性条件、对偶理论等）\n- [从数据中学习](https:\u002F\u002Fhome.work.caltech.edu\u002Ftelecourse.html) - 介绍机器学习，涵盖基本理论、算法和应用\n- [Kaggle](https:\u002F\u002Fwww.kaggle.com\u002Flearn) - 学习数据科学、机器学习、Python等\n- [ML可观测性基础](https:\u002F\u002Farize.com\u002Fml-observability-fundamentals\u002F) - 学习如何监控并定位生产环境中机器学习问题的根本原因。\n- [Weights & Biases高效MLOps：模型开发](https:\u002F\u002Fwww.wandb.courses\u002Fcourses\u002Feffective-mlops-model-development) - 使用W&B构建端到端机器学习系统的免费课程和认证\n- [Scaler提供的数据科学Python课程](https:\u002F\u002Fwww.scaler.com\u002Ftopics\u002Fcourse\u002Fpython-for-data-science\u002F) - 本课程旨在帮助初学者掌握在当今数据驱动世界中脱颖而出所需的关键技能。全面的课程将为你提供统计学、编程、数据可视化和机器学习的坚实基础。\n- [MLSys-NYU-2022](https:\u002F\u002Fgithub.com\u002Fjacopotagliabue\u002FMLSys-NYU-2022\u002Ftree\u002Fmain) - 纽约大学坦登学院2022年金融机器学习课程的幻灯片、脚本和资料。\n- [动手训练和部署机器学习](https:\u002F\u002Fgithub.com\u002FPaulescu\u002Fhands-on-train-and-deploy-ml) - 一门动手实践课程，用于训练和部署一个预测加密货币价格的无服务器API。\n- [LLMOps：使用大型语言模型构建真实世界的应用程序](https:\u002F\u002Fwww.comet.com\u002Fsite\u002Fllm-course\u002F) - 学习如何使用该领域最新的工具和技术，利用LLM构建现代软件。\n- [视觉模型提示工程](https:\u002F\u002Fwww.deeplearning.ai\u002Fshort-courses\u002Fprompt-engineering-for-vision-models\u002F) - 在这门来自DeepLearning.AI的免费课程中，学习如何使用自然语言、坐标点、边界框、分割掩码，甚至其他图像来提示最先进的计算机视觉模型。\n- [IBM数据科学课程](https:\u002F\u002Fskillsbuild.org\u002Fstudents\u002Fcourse-catalog\u002Fdata-science) - 免费资源，了解什么是数据科学以及它在不同行业中的应用。\n\n### MOOC课程\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n- [Coursera 数据科学导论](https:\u002F\u002Fwww.coursera.org\u002Fspecializations\u002Fdata-science)\n- 
[数据科学——9步课程，Coursera上的专项课程](https:\u002F\u002Fwww.coursera.org\u002Fspecializations\u002Fjhu-data-science)\n- [数据挖掘——5步课程，Coursera上的专项课程](https:\u002F\u002Fwww.coursera.org\u002Fspecializations\u002Fdata-mining)\n- [机器学习——5步课程，Coursera上的专项课程](https:\u002F\u002Fwww.coursera.org\u002Fspecializations\u002Fmachine-learning)\n- [CS 109 数据科学](https:\u002F\u002Fcs109.github.io\u002F2015\u002F)\n- [OpenIntro](https:\u002F\u002Fwww.openintro.org\u002F)\n- [CS 171 可视化](https:\u002F\u002Fwww.cs171.org\u002F#!index.md)\n- [流程挖掘：数据科学实战](https:\u002F\u002Fwww.coursera.org\u002Flearn\u002Fprocess-mining)\n- [牛津深度学习](https:\u002F\u002Fwww.cs.ox.ac.uk\u002Fprojects\u002FDeepLearn\u002F)\n- [牛津深度学习——视频](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PLE6Wd9FR--EfW8dtjAuPoTuPcqmOV53Fu)\n- [牛津机器学习](https:\u002F\u002Fwww.cs.ox.ac.uk\u002Fresearch\u002Fai_ml\u002Findex.html)\n- [UBC 机器学习——视频](https:\u002F\u002Fwww.cs.ubc.ca\u002F~nando\u002F540-2013\u002Flectures.html)\n- [数据科学专项课程](https:\u002F\u002Fgithub.com\u002FDataScienceSpecialization\u002Fcourses)\n- [Coursera 大数据专项课程](https:\u002F\u002Fwww.coursera.org\u002Fspecializations\u002Fbig-data)\n- [Edx 的“面向数据科学与分析的统计思维”](https:\u002F\u002Fwww.edx.org\u002Fcourse\u002Fstatistical-thinking-for-data-science-and-analytic)\n- [IBM 认知课堂人工智能](https:\u002F\u002Fcognitiveclass.ai\u002F)\n- [Udacity——深度学习](https:\u002F\u002Fwww.udacity.com\u002Fcourse\u002Fintro-to-tensorflow-for-deep-learning--ud187)\n- [Keras实战](https:\u002F\u002Fwww.manning.com\u002Flivevideo\u002Fkeras-in-motion)\n- [微软数据科学专业项目](https:\u002F\u002Facademy.microsoft.com\u002Fen-us\u002Fprofessional-program\u002Ftracks\u002Fdata-science\u002F)\n- [COMP3222\u002FCOMP6246——机器学习技术](https:\u002F\u002Ftdgunes.com\u002FCOMP6246-2019Fall\u002F)\n- [CS 231——用于视觉识别的卷积神经网络](https:\u002F\u002Fcs231n.github.io\u002F)\n- [Coursera TensorFlow实战](https:\u002F\u002Fwww.coursera.org\u002Fprofessional-certificates\u002Ftensorflow-in-practice)\n- [Coursera 
深度学习专项课程](https:\u002F\u002Fwww.coursera.org\u002Fspecializations\u002Fdeep-learning)\n- [365 数据科学课程](https:\u002F\u002F365datascience.com\u002F)\n- [Coursera 自然语言处理专项课程](https:\u002F\u002Fwww.coursera.org\u002Fspecializations\u002Fnatural-language-processing)\n- [Coursera GAN专项课程](https:\u002F\u002Fwww.coursera.org\u002Fspecializations\u002Fgenerative-adversarial-networks-gans)\n- [Codecademy 数据科学](https:\u002F\u002Fwww.codecademy.com\u002Flearn\u002Fpaths\u002Fdata-science)\n- [线性代数](https:\u002F\u002Focw.mit.edu\u002Fcourses\u002F18-06sc-linear-algebra-fall-2011\u002F)——吉尔伯特·斯特兰格的线性代数课程\n- [2020年线性代数展望（G. 斯特兰格）](https:\u002F\u002Focw.mit.edu\u002Fresources\u002Fres-18-010-a-2020-vision-of-linear-algebra-spring-2020\u002F)\n- [数据科学Python基础课程](https:\u002F\u002Fintellipaat.com\u002Facademy\u002Fcourse\u002Fpython-for-data-science-free-training\u002F)\n- [数据科学：统计学与机器学习](https:\u002F\u002Fwww.coursera.org\u002Fspecializations\u002Fdata-science-statistics-machine-learning)\n- [面向生产的机器学习工程（MLOps）](https:\u002F\u002Fwww.coursera.org\u002Fspecializations\u002Fmachine-learning-engineering-for-production-mlops)\n- [明尼苏达大学推荐系统专项课程](https:\u002F\u002Fwww.coursera.org\u002Fspecializations\u002Frecommender-systems)，这是一项专注于推荐系统的中级\u002F高级专项课程。\n- [斯坦福人工智能专业项目](https:\u002F\u002Fonline.stanford.edu\u002Fprograms\u002Fartificial-intelligence-professional-program)\n- [使用Python的数据科学家](https:\u002F\u002Fapp.datacamp.com\u002Flearn\u002Fcareer-tracks\u002Fdata-scientist-with-python)\n- [Julia编程](https:\u002F\u002Fwww.udemy.com\u002Fcourse\u002Fprogramming-with-julia\u002F)\n- [Scaler 数据科学与机器学习项目](https:\u002F\u002Fwww.scaler.com\u002Fdata-science-course\u002F)\n- [数据科学技能树](https:\u002F\u002Flabex.io\u002Fskilltrees\u002Fdata-science)\n- [面向初学者的数据科学——与AI导师一起学习](https:\u002F\u002Fcodekidz.ai\u002Flesson-intro\u002Fdata-science-368dbf)\n- [面向初学者的机器学习——与AI导师一起学习](https:\u002F\u002Fcodekidz.ai\u002Flesson-intro\u002Fmachine-lear-36abfb)\n- 
[数据科学导论](https:\u002F\u002Fwww.mygreatlearning.com\u002Facademy\u002Flearn-for-free\u002Fcourses\u002Fintroduction-to-data-science)\n- [数据科学Python入门](https:\u002F\u002Fwww.codecademy.com\u002Flearn\u002Fgetting-started-with-python-for-data-science)\n- [谷歌高级数据分析证书](https:\u002F\u002Fgrow.google\u002Fdata-analytics\u002F)——涵盖数据分析、统计学和机器学习基础的专业课程。\n- [机器语言使用分析——语料库语言学基础](https:\u002F\u002Fwww.twillo.de\u002Fedu-sharing\u002Fcomponents\u002Fcollections?id=e6ce03ae-4660-49b0-be10-dcc92e71e796)——由北莱茵-威斯特法伦州资助的德语文本挖掘\u002F语料库语言学课程材料\n- [为德语研究者准备的编程](https:\u002F\u002Fwww.twillo.de\u002Fedu-sharing\u002Fcomponents\u002Fcollections?id=16bac749-f10e-483f-9020-5d6365b4e092)——由北莱茵-威斯特法伦州资助的面向数字人文领域的Python编程课程材料\n\n### 集中培训项目\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n- [S2DS](https:\u002F\u002Fwww.s2ds.org\u002F)\n- [WorldQuant University 应用数据科学实验室](https:\u002F\u002Fwww.wqu.edu\u002Fadsl)\n\n### 大学\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n- [提供数据科学学位的高校列表。](https:\u002F\u002Fgithub.com\u002Fryanswanstrom\u002Fawesome-datascience-colleges)\n- [伯克利大学数据科学学位项目](https:\u002F\u002Fischoolonline.berkeley.edu\u002Fdata-science\u002F)\n- [弗吉尼亚大学数据科学学位项目](https:\u002F\u002Fdatascience.virginia.edu\u002F)\n- [威斯康星大学数据科学学位项目](https:\u002F\u002Fdatasciencedegree.wisconsin.edu\u002F)\n- [数据科学与应用理学学士学位](https:\u002F\u002Fstudy.iitm.ac.in\u002Fds\u002F)\n- [波士顿大学计算机信息系统硕士项目](https:\u002F\u002Fwww.bu.edu\u002Fonline\u002Fprograms\u002Fgraduate-programs\u002Fcomputer-information-systems-masters-degree\u002F)\n- [亚利桑那州立大学在线商业分析硕士项目](https:\u002F\u002Fasuonline.asu.edu\u002Fonline-degree-programs\u002Fgraduate\u002Fmaster-science-business-analytics\u002F)\n- [雪城大学应用数据科学硕士项目](https:\u002F\u002Fischool.syr.edu\u002Facademics\u002Fapplied-data-science-masters-degree\u002F)\n- [洛芬纳大学管理与数据科学硕士学位](https:\u002F\u002Fwww.leuphana.de\u002Fen\u002Fgraduate-school\u002Fmasters-programmes\u002Fmanagement-data-science.html)\n- 
[墨尔本大学数据科学硕士学位](https:\u002F\u002Fstudy.unimelb.edu.au\u002Ffind\u002Fcourses\u002Fgraduate\u002Fmaster-of-data-science\u002F#overview)\n- [爱丁堡大学数据科学理学硕士项目](https:\u002F\u002Fwww.ed.ac.uk\u002Fstudying\u002Fpostgraduate\u002Fdegrees\u002Findex.php?r=site\u002Fview&id=902)\n- [女王大学管理分析硕士学位](https:\u002F\u002Fsmith.queensu.ca\u002Fgrad_studies\u002Fmma\u002Findex.php)\n- [伊利诺伊理工学院应用数据科学硕士学位](https:\u002F\u002Fwww.iit.edu\u002Facademics\u002Fprograms\u002Fdata-science-mas)\n- [密歇根大学应用数据科学硕士学位](https:\u002F\u002Fwww.si.umich.edu\u002Fprograms\u002Fmaster-applied-data-science)\n- [埃因霍温理工大学数据科学与人工智能硕士学位](https:\u002F\u002Fwww.tue.nl\u002Fen\u002Feducation\u002Fgraduate-school\u002Fmaster-data-science-and-artificial-intelligence\u002F)\n- [格拉纳达大学数据科学与计算机工程硕士学位](https:\u002F\u002Fmasteres.ugr.es\u002Fdatcom\u002F)\n\n## 数据科学工具箱\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n本节收集了数据科学领域中常用的软件包、工具、算法及其他实用资源。\n\n### 算法\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n以下是一些机器学习和数据挖掘算法及模型，可帮助您理解数据并从中提取有意义的信息。\n\n#### 三种类型的机器学习系统\n\n- 基于人类监督的训练\n- 基于增量式在线学习\n- 基于数据点比较与模式识别\n\n### 比较\n- [datacompy](https:\u002F\u002Fgithub.com\u002Fcapitalone\u002Fdatacompy) - DataComPy 是一个用于比较两个 Pandas DataFrame 的工具包。\n\n#### 监督学习\n\n- [回归](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FRegression)\n- [线性回归](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FLinear_regression)\n- [普通最小二乘法](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FOrdinary_least_squares)\n- [逻辑回归](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FLogistic_regression)\n- [逐步回归](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FStepwise_regression)\n- [多元自适应回归样条](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FMultivariate_adaptive_regression_spline)\n- [Softmax 回归](https:\u002F\u002Fd2l.ai\u002Fchapter_linear-classification\u002Fsoftmax-regression.html)\n- [局部加权散点平滑](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FLocal_regression)\n- 分类\n  - [k 
最近邻](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FK-nearest_neighbors_algorithm)\n  - [支持向量机](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FSupport_vector_machine)\n  - [决策树](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FDecision_tree)\n  - [ID3 算法](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FID3_algorithm)\n  - [C4.5 算法](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FC4.5_algorithm)\n- [集成学习](https:\u002F\u002Fscikit-learn.org\u002Fstable\u002Fmodules\u002Fensemble.html)\n  - [提升方法](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FBoosting_(machine_learning))\n  - [堆叠](https:\u002F\u002Fmachinelearningmastery.com\u002Fstacking-ensemble-machine-learning-with-python)\n  - [自助聚合](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FBootstrap_aggregating)\n  - [随机森林](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FRandom_forest)\n  - [AdaBoost](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FAdaBoost)\n\n#### 无监督学习\n- [聚类](https:\u002F\u002Fscikit-learn.org\u002Fstable\u002Fmodules\u002Fclustering.html#clustering)\n  - [层次聚类](https:\u002F\u002Fscikit-learn.org\u002Fstable\u002Fmodules\u002Fclustering.html#hierarchical-clustering)\n  - [k 均值](https:\u002F\u002Fscikit-learn.org\u002Fstable\u002Fmodules\u002Fclustering.html#k-means)\n  - [基于密度的聚类](https:\u002F\u002Fscikit-learn.org\u002Fstable\u002Fmodules\u002Fclustering.html#dbscan)\n  - [模糊聚类](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FFuzzy_clustering)\n  - [混合模型](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FMixture_model)\n- [降维](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FDimensionality_reduction)\n  - [主成分分析 (PCA)](https:\u002F\u002Fscikit-learn.org\u002Fstable\u002Fmodules\u002Fdecomposition.html#principal-component-analysis-pca)\n  - [t-SNE; t 分布随机邻域嵌入](https:\u002F\u002Fscikit-learn.org\u002Fstable\u002Fmodules\u002Fmanifold.html#t-distributed-stochastic-neighbor-embedding-tsne)\n  - 
[因子分析](https:\u002F\u002Fscikit-learn.org\u002Fstable\u002Fmodules\u002Fdecomposition.html#factor-analysis)\n  - [隐含狄利克雷分布 (LDA)](https:\u002F\u002Fscikit-learn.org\u002Fstable\u002Fmodules\u002Fdecomposition.html#latent-dirichlet-allocation-lda)\n- [神经网络](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FNeural_network)\n- [自组织映射](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FSelf-organizing_map)\n- [适应性共振理论](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FAdaptive_resonance_theory)\n- [隐马尔可夫模型 (HMM)](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FHidden_Markov_model)\n\n#### 半监督学习\n\n- S3VM\n- [聚类](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FWeak_supervision#Cluster_assumption)\n- [生成模型](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FWeak_supervision#Generative_models)\n- [低密度分离](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FWeak_supervision#Low-density_separation)\n- [拉普拉斯正则化](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FWeak_supervision#Laplacian_regularization)\n- [启发式方法](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FWeak_supervision#Heuristic_approaches)\n\n#### 强化学习\n\n- [Q 学习](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FQ-learning)\n- [SARSA (状态-动作-奖励-状态-动作) 算法](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FState%E2%80%93action%E2%80%93reward%E2%80%93state%E2%80%93action)\n- [时序差分学习](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FTemporal_difference_learning#:~:text=Temporal%20difference%20(TD)%20learning%20refers,estimate%20of%20the%20value%20function.)\n\n#### 数据挖掘算法\n\n- [C4.5](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FC4.5_algorithm)\n- [k-Means](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FK-means_clustering)\n- [SVM (支持向量机)](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FSupport_vector_machine)\n- [Apriori](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FApriori_algorithm)\n- [EM (期望-最大化)](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FExpectation%E2%80%93maximization_algorithm)\n- 
[PageRank](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FPageRank)\n- [AdaBoost](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FAdaBoost)\n- [KNN (K 最近邻)](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FK-nearest_neighbors_algorithm)\n- [朴素贝叶斯](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FNaive_Bayes_classifier)\n- [CART (分类与回归树)](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FDecision_tree_learning)\n\n#### 现代数据挖掘算法\n\n- [XGBoost (极端梯度提升)](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FXGBoost)\n- [LightGBM (轻量级梯度提升机)](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FLightGBM)\n- [CatBoost](https:\u002F\u002Fcatboost.ai\u002F)\n- [HDBSCAN (基于密度的层次化空间聚类，适用于噪声数据)](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FDBSCAN#HDBSCAN)\n- [FP-Growth (频繁模式增长算法)](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FAssociation_rule_learning#FP-growth_algorithm)\n- [孤立森林](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FIsolation_forest)\n- [深度嵌入聚类 (DEC)](https:\u002F\u002Farxiv.org\u002Fabs\u002F1511.06335)\n- [TPU (Top-k 周期性和高实用价值模式)](https:\u002F\u002Farxiv.org\u002Fabs\u002F2509.15732)\n- [上下文感知规则挖掘 (基于 Transformer 的框架)](https:\u002F\u002Farxiv.org\u002Fabs\u002F2503.11125)\n\n\n#### 深度学习架构\n\n- [多层感知器](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FMultilayer_perceptron)\n- [卷积神经网络 (CNN)](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FConvolutional_neural_network)\n- [循环神经网络 (RNN)](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FRecurrent_neural_network)\n- [玻尔兹曼机](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FBoltzmann_machine)\n- [自编码器](https:\u002F\u002Fwww.tensorflow.org\u002Ftutorials\u002Fgenerative\u002Fautoencoder)\n- [生成对抗网络 (GAN)](https:\u002F\u002Fdevelopers.google.com\u002Fmachine-learning\u002Fgan\u002Fgan_structure)\n- [自组织映射](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FSelf-organizing_map)\n- [Transformer](https:\u002F\u002Fwww.tensorflow.org\u002Ftext\u002Ftutorials\u002Ftransformer)\n- [条件随机场 
(CRF)](https:\u002F\u002Ftowardsdatascience.com\u002Fconditional-random-fields-explained-e5b8256da776)\n- [机器学习系统设计](https:\u002F\u002Fwww.evidentlyai.com\u002Fml-system-design)\n\n### 通用机器学习工具包\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n* [scikit-learn](https:\u002F\u002Fscikit-learn.org\u002F)\n* [scikit-multilearn](https:\u002F\u002Fgithub.com\u002Fscikit-multilearn\u002Fscikit-multilearn)\n* [sklearn-expertsys](https:\u002F\u002Fgithub.com\u002Ftmadl\u002Fsklearn-expertsys)\n* [scikit-feature](https:\u002F\u002Fgithub.com\u002Fjundongl\u002Fscikit-feature)\n* [scikit-rebate](https:\u002F\u002Fgithub.com\u002FEpistasisLab\u002Fscikit-rebate)\n* [seqlearn](https:\u002F\u002Fgithub.com\u002Flarsmans\u002Fseqlearn)\n* [sklearn-bayes](https:\u002F\u002Fgithub.com\u002FAmazaspShumik\u002Fsklearn-bayes)\n* [sklearn-crfsuite](https:\u002F\u002Fgithub.com\u002FTeamHG-Memex\u002Fsklearn-crfsuite)\n* [sklearn-deap](https:\u002F\u002Fgithub.com\u002Frsteca\u002Fsklearn-deap)\n* [sigopt_sklearn](https:\u002F\u002Fgithub.com\u002Fsigopt\u002Fsigopt-sklearn)\n* [sklearn-evaluation](https:\u002F\u002Fgithub.com\u002Fedublancas\u002Fsklearn-evaluation)\n* [scikit-image](https:\u002F\u002Fgithub.com\u002Fscikit-image\u002Fscikit-image)\n* [scikit-opt](https:\u002F\u002Fgithub.com\u002Fguofei9987\u002Fscikit-opt)\n* [scikit-posthocs](https:\u002F\u002Fgithub.com\u002Fmaximtrp\u002Fscikit-posthocs)\n* [feature-engine](https:\u002F\u002Ffeature-engine.trainindata.com\u002F)\n* [pystruct](https:\u002F\u002Fgithub.com\u002Fpystruct\u002Fpystruct)\n* [Shogun](https:\u002F\u002Fwww.shogun-toolbox.org\u002F)\n* [xLearn](https:\u002F\u002Fgithub.com\u002Faksnzhy\u002Fxlearn)\n* [cuML](https:\u002F\u002Fgithub.com\u002Frapidsai\u002Fcuml)\n* [causalml](https:\u002F\u002Fgithub.com\u002Fuber\u002Fcausalml)\n* [mlpack](https:\u002F\u002Fgithub.com\u002Fmlpack\u002Fmlpack)\n* [MLxtend](https:\u002F\u002Fgithub.com\u002Frasbt\u002Fmlxtend)\n* 
[modAL](https:\u002F\u002Fgithub.com\u002FmodAL-python\u002FmodAL)\n* [Sparkit-learn](https:\u002F\u002Fgithub.com\u002Flensacom\u002Fsparkit-learn)\n* [hyperlearn](https:\u002F\u002Fgithub.com\u002Fdanielhanchen\u002Fhyperlearn)\n* [dlib](https:\u002F\u002Fgithub.com\u002Fdavisking\u002Fdlib)\n* [imodels](https:\u002F\u002Fgithub.com\u002Fcsinva\u002Fimodels)\n* [jSciPy](https:\u002F\u002Fgithub.com\u002Fhissain\u002Fjscipy) - SciPy信号处理模块的Java移植版，提供滤波器、变换及其他科学计算工具。\n* [RuleFit](https:\u002F\u002Fgithub.com\u002FchristophM\u002Frulefit)\n* [pyGAM](https:\u002F\u002Fgithub.com\u002Fdswah\u002FpyGAM)\n* [Deepchecks](https:\u002F\u002Fgithub.com\u002Fdeepchecks\u002Fdeepchecks)\n* [scikit-survival](https:\u002F\u002Fscikit-survival.readthedocs.io\u002Fen\u002Fstable)\n* [interpretable](https:\u002F\u002Fpypi.org\u002Fproject\u002Finterpretable)\n* [XGBoost](https:\u002F\u002Fgithub.com\u002Fdmlc\u002Fxgboost)\n* [LightGBM](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FLightGBM)\n* [CatBoost](https:\u002F\u002Fgithub.com\u002Fcatboost\u002Fcatboost)\n* [PerpetualBooster](https:\u002F\u002Fgithub.com\u002Fperpetual-ml\u002Fperpetual)\n* [JAX](https:\u002F\u002Fgithub.com\u002Fgoogle\u002Fjax)\n\n\n\n### 深度学习工具包\n\n#### PyTorch 生态系统\n* [PyTorch](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Fpytorch)\n* [torchvision](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Fvision)\n* [torchtext](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Ftext)\n* [torchaudio](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Faudio)\n* [ignite](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Fignite)\n* [PyTorchNet](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Ftnt)\n* [PyToune](https:\u002F\u002Fgithub.com\u002FGRAAL-Research\u002Fpoutyne)\n* [skorch](https:\u002F\u002Fgithub.com\u002Fskorch-dev\u002Fskorch)\n* [PyVarInf](https:\u002F\u002Fgithub.com\u002Fctallec\u002Fpyvarinf)\n* [pytorch_geometric](https:\u002F\u002Fgithub.com\u002Fpyg-team\u002Fpytorch_geometric)\n* 
[GPyTorch](https:\u002F\u002Fgithub.com\u002Fcornellius-gp\u002Fgpytorch)\n* [pyro](https:\u002F\u002Fgithub.com\u002Fpyro-ppl\u002Fpyro)\n* [Catalyst](https:\u002F\u002Fgithub.com\u002Fcatalyst-team\u002Fcatalyst)\n* [pytorch_tabular](https:\u002F\u002Fgithub.com\u002Fmanujosephv\u002Fpytorch_tabular)\n* [Yolov3](https:\u002F\u002Fgithub.com\u002Fultralytics\u002Fyolov3)\n* [Yolov5](https:\u002F\u002Fgithub.com\u002Fultralytics\u002Fyolov5)\n* [Yolov8](https:\u002F\u002Fgithub.com\u002Fultralytics\u002Fultralytics)\n\n#### TensorFlow 生态系统\n* [TensorFlow](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Ftensorflow)\n* [TensorLayer](https:\u002F\u002Fgithub.com\u002Ftensorlayer\u002FTensorLayer)\n* [TFLearn](https:\u002F\u002Fgithub.com\u002Ftflearn\u002Ftflearn)\n* [Sonnet](https:\u002F\u002Fgithub.com\u002Fdeepmind\u002Fsonnet)\n* [tensorpack](https:\u002F\u002Fgithub.com\u002Ftensorpack\u002Ftensorpack)\n* [TRFL](https:\u002F\u002Fgithub.com\u002Fdeepmind\u002Ftrfl)\n* [Polyaxon](https:\u002F\u002Fgithub.com\u002Fpolyaxon\u002Fpolyaxon)\n* [NeuPy](https:\u002F\u002Fgithub.com\u002Fitdxer\u002Fneupy)\n* [tfdeploy](https:\u002F\u002Fgithub.com\u002Friga\u002Ftfdeploy)\n* [tensorflow-upstream](https:\u002F\u002Fgithub.com\u002FROCmSoftwarePlatform\u002Ftensorflow-upstream)\n* [TensorFlow Fold](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Ffold)\n* [tensorlm](https:\u002F\u002Fgithub.com\u002Fbatzner\u002Ftensorlm)\n* [TensorLight](https:\u002F\u002Fgithub.com\u002Fbsautermeister\u002Ftensorlight)\n* [Mesh TensorFlow](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Fmesh)\n* [Ludwig](https:\u002F\u002Fgithub.com\u002Fludwig-ai\u002Fludwig)\n* [TF-Agents](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Fagents)\n* [TensorForce](https:\u002F\u002Fgithub.com\u002Ftensorforce\u002Ftensorforce)\n\n#### Keras 生态系统\n\n* [Keras](https:\u002F\u002Fkeras.io)\n* [keras-contrib](https:\u002F\u002Fgithub.com\u002Fkeras-team\u002Fkeras-contrib)\n* 
[Hyperas](https:\u002F\u002Fgithub.com\u002Fmaxpumperla\u002Fhyperas)\n* [Elephas](https:\u002F\u002Fgithub.com\u002Fmaxpumperla\u002Felephas)\n* [Hera](https:\u002F\u002Fgithub.com\u002Fkeplr-io\u002Fhera)\n* [Spektral](https:\u002F\u002Fgithub.com\u002Fdanielegrattarola\u002Fspektral)\n* [qkeras](https:\u002F\u002Fgithub.com\u002Fgoogle\u002Fqkeras)\n* [keras-rl](https:\u002F\u002Fgithub.com\u002Fkeras-rl\u002Fkeras-rl)\n* [Talos](https:\u002F\u002Fgithub.com\u002Fautonomio\u002Ftalos)\n\n#### 可视化工具\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n- [altair](https:\u002F\u002Faltair-viz.github.io\u002F)\n- [amcharts](https:\u002F\u002Fwww.amcharts.com\u002F)\n- [anychart](https:\u002F\u002Fwww.anychart.com\u002F)\n- [bokeh](https:\u002F\u002Fbokeh.org\u002F)\n- [Comet](https:\u002F\u002Fwww.comet.com\u002Fsite\u002Fproducts\u002Fml-experiment-tracking\u002F?utm_source=awesome-datascience)\n- [slemma](https:\u002F\u002Fslemma.com\u002F)\n- [cartodb](https:\u002F\u002Fcartodb.github.io\u002Fodyssey.js\u002F)\n- [Cube](https:\u002F\u002Fsquare.github.io\u002Fcube\u002F)\n- [d3plus](https:\u002F\u002Fd3plus.org\u002F)\n- [Data-Driven Documents(D3js)](https:\u002F\u002Fd3js.org\u002F)\n- [dygraphs](https:\u002F\u002Fdygraphs.com\u002F)\n- [exhibit](https:\u002F\u002Fwww.simile-widgets.org\u002Fexhibit\u002F)\n- [gephi](https:\u002F\u002Fgephi.org\u002F)\n- [ggplot2](https:\u002F\u002Fggplot2.tidyverse.org\u002F)\n- [Glue](http:\u002F\u002Fdocs.glueviz.org\u002Fen\u002Flatest\u002Findex.html)\n- [Google Chart Gallery](https:\u002F\u002Fdevelopers.google.com\u002Fchart\u002Finteractive\u002Fdocs\u002Fgallery)\n- [Highcharts](https:\u002F\u002Fwww.highcharts.com\u002F)\n- [import.io](https:\u002F\u002Fwww.import.io\u002F)\n- [Matplotlib](https:\u002F\u002Fmatplotlib.org\u002F)\n- [nvd3](https:\u002F\u002Fnvd3.org\u002F)\n- [Netron](https:\u002F\u002Fgithub.com\u002Flutzroeder\u002Fnetron)\n- [Openrefine](https:\u002F\u002Fopenrefine.org\u002F)\n- 
[plot.ly](https:\u002F\u002Fplot.ly\u002F)\n- [raw](https:\u002F\u002Frawgraphs.io)\n- [Resseract Lite](https:\u002F\u002Fgithub.com\u002Fabistarun\u002Fresseract-lite)\n- [Seaborn](https:\u002F\u002Fseaborn.pydata.org\u002F)\n- [techanjs](https:\u002F\u002Ftechanjs.org\u002F)\n- [Timeline](https:\u002F\u002Ftimeline.knightlab.com\u002F)\n- [variancecharts](https:\u002F\u002Fvariancecharts.com\u002Findex.html)\n- [vida](https:\u002F\u002Fvida.io\u002F)\n- [vizzu](https:\u002F\u002Fgithub.com\u002Fvizzuhq\u002Fvizzu-lib)\n- [Wrangler](http:\u002F\u002Fvis.stanford.edu\u002Fwrangler\u002F)\n- [r2d3](http:\u002F\u002Fwww.r2d3.us\u002Fvisual-intro-to-machine-learning-part-1\u002F)\n- [NetworkX](https:\u002F\u002Fnetworkx.org\u002F)\n- [Redash](https:\u002F\u002Fredash.io\u002F)\n- [Metabase](https:\u002F\u002Fwww.metabase.com\u002F)\n- [C3](https:\u002F\u002Fc3js.org\u002F)\n- [TensorWatch](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Ftensorwatch)\n- [geomap](https:\u002F\u002Fpypi.org\u002Fproject\u002Fgeomap\u002F)\n- [Dash](https:\u002F\u002Fplotly.com\u002Fdash\u002F)\n- [MetaReview](https:\u002F\u002Fmetareview-8c1.pages.dev\u002F) - 免费在线元分析平台，配备11种交互式D3.js统计图表（森林图、漏斗图、Galbraith图、L'Abbé图、Baujat图等），5种效应量指标，AI文献筛选功能以及可直接发表的报告导出功能。[github.com](https:\u002F\u002Fgithub.com\u002FTerryFYL\u002Fmetareview)\n\n### 其他工具\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n| 链接 | 描述 |\n| --- | --- |\n| [数据科学生命周期流程](https:\u002F\u002Fgithub.com\u002Fdslp\u002Fdslp) | 数据科学生命周期流程是一种帮助数据科学团队从想法到价值持续、可重复实现的流程。该流程在此仓库中被详细记录。 |\n| [数据科学生命周期模板仓库](https:\u002F\u002Fgithub.com\u002Fdslp\u002Fdslp-repo-template) | 数据科学生命周期项目的模板仓库。 |\n| [TabGAN](https:\u002F\u002Fgithub.com\u002FDiyago\u002FTabular-data-generation) | 使用GAN、扩散模型和LLM生成合成表格数据，并结合对抗性过滤和隐私度量。 |\n| [RexMex](https:\u002F\u002Fgithub.com\u002FAstraZeneca\u002Frexmex) | 用于公平评估的通用推荐指标库。 |\n| [ChemicalX](https:\u002F\u002Fgithub.com\u002FAstraZeneca\u002Fchemicalx) | 基于PyTorch的深度学习库，用于药物对评分。 |\n| 
[FileShot.io](https:\u002F\u002Fgithub.com\u002FFileShot\u002FFileShotZKE) | 安全的零知识加密文件共享（浏览器端AES-256-GCM）。无需注册，MIT许可，可自行部署，支持可选链接过期功能。 |\n| [CorpusExplorer](http:\u002F\u002Fcorpusexplorer.de\u002F) | 面向语料库语言学家和文本\u002F数据挖掘爱好者的软件。可构建超过60种语言的语料库，并使用50多种工具和可视化方法。 |\n| [PyTorch Geometric Temporal](https:\u002F\u002Fgithub.com\u002Fbenedekrozemberczki\u002Fpytorch_geometric_temporal) | 动态图上的表示学习。 |\n| [Little Ball of Fur](https:\u002F\u002Fgithub.com\u002Fbenedekrozemberczki\u002Flittleballoffur) | 一个具有Scikit-Learn风格API的NetworkX图采样库。 |\n| [Karate Club](https:\u002F\u002Fgithub.com\u002Fbenedekrozemberczki\u002Fkarateclub) | 一个具有Scikit-Learn风格API的NetworkX无监督机器学习扩展库。 |\n| [ML Workspace](https:\u002F\u002Fgithub.com\u002Fml-tooling\u002Fml-workspace) | 一体化的基于Web的机器学习和数据科学IDE。该工作区以Docker容器形式部署，预装了多种流行的数据科学库（如TensorFlow、PyTorch）和开发工具（如Jupyter、VS Code）。 |\n| [xonsh shell](https:\u002F\u002Fgithub.com\u002Fxonsh\u002Fxonsh) | 一款由Python驱动的Shell，能够集成、管理和编排主要用Python编写的数据科学库，从而构建流水线、代码和基于命令的工作流。它也可以用作Jupyter Notebook的内核。 |\n| [Neptune.ai](https:\u002F\u002Fneptune.ai) | 一个社区友好的平台，支持数据科学家创建和分享机器学习模型。Neptune促进团队协作、基础设施管理、模型比较和可重复性。 |\n| [steppy](https:\u002F\u002Fgithub.com\u002Fminerva-ml\u002Fsteppy) | 轻量级的Python库，用于快速且可重复的机器学习实验。提供简洁的接口，便于设计清晰的机器学习流水线。 |\n| [steppy-toolkit](https:\u002F\u002Fgithub.com\u002Fminerva-ml\u002Fsteppy-toolkit) | 精选的神经网络、变换器和模型集合，使您的机器学习工作更高效、更有效。 |\n| [Google Datalab](https:\u002F\u002Fcloud.google.com\u002Fdatalab\u002Fdocs\u002F) | 使用熟悉的语言（如Python和SQL）交互式地轻松探索、可视化、分析和转换数据。 |\n| [Hortonworks Sandbox](https:\u002F\u002Fwww.cloudera.com\u002Fdownloads\u002Fhortonworks-sandbox.html) | 一个个人化的便携式Hadoop环境，附带十几个交互式Hadoop教程。 |\n| [R](https:\u002F\u002Fwww.r-project.org\u002F) | 一个用于统计计算和图形绘制的免费软件环境。 |\n| [Tidyverse](https:\u002F\u002Fwww.tidyverse.org\u002F) | 一组专为数据科学设计的R包集合。所有包共享共同的设计理念、语法和数据结构。 |\n| [RStudio](https:\u002F\u002Fwww.rstudio.com) | R的强大用户界面——IDE。它是免费开源的，可在Windows、Mac和Linux上运行。 |\n| [Python - Pandas - 
Anaconda](https:\u002F\u002Fwww.anaconda.com) | 完全免费的企业级Python发行版，适用于大规模数据处理、预测分析和科学计算。 |\n| [Pandas GUI](https:\u002F\u002Fgithub.com\u002Fadrotog\u002FPandasGUI) | Pandas的GUI界面。 |\n| [Polars](https:\u002F\u002Fgithub.com\u002Fpola-rs\u002Fpolars) | 一个面向Rust和Python的快速DataFrame库，旨在作为Pandas的更快替代品。 |\n| [CiteMe](https:\u002F\u002Fciteme.app) | 基于AI的学术引用生成器。搜索11+个学术数据库（OpenAlex、PubMed、Semantic Scholar、CrossRef、SciELO），并按40+种引用格式生成参考文献。提供Web应用、浏览器扩展、Google Docs插件和公共API。 |\n| [Scikit-Learn](https:\u002F\u002Fscikit-learn.org\u002Fstable\u002F) | Python中的机器学习。 |\n| [NumPy](https:\u002F\u002Fnumpy.org\u002F) | NumPy是Python科学计算的基础。它支持大型多维数组和矩阵，并包含一系列高级数学函数来操作这些数组。 |\n| [Vaex](https:\u002F\u002Fvaex.io\u002F) | Vaex是一个Python库，允许您以高速可视化大型数据集并计算统计数据。 |\n| [SciPy](https:\u002F\u002Fscipy.org\u002F) | SciPy与NumPy数组协同工作，提供高效的数值积分和优化算法。 |\n| [数据科学工具箱](https:\u002F\u002Fwww.coursera.org\u002Flearn\u002Fdata-scientists-tools) | Coursera课程。 |\n| [数据科学工具箱](https:\u002F\u002Fdatasciencetoolbox.org\u002F) | 博客。 |\n| [Wolfram数据科学平台](https:\u002F\u002Fwww.wolfram.com\u002Fdata-science-platform\u002F) | 将数值、文本、图像、GIS或其他数据交由Wolfram处理，进行全面的数据科学分析和可视化，并自动生成丰富的交互式报告——这一切都由革命性的基于知识的Wolfram语言驱动。 |\n| [Datadog](https:\u002F\u002Fwww.datadoghq.com\u002F) | 高规模数据科学的解决方案、代码和DevOps工具。 |\n| [Variance](https:\u002F\u002Fvariancecharts.com\u002F) | 无需编写JavaScript即可构建强大的Web数据可视化。 |\n| [Kite开发工具包](http:\u002F\u002Fkitesdk.org\u002Fdocs\u002Fcurrent\u002Findex.html) | Kite软件开发工具包（Apache许可证，版本2.0），简称Kite，是一套专注于简化在Hadoop生态系统之上构建系统的库、工具、示例和文档。 |\n| [Domino Data Labs](https:\u002F\u002Fwww.dominodatalab.com) | 运行、扩展、共享和部署您的模型——无需任何基础设施或设置。 |\n| [Apache Flink](https:\u002F\u002Fflink.apache.org\u002F) | 一个高效、分布式、通用的数据处理平台。 |\n| [Apache Hama](https:\u002F\u002Fhama.apache.org\u002F) | Apache Hama是Apache顶级开源项目，允许您进行超越MapReduce的高级分析。 |\n| [Weka](https:\u002F\u002Fml.cms.waikato.ac.nz\u002Fweka\u002Findex.html) | Weka是一组用于数据挖掘任务的机器学习算法。 |\n| [Octave](https:\u002F\u002Fwww.gnu.org\u002Fsoftware\u002Foctave\u002F) | 
GNU Octave是一种高级解释型语言，主要用于数值计算。（免费Matlab） |\n| [Apache Spark](https:\u002F\u002Fspark.apache.org\u002F) | 极速集群计算。 |\n| [Hydrosphere Mist](https:\u002F\u002Fgithub.com\u002FHydrospheredata\u002Fmist) | 一个服务，用于将Apache Spark分析作业和机器学习模型暴露为实时、批处理或响应式Web服务。 |\n| [Data Mechanics](https:\u002F\u002Fwww.datamechanics.co) | 一个数据科学和工程平台，使Apache Spark对开发者更加友好且更具成本效益。 |\n| [Caffe](https:\u002F\u002Fcaffe.berkeleyvision.org\u002F) | 深度学习框架。 |\n| [Torch](http:\u002F\u002Ftorch.ch\u002F) | 一个用于LuaJIT的科学计算框架。 |\n| [Nervana基于Python的深度学习框架](https:\u002F\u002Fgithub.com\u002FNervanaSystems\u002Fneon) | 英特尔® Nervana™参考深度学习框架，致力于在所有硬件上实现最佳性能。 |\n| [Skale](https:\u002F\u002Fgithub.com\u002Fskale-me\u002Fskale) | 在NodeJS中进行高性能分布式数据处理。 |\n| [Aerosolve](https:\u002F\u002Fairbnb.io\u002Faerosolve\u002F) | 一个为人类设计的机器学习软件包。 |\n| [英特尔框架](https:\u002F\u002Fgithub.com\u002Fintel\u002Fidlf) | 英特尔®深度学习框架。 |\n| [Datawrapper](https:\u002F\u002Fwww.datawrapper.de\u002F) | 一个开源数据可视化平台，帮助每个人创建简单、正确且可嵌入的图表。也在[github.com](https:\u002F\u002Fgithub.com\u002Fdatawrapper\u002Fdatawrapper)上。 |\n| [Tensor Flow](https:\u002F\u002Fwww.tensorflow.org\u002F) | TensorFlow是一个用于机器智能的开源软件库。 |\n| [自然语言工具包](https:\u002F\u002Fwww.nltk.org\u002F) | 一个入门级但功能强大的自然语言处理和分类工具包。 |\n| [Annotation Lab](https:\u002F\u002Fwww.johnsnowlabs.com\u002Fannotation-lab\u002F) | 免费的端到端无代码文本标注和DL模型训练\u002F调优平台。开箱即用支持命名实体识别、分类、关系抽取和断言状态等Spark NLP模型。对用户、团队、项目和文档提供无限支持。 |\n| [nlp-toolkit for node.js](https:\u002F\u002Fwww.npmjs.com\u002Fpackage\u002Fnlp-toolkit) | 本模块涵盖一些基本的NLP原理和实现。重点在于性能。在处理NLP样本或训练数据时，我们很快就会耗尽内存。因此，本模块中的每个实现都以流式方式编写，只在当前步骤处理的数据保留在内存中。 |\n| [Julia](https:\u002F\u002Fjulialang.org) | 一种用于技术计算的高级、高性能动态编程语言。 |\n| [IJulia](https:\u002F\u002Fgithub.com\u002FJuliaLang\u002FIJulia.jl) | 一个结合了Jupyter交互式环境的Julia语言后端。 |\n| [Apache Zeppelin](https:\u002F\u002Fzeppelin.apache.org\u002F) | 一个基于Web的笔记本，支持数据驱动的交互式数据分析和协作文档，使用SQL、Scala等语言。 |\n| [Featuretools](https:\u002F\u002Fgithub.com\u002Falteryx\u002Ffeaturetools) | 
一个用Python编写的开源自动特征工程框架。 |\n| [Optimus](https:\u002F\u002Fgithub.com\u002Fhi-primus\u002Foptimus) | 清洗、预处理、特征工程、探索性数据分析以及易于使用的ML，后端基于PySpark。 |\n| [Albumentations](https:\u002F\u002Fgithub.com\u002Falbumentations-team\u002Falbumentations) | 一个快速且与框架无关的图像增强库，实现了多样化的增强技术。开箱即用支持分类、分割和检测。曾用于赢得Kaggle、Topcoder以及CVPR研讨会相关的一系列深度学习竞赛。 |\n| [DVC](https:\u002F\u002Fgithub.com\u002Fiterative\u002Fdvc) | 一个开源的数据科学版本控制系统。它有助于跟踪、组织和使数据科学项目可重复。在最基本的情况下，它可以帮助版本控制和共享大型数据及模型文件。 |\n| [Lambdo](https:\u002F\u002Fgithub.com\u002Fasavinov\u002Flambdo) | 一个工作流引擎，通过将特征工程和机器学习、模型训练和预测、表格填充和列评估结合在一个分析管道中，显著简化了数据分析过程。 |\n| [Feast](https:\u002F\u002Fgithub.com\u002Ffeast-dev\u002Ffeast) | 一个用于管理、发现和访问机器学习特征的特征存储库。Feast为模型训练和模型服务提供一致的特征视图。 |\n| [Polyaxon](https:\u002F\u002Fgithub.com\u002Fpolyaxon\u002Fpolyaxon) | 一个用于可重复和可扩展的机器学习和深度学习的平台。 |\n| [UBIAI](https:\u002F\u002Fubiai.tools) | 一个易于使用的文本标注工具，适用于团队，具有最全面的自动标注功能。支持NER、关系和文档分类，以及发票标签的OCR标注。 |\n| [Trains](https:\u002F\u002Fgithub.com\u002Fallegroai\u002Fclearml) | 自动化实验管理器、版本控制和AI的DevOps工具。 |\n| [Hopsworks](https:\u002F\u002Fgithub.com\u002Flogicalclocks\u002Fhopsworks) | 一个开源的数据密集型机器学习平台，带有特征存储库。可以摄取并管理用于在线（MySQL Cluster）和离线（Apache Hive）访问的特征，并大规模地训练和部署模型。 |\n| [MindsDB](https:\u002F\u002Fgithub.com\u002Fmindsdb\u002Fmindsdb) | MindsDB是一个面向开发者的可解释AutoML框架。使用MindsDB，您可以在一行代码中构建、训练和使用最先进的ML模型。 |\n| [Lightwood](https:\u002F\u002Fgithub.com\u002Fmindsdb\u002Flightwood) | 一个基于PyTorch的框架，将机器学习问题分解为更小的模块，这些模块可以无缝拼接在一起，目标是用一行代码构建预测模型。 |\n| [AWS Data Wrangler](https:\u002F\u002Fgithub.com\u002Fawslabs\u002Faws-data-wrangler) | 一个开源的Python包，将Pandas库的功能扩展到AWS，连接DataFrames和AWS相关的数据服务（Amazon Redshift、AWS Glue、Amazon Athena、Amazon EMR等）。 |\n| [Amazon Rekognition](https:\u002F\u002Faws.amazon.com\u002Frekognition\u002F) | AWS Rekognition是一项服务，允许使用亚马逊云服务的开发人员为其应用程序添加图像分析功能。对资产进行编目、自动化工作流，并从您的媒体和应用程序中提取意义。|\n| [Amazon Textract](https:\u002F\u002Faws.amazon.com\u002Ftextract\u002F) | 自动从任何文档中提取印刷文本、手写内容和数据。 |\n| [Amazon Lookout for 
Vision](https:\u002F\u002Faws.amazon.com\u002Flookout-for-vision\u002F) | 使用计算机视觉检测产品缺陷，以自动化质量检验。识别缺失的产品组件、车辆和结构损坏，以及不规则之处，从而实现全面的质量控制。|\n| [Amazon CodeGuru](https:\u002F\u002Faws.amazon.com\u002Fcodeguru\u002F) | 使用ML驱动的建议自动审查代码并优化应用程序性能。|\n| [CML](https:\u002F\u002Fgithub.com\u002Fiterative\u002Fcml) | 一个开源工具包，用于在数据科学项目中使用持续集成。通过GitHub Actions和GitLab CI，在类似生产环境的环境中自动训练和测试模型，并在拉取\u002F合并请求上自动生成可视化报告。 |\n| [Dask](https:\u002F\u002Fdask.org\u002F) | 一个开源的Python库，帮助您轻松地将分析代码迁移到分布式计算系统（大数据）。 |\n| [DuckDB](https:\u002F\u002Fgithub.com\u002Fduckdb\u002Fduckdb) | 一个进程内SQL OLAP数据库管理系统。 |\n| [Statsmodels](https:\u002F\u002Fwww.statsmodels.org\u002Fstable\u002Findex.html) | 一个基于Python的推论统计、假设检验和回归框架。 |\n| [Gensim](https:\u002F\u002Fradimrehurek.com\u002Fgensim\u002F) | 一个用于自然语言文本主题建模的开源库。 |\n| [spaCy](https:\u002F\u002Fspacy.io\u002F) | 一个高性能的自然语言处理工具包。 |\n| [Grid Studio](https:\u002F\u002Fgithub.com\u002Fricklamers\u002Fgridstudio) | Grid Studio是一个基于Web的电子表格应用程序，完全集成了Python编程语言。 |\n|[Python数据科学手册](https:\u002F\u002Fgithub.com\u002Fjakevdp\u002FPythonDataScienceHandbook)|Python数据科学手册：完整文本以Jupyter Notebooks形式呈现|\n| [Shapley](https:\u002F\u002Fgithub.com\u002Fbenedekrozemberczki\u002Fshapley) | 一个数据驱动的框架，用于量化机器学习集成中分类器的价值。 |\n| [DAGsHub](https:\u002F\u002Fdagshub.com) | 一个基于开源工具构建的数据、模型和管道管理平台。 |\n| [Deepnote](https:\u002F\u002Fdeepnote.com) | 一种新型的数据科学笔记本。兼容Jupyter，支持实时协作，并在云端运行。 |\n| [Valohai](https:\u002F\u002Fvalohai.com) | 一个MLOps平台，负责机器编排、自动可重复性和部署。 |\n| [PyMC3](https:\u002F\u002Fdocs.pymc.io\u002F) | 一个用于概率编程的Python库（贝叶斯推断和机器学习）。 |\n| [PyStan](https:\u002F\u002Fpypi.org\u002Fproject\u002Fpystan\u002F) | Python接口，用于Stan（贝叶斯推断和建模）。 |\n| [hmmlearn](https:\u002F\u002Fpypi.org\u002Fproject\u002Fhmmlearn\u002F) | 无监督学习和隐马尔可夫模型的推断。 |\n| [Chaos Genius](https:\u002F\u002Fgithub.com\u002Fchaos-genius\u002Fchaos_genius\u002F) | 基于ML的分析引擎，用于异常检测和根本原因分析。 |\n| [Nimblebox](https:\u002F\u002Fnimblebox.ai\u002F) | 一个全栈MLOps平台，旨在帮助全球的数据科学家和机器学习从业者通过他们的网页浏览器发现、创建和发布多云应用。 |\n| 
[Towhee](https:\u002F\u002Fgithub.com\u002Ftowhee-io\u002Ftowhee) | 一个Python库，帮助您将非结构化数据编码为嵌入。 |\n| [LineaPy](https:\u002F\u002Fgithub.com\u002FLineaLabs\u002Flineapy) | 曾经因为清理冗长、混乱的Jupyter笔记本而感到沮丧吗？借助LineaPy，一个开源的Python库，只需两行代码就能将混乱的开发代码转化为生产流水线。 |\n| [envd](https:\u002F\u002Fgithub.com\u002Ftensorchord\u002Fenvd) | 🏕️ 为数据科学和AI\u002FML工程团队提供的机器学习开发环境。 |\n| [探索数据科学库](https:\u002F\u002Fkandi.openweaver.com\u002Fexplore\u002Fdata-science) | 一个搜索引擎🔍工具，用于发现和查找精选的热门和新兴库、顶尖作者、热门项目工具包、讨论、教程和学习资源。 |\n| [MLEM](https:\u002F\u002Fgithub.com\u002Fiterative\u002Fmlem) | 🐶 按照GitOps原则版本化和部署您的ML模型。 |\n| [MLflow](https:\u002F\u002Fmlflow.org\u002F) | 一个MLOps框架，用于管理ML模型的整个生命周期。 |\n| [cleanlab](https:\u002F\u002Fgithub.com\u002Fcleanlab\u002Fcleanlab) | 一个Python库，专注于以数据为中心的AI，并自动检测ML数据集中的各种问题。 |\n| [AutoGluon](https:\u002F\u002Fgithub.com\u002Fawslabs\u002Fautogluon) | AutoML，轻松为图像、文本、表格、时间序列和多模态数据生成准确的预测。 |\n| [Arize AI](https:\u002F\u002Farize.com\u002F) | Arize AI社区级别的可观测性工具，用于监控生产中的机器学习模型，并找出数据质量和性能漂移等问题的根本原因。 |\n| [Aureo.io](https:\u002F\u002Faureo.io) | Aureo.io是一个低代码平台，专注于构建人工智能。它为用户提供创建流水线、自动化并与人工智能模型集成的能力——所有这些都基于他们自己的基础数据。 |\n| [ERD Lab](https:\u002F\u002Fwww.erdlab.io\u002F) | 免费的云端实体关系图（ERD）工具，专为开发者设计。 |\n| [Arize-Phoenix](https:\u002F\u002Fdocs.arize.com\u002Fphoenix) | MLOps在一个笔记本中——揭示洞察、发现问题、监控并微调您的模型。 |\n| [Comet](https:\u002F\u002Fgithub.com\u002Fcomet-ml\u002Fcomet-examples) | 一个MLOps平台，具备实验跟踪、模型生产管理、模型注册表和完整的数据血缘（lineage），支持您的ML工作流程从训练直接到生产。 |\n| [Opik](https:\u002F\u002Fgithub.com\u002Fcomet-ml\u002Fopik) | 评估、测试和交付LLM应用程序，贯穿您的开发和生产周期。 |\n| [Synthical](https:\u002F\u002Fsynthical.com) | 基于AI的协作研究环境。在同一平台上查找相关论文、创建书目管理集合并总结内容。 |\n| [teeplot](https:\u002F\u002Fgithub.com\u002Fmmore500\u002Fteeplot) | 一个工作流工具，用于自动整理数据可视化输出。 |\n| [Streamlit](https:\u002F\u002Fgithub.com\u002Fstreamlit\u002Fstreamlit) | 一个用于机器学习和数据科学项目的应用框架。 |\n| [Gradio](https:\u002F\u002Fgithub.com\u002Fgradio-app\u002Fgradio) | 围绕机器学习模型创建可定制的UI组件。 |\n| [Weights & 
Biases](https:\u002F\u002Fgithub.com\u002Fwandb\u002Fwandb) | 实验跟踪、数据集版本化和模型管理。 |\n| [DVC](https:\u002F\u002Fgithub.com\u002Fiterative\u002Fdvc) | 一个开源的机器学习项目版本控制系统。 |\n| [Optuna](https:\u002F\u002Fgithub.com\u002Foptuna\u002Foptuna) | 自动超参数优化软件框架。 |\n| [Ray Tune](https:\u002F\u002Fgithub.com\u002Fray-project\u002Fray) | 可扩展的超参数调优库。 |\n| [Apache Airflow](https:\u002F\u002Fgithub.com\u002Fapache\u002Fairflow) | 一个用于以编程方式编写、调度和监控工作流的平台。 |\n| [Prefect](https:\u002F\u002Fgithub.com\u002FPrefectHQ\u002Fprefect) | 一个用于现代数据堆栈的工作流管理系统。 |\n| [Kedro](https:\u002F\u002Fgithub.com\u002Fkedro-org\u002Fkedro) | 一个开源的Python框架，用于创建可重复、可维护的数据科学代码。 |\n| [Hamilton](https:\u002F\u002Fgithub.com\u002Fdagworks-inc\u002Fhamilton) | 一个轻量级的库，用于编写和管理可靠的数据转换。 |\n| [SHAP](https:\u002F\u002Fgithub.com\u002Fslundberg\u002Fshap) | 一种博弈论方法，用于解释任何机器学习模型的输出。 |\n| [InterpretML](https:\u002F\u002Fgithub.com\u002Finterpretml\u002Finterpret) | InterpretML实现了可解释提升机（EBM），这是一种基于广义加性模型（GAMs）的现代、完全可解释的机器学习模型。这个开源包还提供了EBM、其他玻璃盒模型以及黑盒解释的可视化工具。 |\n| [LIME](https:\u002F\u002Fgithub.com\u002Fmarcotcr\u002Flime) | 解释任何机器学习分类器的预测结果。 |\n| [flyte](https:\u002F\u002Fgithub.com\u002Fflyteorg\u002Fflyte) | 一个用于机器学习的工作流自动化平台。 |\n| [dbt](https:\u002F\u002Fgithub.com\u002Fdbt-labs\u002Fdbt-core) | 一个用于数据构建的工具。 |\n| [zasper](https:\u002F\u002Fgithub.com\u002Fzasper-io\u002Fzasper) | 一个为数据科学量身定制的超级IDE。 |\n| [skrub](https:\u002F\u002Fgithub.com\u002Fskrub-data\u002Fskrub\u002F) | 一个Python库，用于简化表格型机器学习的预处理和特征工程。 |\n| [Codeflash](https:\u002F\u002Fwww.codeflash.ai\u002F) | 每次都能快速交付Python代码。 |\n| [Hugging Face](https:\u002F\u002Fhuggingface.co\u002F) | 一个流行的开源平台，用于分享ML模型、数据集以及合作开展NLP和生成式AI项目。 |\n| [Chinese-Elite](https:\u002F\u002Fgithub.com\u002Fanonym-g\u002FChinese-Elite) | 一个开源项目，利用LLM解析公开数据，自动绘制关系网络，并以交互式图的形式展示。 |\n| [Desbordante](https:\u002F\u002Fgithub.com\u002Fdesbordante\u002Fdesbordante-core\u002F) | 
一个开源的数据剖析工具，专门用于发现和验证复杂模式，例如[数值关联规则](https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002FDesbordante\u002Fdesbordante-core\u002Fblob\u002Fmain\u002Fexamples\u002Fnotebooks\u002FNumerical_Association_Rules.ipynb)、[差异依赖](https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002FDesbordante\u002Fdesbordante-core\u002Fblob\u002Fmain\u002Fexamples\u002Fnotebooks\u002FDifferential_Dependencies.ipynb)、[否定约束](https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002FDesbordante\u002Fdesbordante-core\u002Fblob\u002Fmain\u002Fexamples\u002Fnotebooks\u002FDenial_Constraints.ipynb)等。 |\n| [dna-claude-analysis](https:\u002F\u002Fgithub.com\u002Fshmlkv\u002Fdna-claude-analysis) | 一个个人基因组分析工具包，使用Python脚本对原始DNA数据进行17类分析（健康风险、祖先、药效基因组学、营养、心理学等），并生成终端风格的单页HTML可视化。 |\n| [RunMat](https:\u002F\u002Fgithub.com\u002Frunmat-org\u002Frunmat) | 快速MATLAB语法运行时，具备自动CPU\u002FGPU执行和融合数组内核。 |\n| [Turbostream](https:\u002F\u002Fgithub.com\u002Fturboline-ai\u002Fturbostream) | 一个终端UI，用于试验自定义规则引擎和对实时数据流进行选择性LLM分析，无需担心流媒体基础设施或背压问题。 |\n| [WFGY ProblemMap](https:\u002F\u002Fgithub.com\u002Fonestardao\u002FWFGY\u002Fblob\u002Fmain\u002FProblemMap\u002FREADME.md) | 开源的“失败地图”，列出了LLM和RAG流水线中常见的16种问题，附有可观察的症状和针对数据科学团队的建议修复方案。 |\n| [Deploybase](https:\u002F\u002Fdeploybase.ai\u002F) | 实时追踪所有云和推理提供商的GPU和LLM价格。 |\n| [DeepAnalyze](https:\u002F\u002Fgithub.com\u002Fruc-datalab\u002FDeepAnalyze) | 一个自主型LLM，用于自主数据科学，能够在无需人工干预的情况下完成广泛的数据科学任务。 |\n\n## 文学与媒体\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n本节包含一些额外的阅读材料、值得关注的频道以及值得收听的演讲。\n\n### 书籍\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n- [《从零开始的数据科学：Python基础原理》](https:\u002F\u002Fwww.amazon.com\u002FData-Science-Scratch-Principles-Python-dp-1492041130\u002Fdp\u002F1492041130\u002Fref=dp_ob_title_bk)\n- [《Python人工智能——Tutorialspoint教程》](https:\u002F\u002Fwww.tutorialspoint.com\u002Fartificial_intelligence_with_python\u002Fartificial_intelligence_with_python_tutorial.pdf)\n- 
《从零开始的机器学习》（https:\u002F\u002Fdafriedman97.github.io\u002Fmlbook\u002Fcontent\u002Fintroduction.html）\n- 《概率机器学习导论》（https:\u002F\u002Fprobml.github.io\u002Fpml-book\u002Fbook1.html）\n- 《如何在数据科学领域领导》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fhow-to-lead-in-data-science）——抢先体验版\n- 《用数据对抗用户流失》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Ffighting-churn-with-data）\n- 《使用Python和Dask的大规模数据科学》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fdata-science-with-python-and-dask）\n- 《Python数据科学手册》（https:\u002F\u002Fjakevdp.github.io\u002FPythonDataScienceHandbook\u002F）\n- 《数据科学家手册：25位杰出数据科学家的建议与洞见》（https:\u002F\u002Fwww.thedatasciencehandbook.com\u002F）\n- 《像数据科学家一样思考》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fthink-like-a-data-scientist）\n- 《数据科学入门》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fintroducing-data-science）\n- 《R语言实战数据科学》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fpractical-data-science-with-r）\n- 《日常数据科学》（https:\u002F\u002Fwww.amazon.com\u002Fdp\u002FB08TZ1MT3W\u002Fref=cm_sw_r_cp_apa_fabc_a0ceGbWECF9A8）及[更便宜的PDF版本](https:\u002F\u002Fgum.co\u002Feverydaydata)\n- 《探索数据科学》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fexploring-data-science）——免费电子书试读版\n- 《探索数据丛林》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fexploring-the-data-jungle）——免费电子书试读版\n- 《Python经典计算机科学问题》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fclassic-computer-science-problems-in-python）\n- 《程序员数学》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fmath-for-programmers）——抢先体验版\n- 《R语言实战（第三版）》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fr-in-action-third-edition）——抢先体验版\n- 《数据科学读书营》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fdata-science-bookcamp）——抢先体验版\n- 《数据科学思维：下一场科学、技术和经济革命》（https:\u002F\u002Fwww.springer.com\u002Fgp\u002Fbook\u002F9783319950914）\n- 《应用数据科学：数据驱动型企业的经验教训》（https:\u002F\u002Fwww.springer.com\u002Fgp\u002Fbook\u002F9783030118204）\n- 
《数据科学家手册》（https:\u002F\u002Fwww.amazon.com\u002FData-Science-Handbook-Field-Cady\u002Fdp\u002F1119092949）\n- 《自然语言处理 essentials》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fgetting-started-with-natural-language-processing）——抢先体验版\n- 《挖掘海量数据集》（http:\u002F\u002Fwww.mmds.org\u002F）——由在线课程配套的免费电子书\n- 《Pandas实战》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fpandas-in-action）——抢先体验版\n- 《遗传算法与遗传编程》（https:\u002F\u002Fwww.taylorfrancis.com\u002Fbooks\u002F9780429141973）\n- 《进化算法进展》（https:\u002F\u002Fwww.intechopen.com\u002Fbooks\u002Fadvances_in_evolutionary_algorithms）——免费下载\n- 《遗传编程：新方法与成功应用》（https:\u002F\u002Fwww.intechopen.com\u002Fbooks\u002Fgenetic-programming-new-approaches-and-successful-applications）——免费下载\n- 《进化算法》（https:\u002F\u002Fwww.intechopen.com\u002Fbooks\u002Fevolutionary-algorithms）——免费下载\n- 《遗传编程进展，第3卷》（http:\u002F\u002Fwww0.cs.ucl.ac.uk\u002Fstaff\u002FW.Langdon\u002Faigp3\u002F）——免费下载\n- 《遗传算法与进化计算》（https:\u002F\u002Fwww.talkorigins.org\u002Ffaqs\u002Fgenalg\u002Fgenalg.html）——免费下载\n- 《凸优化》（https:\u002F\u002Fweb.stanford.edu\u002F~boyd\u002Fcvxbook\u002Fbv_cvxbook.pdf）——斯蒂芬·博伊德的《凸优化》一书——免费下载\n- 《使用Python和PySpark进行数据分析》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fdata-analysis-with-python-and-pyspark）——抢先体验版\n- 《R语言与数据科学》（https:\u002F\u002Fr4ds.had.co.nz\u002F）\n- 《打造数据科学职业生涯》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fbuild-a-career-in-data-science）\n- 《机器学习读书营》（https:\u002F\u002Fmlbookcamp.com\u002F）——抢先体验版\n- 《动手学机器学习：Scikit-Learn、Keras和TensorFlow 第2版》（https:\u002F\u002Fwww.oreilly.com\u002Flibrary\u002Fview\u002Fhands-on-machine-learning\u002F9781492032632\u002F）\n- 《高效的数据科学基础设施》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Feffective-data-science-infrastructure）\n- 《实用MLOps：如何为生产级模型做好准备》（https:\u002F\u002Fvalohai.com\u002Fmlops-ebook\u002F）\n- 《使用Python和PySpark进行数据分析》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fdata-analysis-with-python-and-pyspark）\n- 
《回归分析：友好指南》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fregression-a-friendly-guide）——抢先体验版\n- 《流式系统：大规模数据处理的何、何地、何时和如何》（https:\u002F\u002Fwww.oreilly.com\u002Flibrary\u002Fview\u002Fstreaming-systems\u002F9781491983867\u002F）\n- 《命令行上的数据科学：用久经考验的工具迎接未来》（https:\u002F\u002Fwww.oreilly.com\u002Flibrary\u002Fview\u002Fdata-science-at\u002F9781491947845\u002F）\n- 《Python机器学习——Tutorialspoint教程》（https:\u002F\u002Fwww.tutorialspoint.com\u002Fmachine_learning_with_python\u002Fmachine_learning_with_python_tutorial.pdf）\n- 《深度学习》（https:\u002F\u002Fwww.deeplearningbook.org\u002F）\n- 《设计云数据平台》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fdesigning-cloud-data-platforms）——抢先体验版\n- 《统计学习导论及其在R中的应用》（https:\u002F\u002Fwww.statlearning.com\u002F）\n- 《统计学习要素：数据挖掘、推断与预测》（https:\u002F\u002Fhastie.su.domains\u002FElemStatLearn\u002F）\n- 《使用PyTorch的深度学习》（https:\u002F\u002Fwww.simonandschuster.com\u002Fbooks\u002FDeep-Learning-with-PyTorch\u002FEli-Stevens\u002F9781617295263）\n- 《神经网络与深度学习》（http:\u002F\u002Fneuralnetworksanddeeplearning.com）\n- 《深度学习烹饪书》（https:\u002F\u002Fwww.oreilly.com\u002Flibrary\u002Fview\u002Fdeep-learning-cookbook\u002F9781491995839\u002F）\n- 《Python机器学习入门》（https:\u002F\u002Fwww.oreilly.com\u002Flibrary\u002Fview\u002Fintroduction-to-machine\u002F9781449369880\u002F）\n- 《人工智能：计算智能体基础（第二版）》（https:\u002F\u002Fartint.info\u002Findex.html）——免费HTML版本\n- 《人工智能探秘：思想与成就的历史》（https:\u002F\u002Fai.stanford.edu\u002F~nilsson\u002FQAI\u002Fqai.pdf）——免费下载\n- 《数据科学中的图算法》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fgraph-algorithms-for-data-science）——抢先体验版\n- 《数据网格实战》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fdata-mesh-in-action）——抢先体验版\n- 《Julia用于数据分析》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fjulia-for-data-analysis）——抢先体验版\n- 《数据科学中的因果推断》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fcausal-inference-for-data-science）——抢先体验版\n- 
《正则表达式谜题与AI编码助手》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fregular-expression-puzzles-and-ai-coding-assistants）——作者：大卫·梅茨\n- 《深入深度学习》（https:\u002F\u002Fd2l.ai\u002F）\n- 《数据为所有人》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fdata-for-all）\n- 《可解释的机器学习：让黑盒模型变得可解释的指南》（https:\u002F\u002Fchristophm.github.io\u002Finterpretable-ml-book\u002F）——免费GitHub版本\n- 《数据科学基础》（https:\u002F\u002Fwww.cs.cornell.edu\u002Fjeh\u002Fbook.pdf）——免费下载\n- 《Comet for DataScience：提升您管理和优化数据科学项目生命周期的能力》（https:\u002F\u002Fwww.amazon.com\u002FComet-Data-Science-Enhance-optimize\u002Fdp\u002F1801814430）\n- 《面向数据科学家的软件工程》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fsoftware-engineering-for-data-scientists）——抢先体验版\n- 《Julia用于数据科学》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fjulia-for-data-science）——抢先体验版\n- 《统计学习入门》（https:\u002F\u002Fwww.statlearning.com\u002F）——下载页面\n- 《机器学习完全入门》（https:\u002F\u002Fwww.amazon.in\u002FMachine-Learning-Absolute-Beginners-Introduction-ebook\u002Fdp\u002FB07335JNW1）\n- 《统一业务、数据与代码：使用JSON Schema设计数据产品》（https:\u002F\u002Flearning.oreilly.com\u002Flibrary\u002Fview\u002Funifying-business-data\u002F9781098144999\u002F）\n- 《掌握贝叶斯》（https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fgrokking-bayes）\n- 《机器学习Q与AI》（https:\u002F\u002Fsebastianraschka.com\u002Fbooks\u002Fml-q-and-ai）\n- 《JavaScript用于数据科学》（https:\u002F\u002Fthird-bit.com\u002Fjs4ds\u002F）——免费HTML页面\n- 《应用数据科学》（https:\u002F\u002Fangewandtedatascience.de\u002F）——一本关于应用数据科学的德语书籍\n- 《人工智能背后的数学》（https:\u002F\u002Fwww.freecodecamp.org\u002Fnews\u002Fthe-math-behind-artificial-intelligence-book）：这是一本由FreeCodeCamp免费发布的书籍，以通俗易懂的语言从工程学角度讲解了人工智能背后的数学知识。\n\n#### 图书优惠（联盟营销）\n\n- [电子书促销 - 电子书最高可享45%折扣！](https:\u002F\u002Fwww.manning.com\u002F?utm_source=mikrobusiness&utm_medium=affiliate&utm_campaign=ebook_sale_8_8_22)\n\n- 
[因果机器学习](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fcausal-machine-learning?utm_source=mikrobusiness&utm_medium=affiliate&utm_campaign=book_ness_causal_7_26_22&a_aid=mikrobusiness&a_bid=43a2198b)\n- [管理机器学习项目](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fmanaging-machine-learning-projects?utm_source=mikrobusiness&utm_medium=affiliate&utm_campaign=book_thompson_managing_6_14_22)\n- [数据科学中的因果推断](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fcausal-inference-for-data-science?utm_source=mikrobusiness&utm_medium=affiliate&utm_campaign=book_ruizdevilla_causal_6_6_22)\n- [数据为所有人](https:\u002F\u002Fwww.manning.com\u002Fbooks\u002Fdata-for-all?utm_source=mikrobusiness&utm_medium=affiliate)\n\n### 期刊、出版物和杂志\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n- [ICML](https:\u002F\u002Ficml.cc\u002F2015\u002F) - 国际机器学习大会\n- [GECCO](https:\u002F\u002Fgecco-2019.sigevo.org\u002Findex.html\u002FHomePage) - 遗传与进化计算会议（GECCO）\n- [epjdatascience](https:\u002F\u002Fepjdatascience.springeropen.com\u002F)\n- [数据科学期刊](https:\u002F\u002Fjds-online.org\u002Fjournal\u002FJDS) - 一本致力于广泛应用统计方法的国际期刊\n- [大数据研究](https:\u002F\u002Fwww.journals.elsevier.com\u002Fbig-data-research)\n- [大数据期刊](https:\u002F\u002Fjournalofbigdata.springeropen.com\u002F)\n- [大数据与社会](https:\u002F\u002Fjournals.sagepub.com\u002Fhome\u002Fbds)\n- [数据科学期刊](https:\u002F\u002Fwww.jstage.jst.go.jp\u002Fbrowse\u002Fdsj)\n- [datatau.com\u002Fnews](https:\u002F\u002Fwww.datatau.com\u002Fnews) - 类似于Hacker News，但专注于数据领域\n- [数据科学Trello看板](https:\u002F\u002Ftrello.com\u002Fb\u002FrbpEfMld\u002Fdata-science)\n- [Medium数据科学专题](https:\u002F\u002Fmedium.com\u002Ftag\u002Fdata-science) - Medium上关于数据科学的相关文章\n- [Towards Data Science遗传算法专题](https:\u002F\u002Ftowardsdatascience.com\u002Fintroduction-to-genetic-algorithms-including-example-code-e396e98d8bf3#:~:text=A%20genetic%20algorithm%20is%20a,offspring%20of%20the%20next%20generation.) 
- Towards Data Science上关于遗传算法的相关文章\n- [Maxim AI](https:\u002F\u002Fgetmaxim.ai)。用于AI智能体模拟、评估及可观测性的工具。\n\n### 新闻通讯\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n- [AI Weekly](https:\u002F\u002Faiweekly.co) - 由行业领袖精选的人工智能情报简报，涵盖模型、融资、政策及应用等内容。自2017年起每周三刊，订阅用户超过4万人。\n- [DataTalks.Club](https:\u002F\u002Fdatatalks.club)。一份关于数据相关话题的每周通讯。[存档](https:\u002F\u002Fus19.campaign-archive.com\u002Fhome\u002F?u=0d7822ab98152f5afc118c176&id=97178021aa)。\n- [The Analytics Engineering Roundup](https:\u002F\u002Froundup.getdbt.com\u002Fabout)。一份关于数据科学的通讯。[存档](https:\u002F\u002Froundup.getdbt.com\u002Farchive)。\n- [Techpresso](https:\u002F\u002Fdupple.com\u002Ftechpresso)。一份免费的每日通讯，覆盖人工智能、机器学习及科技领域中最具影响力的发展动态。[存档](https:\u002F\u002Fdupple.com\u002Ftechpresso)。\n\n### 邮件列表\n**[`^        返回顶部        ^`](#awesome-data-science)**\n- [工作组 - 数字人文领域的研究软件工程](https:\u002F\u002Fwww.listserv.dfn.de\u002Fsympa\u002Finfo\u002Fag-dhrse)。这是数字人文领域研究软件工程（DH-RSE）工作组的邮件列表。\n\n### 博主\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n- [Wes McKinney](https:\u002F\u002Fwesmckinney.com\u002Farchives.html) - Wes McKinney 档案。\n- [Matthew Russell](https:\u002F\u002Fminingthesocialweb.com\u002F) - 开采社交网络。\n- [Greg Reda](http:\u002F\u002Fwww.gregreda.com\u002F) - Greg Reda 个人博客\n- [Julia Evans](https:\u002F\u002Fjvns.ca\u002F) - Recurse Center 校友\n- [Hakan Kardas](https:\u002F\u002Fwww.cse.unr.edu\u002F~hkardes\u002F) - 个人主页\n- [Sean J. 
Taylor](https:\u002F\u002Fseanjtaylor.com\u002F) - 个人主页\n- [Drew Conway](http:\u002F\u002Fdrewconway.com\u002F) - 个人主页\n- [Hilary Mason](https:\u002F\u002Fhilarymason.com\u002F) - 个人主页\n- [Noah Iliinsky](http:\u002F\u002Fcomplexdiagrams.com\u002F) - 个人博客\n- [Matt Harrison](https:\u002F\u002Fhairysun.com\u002F) - 个人博客\n- [Vamshi Ambati](https:\u002F\u002Fallthingsds.wordpress.com\u002F) - AllThings Data Science\n- [Prash Chan](https:\u002F\u002Fwww.mdmgeek.com\u002F) - 关于主数据管理和相关热点的技术博客\n- [Clare Corthell](http:\u002F\u002Fdatasciencemasters.org\u002F) - 开源数据科学硕士项目\n- [Datawrangling](http:\u002F\u002Fwww.datawrangling.org) 由 Peter Skomoroch 运营。机器学习、数据挖掘等。\n- [Quora 数据科学](https:\u002F\u002Fwww.quora.com\u002Ftopic\u002FData-Science) - 来自专家的数据科学问答\n- [Siah](https:\u002F\u002Fopenresearch.wordpress.com\u002F) 是伯克利大学的一名博士生\n- [Louis Dorard](https:\u002F\u002Fwww.ownml.co\u002Fblog\u002F) 是一位对网络和大小数据情有独钟的技术爱好者\n- [Machine Learning Mastery](https:\u002F\u002Fmachinelearningmastery.com\u002F) 致力于帮助专业程序员自信地应用机器学习算法来解决复杂问题。\n- [Daniel Forsyth](https:\u002F\u002Fwww.danielforsyth.me\u002F) - 个人博客\n- [Data Science Weekly](https:\u002F\u002Fwww.datascienceweekly.org\u002F) - 每周新闻博客\n- [Revolution Analytics](https:\u002F\u002Fblog.revolutionanalytics.com\u002F) - 数据科学博客\n- [R Bloggers](https:\u002F\u002Fwww.r-bloggers.com\u002F) - R 语言博主\n- [The Practical Quant](https:\u002F\u002Fpracticalquant.blogspot.com\u002F) 大数据\n- [Yet Another Data Blog](https:\u002F\u002Fyet-another-data-blog.blogspot.com\u002F) 又一个数据博客\n- [KD Nuggets](https:\u002F\u002Fwww.kdnuggets.com\u002F) 数据挖掘、分析、大数据、数据、科学——这不是博客，而是一个门户网站\n- [Meta Brown](https:\u002F\u002Fwww.metabrown.com\u002Fblog\u002F) - 个人博客\n- [Data Scientist](https:\u002F\u002Fdatascientists.com\u002F) 正在构建数据科学家文化。\n- [WhatSTheBigData](https:\u002F\u002Fwhatsthebigdata.com\u002F) 涉及上述内容的一部分、全部，甚至更多，本博客探讨其对信息技术、商业世界、政府机构以及我们生活的影响。\n- [Tevfik Kosar](https:\u002F\u002Fmagnus-notitia.blogspot.com\u002F) - Magnus Notitia\n- [New Data 
Scientist](https:\u002F\u002Fnewdatascientist.blogspot.com\u002F) 社会科学家如何进入大数据领域\n- [Harvard Data Science](https:\u002F\u002Fharvarddatascience.com\u002F) - 关于统计计算与可视化的思考\n- [Data Science 101](https:\u002F\u002Fryanswanstrom.com\u002Fdatascience101\u002F) - 学习成为一名数据科学家\n- [Kaggle 历年解决方案](https:\u002F\u002Fwww.chioka.in\u002Fkaggle-competition-solutions\u002F)\n- [DataScientistJourney](https:\u002F\u002Fdatascientistjourney.wordpress.com\u002Fcategory\u002Fdata-science\u002F)\n- [纽约出租车可视化博客](https:\u002F\u002Fchriswhong.github.io\u002Fnyctaxi\u002F)\n- [Data-Mania](https:\u002F\u002Fwww.data-mania.com\u002F)\n- [Data-Magnum](https:\u002F\u002Fdata-magnum.com\u002F)\n- [datascopeanalytics](https:\u002F\u002Fdatascopeanalytics.com\u002Fblog\u002F)\n- [数字化转型](https:\u002F\u002Ftarrysingh.com\u002F)\n- [datascientistjourney](https:\u002F\u002Fdatascientistjourney.wordpress.com\u002Fcategory\u002Fdata-science\u002F)\n- [Data Mania 博客](https:\u002F\u002Fwww.data-mania.com\u002Fblog\u002F)\n- [文件抽屉](https:\u002F\u002Fchris-said.io\u002F) - Chris Said 的科学博客\n- [Emilio Ferrara 的个人主页](http:\u002F\u002Fwww.emilio.ferrara.name\u002F)\n- [DataNews](https:\u002F\u002Fdatanews.tumblr.com\u002F)\n- [Reddit 文本挖掘](https:\u002F\u002Fwww.reddit.com\u002Fr\u002Ftextdatamining\u002F)\n- [Periscopic](https:\u002F\u002Fperiscopic.com\u002F#!\u002Fnews)\n- [Hilary Parker](https:\u002F\u002Fhilaryparker.com\u002F)\n- [Data Stories](https:\u002F\u002Fdatastori.es\u002F)\n- [数据科学实验室](https:\u002F\u002Fdatasciencelab.wordpress.com\u002F)\n- [意义之所在](https:\u002F\u002Fwww.kennybastani.com\u002F)\n- [数据之地的冒险](https:\u002F\u002Fblog.smola.org)\n- [Dataclysm](https:\u002F\u002Ftheblog.okcupid.com\u002F)\n- [FlowingData](https:\u002F\u002Fflowingdata.com\u002F) - 可视化与统计\n- [Calculated Risk](https:\u002F\u002Fwww.calculatedriskblog.com\u002F)\n- [O'Reilly 学习博客](https:\u002F\u002Fwww.oreilly.com\u002Fcontent\u002Ftopics\u002Foreilly-learning\u002F)\n- 
[Dominodatalab](https:\u002F\u002Fblog.dominodatalab.com\u002F)\n- [i am trask](https:\u002F\u002Fiamtrask.github.io\u002F) - 机器学习工艺博客\n- [实用数据科学手册](https:\u002F\u002Fdatasciencevademecum.wordpress.com\u002F) - 面向现实问题的数据驱动解决方案指南与配方\n- [Dataconomy](https:\u002F\u002Fdataconomy.com\u002F) - 关注新兴数据经济的博客\n- [Springboard](https:\u002F\u002Fwww.springboard.com\u002Fblog\u002F) - 为数据科学学习者提供资源的博客\n- [Analytics Vidhya](https:\u002F\u002Fwww.analyticsvidhya.com\u002F) - 一个关于数据科学和分析学习资料的综合性网站。\n- [奥卡姆剃刀](https:\u002F\u002Fwww.kaushik.net\u002Favinash\u002F) - 专注于网络分析。\n- [Data School](https:\u002F\u002Fwww.dataschool.io\u002F) - 针对初学者的数据科学教程！\n- [Colah 的博客](https:\u002F\u002Fcolah.github.io) - 理解神经网络的博客！\n- [Sebastian 的博客](https:\u002F\u002Fruder.io\u002F#open) - 自然语言处理与迁移学习的博客！\n- [Distill](https:\u002F\u002Fdistill.pub) - 专门用于清晰解释机器学习的平台！\n- [Chris Albon 的网站](https:\u002F\u002Fchrisalbon.com\u002F) - 数据科学与人工智能笔记\n- [Andrew Carr](https:\u002F\u002Fandrewnc.github.io\u002Fblog\u002Fblog.html) - 使用冷门编程语言进行数据科学\n- [floydhub](https:\u002F\u002Fblog.floydhub.com\u002Fintroduction-to-genetic-algorithms\u002F) - 进化算法博客\n- [Jingles](https:\u002F\u002Fjinglescode.github.io\u002F) - 复习并提炼学术论文中的关键概念\n- [nbshare](https:\u002F\u002Fwww.nbshare.io\u002Fnotebooks\u002Fdata-science\u002F) - 数据科学笔记本\n- [Loic Tetrel](https:\u002F\u002Fltetrel.github.io\u002F) - 数据科学博客\n- [Chip Huyen 的博客](https:\u002F\u002Fhuyenchip.com\u002Fblog\u002F) - 机器学习工程、MLOps 以及初创企业中机器学习的应用\n- [Maria Khalusova](https:\u002F\u002Fwww.mariakhalusova.com\u002F) - 数据科学博客\n- [Aditi Rastogi](https:\u002F\u002Fmedium.com\u002F@aditi2507rastogi) - ML、DL、数据科学博客\n- [Santiago Basulto](https:\u002F\u002Fmedium.com\u002F@santiagobasulto) - 使用 Python 进行数据科学\n- [Akhil Soni](https:\u002F\u002Fmedium.com\u002F@akhil0435) - ML、DL 和数据科学\n- [Akhil Soni](https:\u002F\u002Fakhilworld.hashnode.dev\u002F) - ML、DL 和数据科学\n- [Applied AI Blogs](https:\u002F\u002Fwww.appliedaicourse.com\u002Fblog\u002F) - 深入探讨人工智能、机器学习和数据科学概念，并结合实际应用的文章。\n- 
[Scaler Blogs](https:\u002F\u002Fwww.scaler.com\u002Fblog\u002F) - 软件开发、人工智能以及科技行业职业发展的教育内容。\n- [Mlu github](https:\u002F\u002Fmlu-explain.github.io\u002F) - Mlu 是亚马逊开发的工具，旨在帮助机器学习从业者。你可以在这里通过实时图表学习从基础到高级的所有知识。\n- [Jan Oliver Rüdiger](https:\u002F\u002Fnotesjor.de\u002F) - ML、DL 和数据科学——专注于文本\u002F数据挖掘\n\n### 演示文稿\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n- [如何成为一名数据科学家](https:\u002F\u002Fwww.slideshare.net\u002Fryanorban\u002Fhow-to-become-a-data-scientist)\n- [数据科学导论](https:\u002F\u002Fwww.slideshare.net\u002FNikoVuokko\u002Fintroduction-to-data-science-25391618)\n- [面向企业大数据的数据科学入门](https:\u002F\u002Fwww.slideshare.net\u002Fpacoid\u002Fintro-to-data-science-for-enterprise-big-data)\n- [如何面试数据科学家](https:\u002F\u002Fwww.slideshare.net\u002Fdtunkelang\u002Fhow-to-interview-a-data-scientist)\n- [如何与统计学家共享数据](https:\u002F\u002Fgithub.com\u002Fjtleek\u002Fdatasharing)\n- [数据科学领域卓越职业生涯的科学](https:\u002F\u002Fwww.slideshare.net\u002Fkatemats\u002Fthe-science-of-a-great-career-in-data-science)\n- [数据科学家是做什么的？](https:\u002F\u002Fwww.slideshare.net\u002Fdatasciencelondon\u002Fbig-data-sorry-data-science-what-does-a-data-scientist-do)\n- [构建数据初创公司：快速、大规模且专注](https:\u002F\u002Fwww.slideshare.net\u002Fmedriscoll\u002Fdriscoll-strata-buildingdatastartups25may2011clean)\n- [如何用深度学习赢得数据科学竞赛](https:\u002F\u002Fwww.slideshare.net\u002F0xdata\u002Fhow-to-win-data-science-competitions-with-deep-learning)\n- [全栈数据科学家](https:\u002F\u002Fwww.slideshare.net\u002FAlexeyGrigorev\u002Ffullstack-data-scientist)\n\n### 播客\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n- [AI在家](https:\u002F\u002Fpodcasts.apple.com\u002Fus\u002Fpodcast\u002Fdata-science-at-home\u002Fid1069871378)\n- [AI今日](https:\u002F\u002Fwww.cognilytica.com\u002Faitoday\u002F)\n- [对抗性学习](https:\u002F\u002Fadversariallearning.com\u002F)\n- [下午茶时间数据科学](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PLLvvXm0q8zUbiNdoIazGzlENMXvZ9bd3x)\n- [思维链](https:\u002F\u002Fwww.chainofthought.show\u002F)\n- 
[数据工程播客](https:\u002F\u002Fwww.dataengineeringpodcast.com\u002F)\n- [在家学数据科学](https:\u002F\u002Fdatascienceathome.com\u002F)\n- [数据科学混合](https:\u002F\u002Fcommunity.alteryx.com\u002Ft5\u002FData-Science-Mixer\u002Fbg-p\u002Fmixer)\n- [数据怀疑论者](https:\u002F\u002Fdataskeptic.com\u002F)\n- [数据故事](https:\u002F\u002Fdatastori.es\u002F)\n- [数据广播](https:\u002F\u002Fjameskle.com\u002Fwrites\u002Fcategory\u002FDatacast)\n- [数据框架](https:\u002F\u002Fwww.datacamp.com\u002Fcommunity\u002Fpodcast)\n- [DataTalks.Club](https:\u002F\u002Fanchor.fm\u002Fdatatalksclub)\n- [梯度下降](https:\u002F\u002Fwandb.ai\u002Ffully-connected\u002Fgradient-descent)\n- [机器学习101](https:\u002F\u002Fwww.learningmachines101.com\u002F)\n- [让我们谈数据（巴西）](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PLn_z5E4dh_Lj5eogejMxfOiNX3nOhmhmM)\n- [线性离题](https:\u002F\u002Flineardigressions.com\u002F)\n- [非标准偏差](https:\u002F\u002Fnssdeviations.com\u002F)\n- [O'Reilly数据秀播客](https:\u002F\u002Fwww.oreilly.com\u002Fradar\u002Ftopics\u002Foreilly-data-show-podcast\u002F)\n- [偏导数](http:\u002F\u002Fpartiallyderivative.com\u002F)\n- [超级数据科学](https:\u002F\u002Fwww.superdatascience.com\u002Fpodcast\u002F)\n- [数据工程秀](https:\u002F\u002Fwww.dataengineeringshow.com\u002F)\n- [激进AI播客](https:\u002F\u002Fwww.radicalai.org\u002F)\n- [重点是什么](https:\u002F\u002Ffivethirtyeight.com\u002Ftag\u002Fwhats-the-point\u002F)\n- [分析工程播客](https:\u002F\u002Froundup.getdbt.com\u002Fs\u002Fthe-analytics-engineering-podcast)\n\n### YouTube 视频与频道\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n- [什么是机器学习？](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=WXHM_i-fgGo)\n- [吴恩达：深度学习、自学学习和无监督特征学习](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=n1ViNeWhC24)\n- [Data36 - 托米·梅斯特为初学者讲解的数据科学](https:\u002F\u002Fwww.youtube.com\u002Fc\u002FTomiMesterData36comDataScienceForBeginners)\n- [深度学习：来自大数据的智能](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=czLI3oLDe8M)\n- [采访谷歌人工智能与深度学习“教父”杰弗里·辛顿](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=1Wp3IIpssEc)\n- 
[使用Python入门深度学习](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=S75EdAcXHKk)\n- [什么是机器学习，它是如何工作的？](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=elojMnjn4kk)\n- [CampusX](https:\u002F\u002Fwww.youtube.com\u002F@campusx-official)\n- [数据学校](https:\u002F\u002Fwww.youtube.com\u002Fchannel\u002FUCnVzApLJE2ljPZSeQylSEyg) - 数据科学教育\n- [梅拉妮·沃里克为新手讲解的神经网络（2015年5月）](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=Cu6A96TUy_o)\n- [于戈·拉罗谢尔的神经网络视频系列](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PL6Xpj9I5qXYEcOhn7TqghAJ6NAPrNmUBH)\n- [Google DeepMind联合创始人谢恩·莱格 - 机器超智能](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=evNCyRL3DOU)\n- [数据科学入门](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=cHzvYxBN9Ls&list=PLPqVjP3T4RIRsjaW07zoGzH-Z4dBACpxY)\n- [利用遗传算法的数据科学](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=lpD38NxTOnk)\n- [面向初学者的数据科学](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PL2zq7klxX5ATMsmyRazei7ZXkP1GHt-vs)\n- [DataTalks.Club](https:\u002F\u002Fwww.youtube.com\u002Fchannel\u002FUCDvErgK0j5ur3aLgn6U-LqQ)\n- [Mildlyoverfitted - 中级机器学习\u002F深度学习主题教程](https:\u002F\u002Fwww.youtube.com\u002Fchannel\u002FUCYBSjwkGTK06NnDnFsOcR7g)\n- [mlops.community - 对行业专家关于生产级机器学习的访谈](https:\u002F\u002Fwww.youtube.com\u002Fchannel\u002FUCYBSjwkGTK06NnDnFsOcR7g)\n- [ML Street Talk - 不加掩饰的技术性和非商业性，因此你不会听到任何烦人的推销。](https:\u002F\u002Fwww.youtube.com\u002Fc\u002Fmachinelearningstreettalk)\n- [3Blue1Brown的神经网络](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PLZHQObOWTQDNU6R1_67000Dx_ZCJB-3pi)\n- [Sentdex从零开始讲解的神经网络](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PLQVvvaa0QuDcjD5BAw2DxE6OF2tius3V3)\n- [Manning Publications YouTube频道](https:\u002F\u002Fwww.youtube.com\u002Fc\u002FManningPublications\u002Ffeatured)\n- [请教钟博士：如何在数据科学领域领导 - 第1部分](https:\u002F\u002Fyoutu.be\u002FJYuQZii5o58)\n- [请教钟博士：如何在数据科学领域领导 - 第2部分](https:\u002F\u002Fyoutu.be\u002FSzqIXV-O-ko)\n- [请教钟博士：如何在数据科学领域领导 - 第3部分](https:\u002F\u002Fyoutu.be\u002FOgwm7k_smTA)\n- [请教钟博士：如何在数据科学领域领导 - 
第4部分](https:\u002F\u002Fyoutu.be\u002Fa9usjdzTxTU)\n- [请教钟博士：如何在数据科学领域领导 - 第5部分](https:\u002F\u002Fyoutu.be\u002FMYdQq-F3Ws0)\n- [请教钟博士：如何在数据科学领域领导 - 第6部分](https:\u002F\u002Fyoutu.be\u002FLOOt4OVC3hY)\n- [回归模型：应用简单的泊松回归](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=9Hk8K8jhiOo)\n- [深度学习架构](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PLv8Cp2NvcY8DpVcsmOT71kymgMmcr59Mf)\n- [时间序列建模与分析](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PL3N9eeOlCrP5cK0QRQxeJd6GrQvhAtpBK)\n- [塞拉诺学院](https:\u002F\u002Fwww.youtube.com\u002F@SerranoAcademy)\n- [端到端数据科学播放列表](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=S_F_c9e2bz4&list=PLZoTAELRMXVPS-dOaVbAux22vzqdgoGhG)\n- [数据科学导论 - LinkedIn](https:\u002F\u002Fwww.linkedin.com\u002Flearning\u002Fintroduction-to-data-science-22668235\u002Fbeginning-your-data-science-exploration?u=42458916)\n\n## 社交\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n以下是一些社交媒体链接。与更多数据科学家交流吧！\n\n- [Facebook 账号](#facebook-accounts)\n- [Twitter 账号](#twitter-accounts)\n- [Telegram 频道](#telegram-channels)\n- [Slack 社区](#slack-communities)\n- [GitHub 群组](#github-groups)\n- [数据科学竞赛](#data-science-competitions)\n\n\n### Facebook 账号\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n- [Data](https:\u002F\u002Fwww.facebook.com\u002Fdata)\n- [Big Data Scientist](https:\u002F\u002Fwww.facebook.com\u002FBigdatascientist)\n- [Data Science Day](https:\u002F\u002Fwww.facebook.com\u002Fdatascienceday\u002F)\n- [Data Science Academy](https:\u002F\u002Fwww.facebook.com\u002Fnycdatascience)\n- [Facebook 数据科学页面](https:\u002F\u002Fwww.facebook.com\u002Fpages\u002FData-science\u002F431299473579193?ref=br_rs)\n- [Data Science London](https:\u002F\u002Fwww.facebook.com\u002Fpages\u002FData-Science-London\u002F226174337471513)\n- [Data Science Technology and Corporation](https:\u002F\u002Fwww.facebook.com\u002FDataScienceTechnologyCorporation?ref=br_rs)\n- [数据科学 - 
封闭群组](https:\u002F\u002Fwww.facebook.com\u002Fgroups\u002F1394010454157077\u002F?ref=br_rs)\n- [数据科学中心](https:\u002F\u002Fwww.facebook.com\u002Fcenterdatasciences?ref=br_rs)\n- [大数据 Hadoop NoSQL Hive HBase](https:\u002F\u002Fwww.facebook.com\u002Fgroups\u002Fbigdatahadoop\u002F)\n- [分析、数据挖掘、预测建模、人工智能](https:\u002F\u002Fwww.facebook.com\u002Fgroups\u002Fdata.analytics\u002F)\n- [使用 R 语言进行大数据分析](https:\u002F\u002Fwww.facebook.com\u002Fgroups\u002F434352233255448\u002F)\n- [R 与 Hadoop 结合的大数据分析](https:\u002F\u002Fwww.facebook.com\u002Fgroups\u002Frhadoop\u002F)\n- [大数据学习](https:\u002F\u002Fwww.facebook.com\u002Fgroups\u002Fbigdatalearnings\u002F)\n- [大数据、数据科学、数据挖掘与统计学](https:\u002F\u002Fwww.facebook.com\u002Fgroups\u002Fbigdatastatistics\u002F)\n- [大数据\u002FHadoop 专家](https:\u002F\u002Fwww.facebook.com\u002Fgroups\u002FBigDataExpert\u002F)\n- [数据挖掘\u002F机器学习\u002F人工智能](https:\u002F\u002Fwww.facebook.com\u002Fgroups\u002Fmachinelearningforum\u002F)\n- [数据挖掘\u002F大数据 - 社交网络分析](https:\u002F\u002Fwww.facebook.com\u002Fgroups\u002Fdataminingsocialnetworks\u002F)\n- [实用数据科学指南](https:\u002F\u002Fwww.facebook.com\u002Fdatasciencevademecum)\n- [伊斯坦布尔数据科学](https:\u002F\u002Fwww.facebook.com\u002Fgroups\u002Fveribilimiistanbul\u002F)\n- [数据科学博客](https:\u002F\u002Fwww.facebook.com\u002FtheDataScienceBlog\u002F)\n\n\n### Twitter 账号\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n| Twitter | 描述 |\n| --- | --- |\n| [Big Data Combine](https:\u002F\u002Ftwitter.com\u002FBigDataCombine) | 面向希望将其模型转化为交易策略的数据科学家的快速、实时选拔活动 |\n| Big Data Mania | 数据可视化达人、数据记者、增长黑客、《数据科学傻瓜书》（2015年）作者 |\n| [Big Data Science](https:\u002F\u002Ftwitter.com\u002Fanalyticbridge) | 大数据、数据科学、预测建模、商业分析、Hadoop、决策与运筹学。 |\n| Charlie Greenbacker | @ExploreAltamira 的数据科学总监 |\n| [Chris Said](https:\u002F\u002Ftwitter.com\u002FChris_Said) | Twitter 的数据科学家 |\n| [Clare Corthell](https:\u002F\u002Ftwitter.com\u002Fclarecorthell) | 开发、设计、数据科学 @mattermark #hackerei |\n| [DADI 
Charles-Abner](https:\u002F\u002Ftwitter.com\u002FDadiCharles) | #数据科学家 @Ekimetrics. , #机器学习 #数据可视化 #动态图表 #Hadoop #R #Python #NLP #比特币 #数据爱好者 |\n| [Data Science Central](https:\u002F\u002Ftwitter.com\u002FDataScienceCtrl) | Data Science Central 是面向大数据从业者的行业唯一资源。 |\n| [Data Science London](https:\u002F\u002Ftwitter.com\u002Fds_ldn)  | 数据科学。大数据。数据黑客。数据成瘾者。数据初创企业。开放数据 |\n| [Data Science Renee](https:\u002F\u002Ftwitter.com\u002FBecomingDataSci) | 记录我从 SQL 数据分析师到攻读工程硕士学位并最终成为数据科学家的成长历程 |\n| [Data Science Report](https:\u002F\u002Ftwitter.com\u002FTedOBrien93) | 我们的使命是帮助指导和推动数据科学与分析领域的职业发展 |\n| [Data Science Tips](https:\u002F\u002Ftwitter.com\u002Fdatasciencetips) | 全球数据科学家的技巧与窍门！#数据科学 #大数据 |\n| [Data Vizzard](https:\u002F\u002Ftwitter.com\u002FDataVisualizati) | 数据可视化、安全、军事 |\n| [DataScienceX](https:\u002F\u002Ftwitter.com\u002FDataScienceX) | |\n| deeplearning4j | |\n| [DJ Patil](https:\u002F\u002Ftwitter.com\u002Fdpatil) | 白宫数据主管，RelateIQ 副总裁。 |\n| [Domino Data Lab](https:\u002F\u002Ftwitter.com\u002FDominoDataLab) | |\n| [Drew Conway](https:\u002F\u002Ftwitter.com\u002Fdrewconway) | 数据极客、黑客、冲突研究者。 |\n| Emilio Ferrara | #网络、#机器学习 和 #数据科学。我在 #社交媒体 领域工作。印第安纳大学博士后研究员 |\n| [Erin Bartolo](https:\u002F\u002Ftwitter.com\u002Ferinbartolo) | 与 #大数据 同行——对它的炒作既爱又恨。@iSchoolSU #数据科学 项目负责人。 |\n| [Greg Reda](https:\u002F\u002Ftwitter.com\u002Fgjreda)  | 在 _GrubHub_ 工作，专注于数据和 pandas |\n| [Gregory Piatetsky](https:\u002F\u002Ftwitter.com\u002Fkdnuggets) | KDnuggets 总裁，分析\u002F大数据\u002F数据挖掘\u002F数据科学专家，KDD 和 SIGKDD 联合创始人，曾担任两家初创公司的首席科学家，兼职哲学家。 |\n| [Hadley Wickham](https:\u002F\u002Ftwitter.com\u002Fhadleywickham) | RStudio 的首席科学家，同时兼任奥克兰大学、斯坦福大学和莱斯大学的统计学兼职教授。 |\n| [Hakan Kardas](https:\u002F\u002Ftwitter.com\u002Fhakan_kardes) | 数据科学家 |\n| [Hilary Mason](https:\u002F\u002Ftwitter.com\u002Fhmason) | @accel 的驻院数据科学家。 |\n| [Jeff Hammerbacher](https:\u002F\u002Ftwitter.com\u002Fhackingdata)  | 转发关于数据科学的内容 |\n| [John Myles White](https:\u002F\u002Ftwitter.com\u002Fjohnmyleswhite)  | 
Facebook 的科学家兼 Julia 开发者。著有《黑客机器学习》和《用于网站优化的赌徒算法》。推文仅代表个人观点。 |\n| [Juan Miguel Lavista](https:\u002F\u002Ftwitter.com\u002FBDataScientist) | 微软数据科学团队的首席数据科学家 |\n| [Julia Evans](https:\u002F\u002Ftwitter.com\u002Fb0rk) | 黑客——Pandas——数据分析 |\n| [Kenneth Cukier](https:\u002F\u002Ftwitter.com\u002Fkncukier) | 《经济学人》的数据编辑，以及《大数据》一书的共同作者（http:\u002F\u002Fwww.big-data-book.com\u002F）。 |\n| Kevin Davenport | https:\u002F\u002Fwww.meetup.com\u002FSan-Diego-Data-Science-R-Users-Group\u002F 的组织者 |\n| [Kevin Markham](https:\u002F\u002Ftwitter.com\u002Fjustmarkham) | 数据科学讲师，[Data School](https:\u002F\u002Fwww.dataschool.io\u002F) 的创始人 |\n| [Kim Rees](https:\u002F\u002Ftwitter.com\u002Fkrees) | 交互式数据可视化及工具。数据漫游者。 |\n| [Kirk Borne](https:\u002F\u002Ftwitter.com\u002FKirkDBorne) | 数据科学家，天体物理学博士，顶级 #大数据 影响者。 |\n| Linda Regber | 数据故事讲述者，擅长数据可视化。 |\n| [Luis Rei](https:\u002F\u002Ftwitter.com\u002Flmrei) | 博士生。编程、移动、Web。人工智能、智能机器人、机器学习、数据挖掘、自然语言处理、数据科学。 |\n| Mark Stevenson | Salt (@SaltJobs) 的数据分析招聘专家 分析 - 洞察 - 大数据 - 数据科学 |\n| [Matt Harrison](https:\u002F\u002Ftwitter.com\u002F__mharrison__) | 全栈 Python 爱好者的观点，作家、讲师，目前从事数据科学家工作。偶尔也会兼顾父亲、丈夫的角色，以及有机园艺。 |\n| [Matthew Russell](https:\u002F\u002Ftwitter.com\u002Fptwobrussell) | 挖掘社交网络。 |\n| [Mert Nuhoğlu](https:\u002F\u002Ftwitter.com\u002Fmertnuhoglu)  | BizQualify 的数据科学家，开发者 |\n| [Monica Rogati](https:\u002F\u002Ftwitter.com\u002Fmrogati) | Jawbone 的数据工作者。曾在 LinkedIn 将数据转化为故事和产品。文本挖掘、应用机器学习、推荐系统。前游戏玩家、前机器编码员；也是一名取名者。 |\n| [Noah Iliinsky](https:\u002F\u002Ftwitter.com\u002Fnoahi) | 可视化与交互设计师。务实的骑行者。著有可视化相关书籍：https:\u002F\u002Fwww.oreilly.com\u002Fpub\u002Fau\u002F4419 |\n| [Paul Miller](https:\u002F\u002Ftwitter.com\u002FPaulMiller) | 云计算\u002F大数据\u002F开放数据分析师及顾问。作家、演讲者和主持人。Gigaom 研究分析师。 |\n| [Peter Skomoroch](https:\u002F\u002Ftwitter.com\u002Fpeteskomoroch) | 创建智能系统以自动化任务并改善决策。企业家，曾任 LinkedIn 首席数据科学家。机器学习、产品设计、网络 |\n| [Prash Chan](https:\u002F\u002Ftwitter.com\u002FMDMGeek) | IBM 的解决方案架构师，主攻主数据管理、数据质量和数据治理，同时也是博客作者。关注数据科学、Hadoop、大数据和云技术。 
|\n| [Quora Data Science](https:\u002F\u002Ftwitter.com\u002Fq_datascience)  | Quora 的数据科学话题 |\n| [R-Bloggers](https:\u002F\u002Ftwitter.com\u002FRbloggers) | 转发来自 R 社区博客、数据科学会议，以及（！）面向数据科学家的招聘信息。 |\n| [Rand Hindi](https:\u002F\u002Ftwitter.com\u002Frandhindi) | |\n| [Randy Olson](https:\u002F\u002Ftwitter.com\u002Frandal_olson) | 研究人工智能的计算机科学家。数据极客。@DataIsBeautiful 社区领袖。#开放科学 的倡导者。 |\n| [Recep Erol](https:\u002F\u002Ftwitter.com\u002FEROLRecep) | UALR 的数据科学极客 |\n| [Ryan Orban](https:\u002F\u002Ftwitter.com\u002Fryanorban) | 数据科学家，遗传折纸爱好者，硬件发烧友 |\n| [Sean J. Taylor](https:\u002F\u002Ftwitter.com\u002Fseanjtaylor) | 社会科学家。黑客。Facebook 数据科学团队成员。关键词：实验、因果推断、统计学、机器学习、经济学。 |\n| [Silvia K. Spiva](https:\u002F\u002Ftwitter.com\u002Fsilviakspiva) | 思科的 #数据科学 工作 |\n| [Harsh B. Gupta](https:\u002F\u002Ftwitter.com\u002Fharshbg) | BBVA Compass 的数据科学家 |\n| [Spencer Nelson](https:\u002F\u002Ftwitter.com\u002Fspenczar_n) | 数据极客 |\n| [Talha Oz](https:\u002F\u002Ftwitter.com\u002FtozCSS) | 喜欢 ABM、SNA、DM、ML、NLP、HI、Python、Java。顶尖的 Kaggle 用户\u002F数据科学家 |\n| [Tasos Skarlatidis](https:\u002F\u002Ftwitter.com\u002Fanskarl) | 复杂事件处理、大数据、人工智能和机器学习。热衷于编程和开源。 |\n| [Terry Timko](https:\u002F\u002Ftwitter.com\u002FTerry_Timko) | 信息政府；大数据；数据即服务；数据科学；开放、社交和商业数据的融合 |\n| [Tony Baer](https:\u002F\u002Ftwitter.com\u002FTonyBaer) | Ovum 的 IT 分析师，主要关注大数据和数据管理，并涉及部分系统工程领域。 |\n| [Tony Ojeda](https:\u002F\u002Ftwitter.com\u002Ftonyojeda3) | 数据科学家、作家、企业家。@DataCommunityDC 的联合创始人，@DistrictDataLab 的创始人。#数据科学 #大数据 #DataDC |\n| [Vamshi Ambati](https:\u002F\u002Ftwitter.com\u002Fvambati) | PayPal 的数据科学工作。#NLP、#机器学习；卡内基梅隆大学校友（博客：https:\u002F\u002Fallthingsds.wordpress.com ） |\n| [Wes McKinney](https:\u002F\u002Ftwitter.com\u002Fwesmckinn) | Pandas（Python 数据分析库）。 |\n| [WileyEd](https:\u002F\u002Ftwitter.com\u002FWileyEd) | 高级经理——@Seagate 大数据分析——麦肯锡校友 #大数据 + #分析 推广者 #Hadoop、#云、#数字 和 #R 的狂热爱好者 |\n| [WNYC 数据新闻团队](https:\u002F\u002Ftwitter.com\u002Fdatanews) | @WNYC 的数据新闻团队。践行数据驱动型新闻报道，将其可视化，并公开我们的工作过程。 |\n| [Alexey 
Grigorev](https:\u002F\u002Ftwitter.com\u002FAl_Grigor) | 数据科学作家 |\n| [İlker Arslan](https:\u002F\u002Ftwitter.com\u002Filkerarslan_35) | 数据科学作家。主要分享关于 Julia 编程的内容 |\n| [INEVITABLE](https:\u002F\u002Ftwitter.com\u002FWeAreInevitable) | 怀揣 AI 和数据科学梦想的初创公司，位于英国英格兰 |\n| [Jan Oliver Rüdiger](https:\u002F\u002Fx.com\u002FnotesJOR) | ML、DL 和 数据科学——尤其侧重于文本\u002F数据挖掘 |\n\n### Telegram 频道\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n- [Open Data Science](https:\u002F\u002Ft.me\u002Fopendatascience) – 首个 Telegram 数据科学频道。涵盖与数据科学相关的所有技术和热门内容：人工智能、大数据、机器学习、统计学、基础数学及其应用。\n- [Loss function porn](https:\u002F\u002Ft.me\u002Floss_function_porn) — 以视频或图形可视化形式呈现的精美数据科学\u002F机器学习主题帖子。\n- [Machinelearning](https:\u002F\u002Ft.me\u002Fai_machinelearning_big_data) – 每日机器学习新闻。\n\n\n### Slack 社区\n[返回顶部](#awesome-data-science)\n\n- [DataTalks.Club](https:\u002F\u002Fdatatalks.club)\n\n### GitHub 群组\n- [伯克利数据科学研究所](https:\u002F\u002Fgithub.com\u002FBIDS)\n\n### 数据科学竞赛\n\n一些数据挖掘竞赛平台\n\n- [Kaggle](https:\u002F\u002Fwww.kaggle.com\u002F)\n- [DrivenData](https:\u002F\u002Fwww.drivendata.org\u002F)\n- [Analytics Vidhya](https:\u002F\u002Fdatahack.analyticsvidhya.com\u002F)\n- [InnoCentive](https:\u002F\u002Fwww.innocentive.com\u002F)\n- [Microprediction](https:\u002F\u002Fwww.microprediction.com\u002Fpython-1)\n\n## 有趣的内容\n\n- [信息图](#infographics)\n- [数据集](#datasets)\n- [漫画](#comics)\n\n\n### 信息图\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n| 预览                                                                                                                                                                                                                                     | 描述                                                                                                                                                                                                                                                  |\n| 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |\n| [\u003Cimg src=\"https:\u002F\u002Fi.imgur.com\u002F0OoLaa5.png\" width=\"150\" \u002F>](https:\u002F\u002Fi.imgur.com\u002F0OoLaa5.png)                                                                                                                                                | [数据科学家与数据工程师的关键区别](https:\u002F\u002Fsearchbusinessanalytics.techtarget.com\u002Ffeature\u002FKey-differences-of-a-data-scientist-vs-data-engineer)                                                                                         |\n| [\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Facademic_awesome-datascience_readme_8865c0342320.png\" width=\"150\" \u002F>](https:\u002F\u002Fs3.amazonaws.com\u002Fassets.datacamp.com\u002Fblog_assets\u002FDataScienceEightSteps_Full.png)                    | 由[DataCamp](https:\u002F\u002Fwww.datacamp.com) 提供的“成为数据科学家的八步”可视化指南 [(img)](https:\u002F\u002Fs3.amazonaws.com\u002Fassets.datacamp.com\u002Fblog_assets\u002FDataScienceEightSteps_Full.png)                                                              |\n| [\u003Cimg src=\"https:\u002F\u002Fi.imgur.com\u002FW2t2Roz.png\" width=\"150\" \u002F>](https:\u002F\u002Fi.imgur.com\u002FFxsL3b8.png)                                                                                                                                                | 关于所需技能的思维导图 ([img](https:\u002F\u002Fi.imgur.com\u002FFxsL3b8.png))                                                                                           
                                                                                               |\n| [\u003Cimg src=\"https:\u002F\u002Fi.imgur.com\u002Frb9ruaa.png\" width=\"150\" \u002F>](https:\u002F\u002Fnirvacana.com\u002Fthoughts\u002Fwp-content\u002Fuploads\u002F2013\u002F07\u002FRoadToDataScientist1.png)                                                                                              | Swami Chandrasekaran 制作了一份基于地铁线路图的课程体系([链接](http:\u002F\u002Fnirvacana.com\u002Fthoughts\u002F2013\u002F07\u002F08\u002Fbecoming-a-data-scientist\u002F))。                                                                                                                                            |\n| [\u003Cimg src=\"https:\u002F\u002Fi.imgur.com\u002FXBgKF2l.png\" width=\"150\" \u002F>](https:\u002F\u002Fi.imgur.com\u002F4ZBBvb0.png)                                                                                                                                                | 由[@kzawadz](https:\u002F\u002Ftwitter.com\u002Fkzawadz) 通过[twitter](https:\u002F\u002Ftwitter.com\u002FMktngDistillery\u002Fstatus\u002F538671811991715840) 发布                                                                                                                                      |\n| [\u003Cimg src=\"https:\u002F\u002Fi.imgur.com\u002Fl9ZGtal.jpg\" width=\"150\" \u002F>](https:\u002F\u002Fi.imgur.com\u002FxLY3XZn.jpg)                                                                                                                                                | 由[Data Science Central](https:\u002F\u002Fwww.datasciencecentral.com\u002F) 制作                                                                                                                                                                                                |\n| [\u003Cimg src=\"https:\u002F\u002Fi.imgur.com\u002FTWkB4X6.png\" width=\"150\" \u002F>](https:\u002F\u002Fi.imgur.com\u002F0TydZ4M.png)                     
                                                                                                                           | 数据科学之争：R 语言 vs Python                                                                                                                                                                                                                               |\n| [\u003Cimg src=\"https:\u002F\u002Fi.imgur.com\u002FgtTlW5I.png\" width=\"150\" \u002F>](https:\u002F\u002Fi.imgur.com\u002FHnRwlce.png)                                                                                                                                                | 如何选择统计学或机器学习方法                                                                                                                                                                                                     |\n| [\u003Cimg src=\"https:\u002F\u002Fscikit-learn.org\u002F1.5\u002F_downloads\u002Fb82bf6cd7438a351f19fac60fbc0d927\u002Fml_map.svg\" width=\"150\" \u002F>](https:\u002F\u002Fscikit-learn.org\u002F1.5\u002F_downloads\u002Fb82bf6cd7438a351f19fac60fbc0d927\u002Fml_map.svg)                                                                                                           | [如何选择合适的估计器](https:\u002F\u002Fscikit-learn.org\u002F1.5\u002Fmachine_learning_map.html#choosing-the-right-estimator)                                                                                                                                                                                                                                 |\n| [\u003Cimg src=\"https:\u002F\u002Fi.imgur.com\u002F3JSyUq1.png\" width=\"150\" \u002F>](https:\u002F\u002Fi.imgur.com\u002FuEqMwZa.png)                                                                                                                                                | 数据科学行业：各角色及其职责                                                                                                          
                                                                                                           |\n| [\u003Cimg src=\"https:\u002F\u002Fi.imgur.com\u002FDQqFwwy.png\" width=\"150\" \u002F>](https:\u002F\u002Fi.imgur.com\u002FRsHqY84.png)                                                                                                                                                | 数据科学~~维恩~~欧拉图                                                                                                                                                                                                                          |\n| [\u003Cimg src=\"https:\u002F\u002Fwww.springboard.com\u002Fblog\u002Fwp-content\u002Fuploads\u002F2016\u002F03\u002F20160324_springboard_vennDiagram.png\" width=\"150\" height=\"150\" \u002F>](https:\u002F\u002Fwww.springboard.com\u002Fblog\u002Fwp-content\u002Fuploads\u002F2016\u002F03\u002F20160324_springboard_vennDiagram.png) | 来自[Springboard](https:\u002F\u002Fwww.springboard.com) 的不同数据科学技能与岗位角色                                                                                                                                                       |\n| [\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Facademic_awesome-datascience_readme_dc617a4aeade.jpg\" width=\"150\" alt=\"避免数据谬误\" \u002F>](https:\u002F\u002Fdata-literacy.geckoboard.com\u002Fposter\u002F)                                                 | 一种简单友好的方式，用于教导非数据科学家或非统计学家的同事[如何避免数据中的错误](https:\u002F\u002Fdata-literacy.geckoboard.com\u002Fposter\u002F)。出自 Geckoboard 的[数据素养课程](https:\u002F\u002Fdata-literacy.geckoboard.com\u002F)。 |\n\n### 数据集\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n- [Academic Torrents](https:\u002F\u002Facademictorrents.com\u002F)\n- [ADS-B Exchange](https:\u002F\u002Fwww.adsbexchange.com\u002Fdata-samples\u002F) - 针对飞机及自动相关监视广播（ADS-B）数据源的特定数据集。\n- [AI Displacement 
Tracker](https:\u002F\u002Fgithub.com\u002Fnoahaust2\u002Fai-displacement-tracker) - 结构化数据集，追踪92起与人工智能相关的裁员事件，涉及12个国家、11个行业的453,748名员工。提供JSON和CSV格式，采用CC-BY-4.0许可。\n- [hadoopilluminated.com](https:\u002F\u002Fhadoopilluminated.com\u002Fhadoop_illuminated\u002FPublic_Bigdata_Sets.html)\n- [data.gov](https:\u002F\u002Fcatalog.data.gov\u002Fdataset) - 美国政府开放数据的官方平台\n- [美国人口普查局](https:\u002F\u002Fwww.census.gov\u002F)\n- [enigma.com](https:\u002F\u002Fenigma.com\u002F) - 探索公共数据的世界——快速搜索并分析由政府、公司和组织发布的数十亿条公开记录。\n- [datahub.io](https:\u002F\u002Fdatahub.io\u002F)\n- [aws.amazon.com\u002Fdatasets](https:\u002F\u002Faws.amazon.com\u002Fdatasets\u002F)\n- [datacite.org](https:\u002F\u002Fdatacite.org\u002F)\n- [欧洲数据官方门户](https:\u002F\u002Fdata.europa.eu\u002Fen)\n- [NASDAQ:DATA](https:\u002F\u002Fdata.nasdaq.com\u002F) - Nasdaq Data Link，一流的金融、经济及另类数据源。\n- [国会股票大脑](https:\u002F\u002Fcongressionalstockbrain.com) - 免费的AI驱动工具，根据重要性对美国国会议员的《STOCK法案》交易披露进行评分。基于537位议员的公开交易申报生成机器评分信号。\n- [figshare.com](https:\u002F\u002Ffigshare.com\u002F)\n- [GeoLite Legacy 可下载数据库](https:\u002F\u002Fdev.maxmind.com\u002Fgeoip)\n- [Hugging Face 数据集](https:\u002F\u002Fhuggingface.co\u002Fdatasets)\n- [Quora关于大型数据集的回答](https:\u002F\u002Fwww.quora.com\u002FWhere-can-I-find-large-datasets-open-to-the-public)\n- [公共大数据集](https:\u002F\u002Fhadoopilluminated.com\u002Fhadoop_illuminated\u002FPublic_Bigdata_Sets.html)\n- [Kaggle 数据集](https:\u002F\u002Fwww.kaggle.com\u002Fdatasets)\n- [人类遗传变异深度目录](https:\u002F\u002Fwww.internationalgenome.org\u002Fdata)\n- [社区维护的知名人物、地点和事物数据库](https:\u002F\u002Fdevelopers.google.com\u002Ffreebase\u002F)\n- [谷歌公共数据](https:\u002F\u002Fwww.google.com\u002Fpublicdata\u002Fdirectory)\n- [世界银行数据](https:\u002F\u002Fdata.worldbank.org\u002F)\n- [纽约出租车数据](https:\u002F\u002Fchriswhong.github.io\u002Fnyctaxi\u002F)\n- [Open Data Philly](https:\u002F\u002Fwww.opendataphilly.org\u002F) 为费城市民连接数据资源\n- [grouplens.org](https:\u002F\u002Fgrouplens.org\u002Fdatasets\u002F) 
提供电影（含评分）、书籍和维基数据集示例\n- [加州大学欧文分校机器学习存储库](https:\u002F\u002Farchive.ics.uci.edu\u002Fml\u002F) - 包含适合机器学习的数据集\n- [研究级数据集](https:\u002F\u002Fweb.archive.org\u002Fweb\u002F20150320022752\u002Fhttps:\u002F\u002Fbitly.com\u002Fbundles\u002Fhmason\u002F1) 由[Hilary Mason](https:\u002F\u002Fweb.archive.org\u002Fweb\u002F20150501033715\u002Fhttps:\u002F\u002Fbitly.com\u002Fu\u002Fhmason\u002Fbundles)整理\n- [国家环境信息中心](https:\u002F\u002Fwww.ncei.noaa.gov\u002F)\n- [ClimateData.us](https:\u002F\u002Fwww.climatedata.us\u002F)（相关：[美国气候韧性工具箱](https:\u002F\u002Ftoolkit.climate.gov\u002F))\n- [r\u002Fdatasets](https:\u002F\u002Fwww.reddit.com\u002Fr\u002Fdatasets\u002F)\n- [MapLight](https:\u002F\u002Fwww.maplight.org\u002Fdata-series) - 为公众免费提供多种数据集。点击下方数据集了解更多详情。\n- [GHDx](https:\u002F\u002Fghdx.healthdata.org\u002F) - 健康指标与评估研究所——全球健康与人口统计数据目录，包含IHME的研究成果\n- [圣路易斯联邦储备经济数据 - FRED](https:\u002F\u002Ffred.stlouisfed.org\u002F)\n- [新西兰经济研究所 – Data1850](https:\u002F\u002Fdata1850.nz\u002F)\n- [开放数据源](https:\u002F\u002Fgithub.com\u002Fdatasciencemasters\u002Fdata)\n- [联合国儿童基金会数据](https:\u002F\u002Fdata.unicef.org\u002F)\n- [undata](https:\u002F\u002Fdata.un.org\u002F)\n- [NASA社会经济数据与应用中心 - SEDAC](https:\u002F\u002Fearthdata.nasa.gov\u002Fcenters\u002Fsedac-daac)\n- [GDELT项目](https:\u002F\u002Fwww.gdeltproject.org\u002F)\n- [瑞典统计局](https:\u002F\u002Fwww.scb.se\u002Fen\u002F)\n- [StackExchange 数据探索器](https:\u002F\u002Fdata.stackexchange.com) - 开源工具，用于对Stack Exchange网络中的公开数据运行任意查询。\n- [旧金山政府开放数据](https:\u002F\u002Fdatasf.org\u002Fopendata\u002F)\n- [IBM资产数据集](https:\u002F\u002Fdeveloper.ibm.com\u002Fexchanges\u002Fdata\u002F)\n- [开放数据指数](http:\u002F\u002Findex.okfn.org\u002F)\n- [公共Git档案](https:\u002F\u002Fgithub.com\u002Fsrc-d\u002Fdatasets\u002Ftree\u002Fmaster\u002FPublicGitArchive)\n- [GHTorrent](https:\u002F\u002Fghtorrent.org\u002F)\n- [微软研究院开放数据](https:\u002F\u002Fmsropendata.com\u002F)\n- [印度开放政府数据平台](https:\u002F\u002Fdata.gov.in\u002F)\n- 
[谷歌数据集搜索（测试版）](https:\u002F\u002Fdatasetsearch.research.google.com\u002F)\n- [NAYN.CO土耳其新闻分类数据](https:\u002F\u002Fgithub.com\u002Fnaynco\u002Fnayn.data)\n- [新冠疫情数据](https:\u002F\u002Fgithub.com\u002Fdatasets\u002Fcovid-19)\n- [谷歌新冠疫情开放数据](https:\u002F\u002Fgithub.com\u002Fgoogle-research\u002Fopen-covid-19-data)\n- [安然邮件数据集](https:\u002F\u002Fwww.cs.cmu.edu\u002F~.\u002Fenron\u002F)\n- [5000张服装图片](https:\u002F\u002Fgithub.com\u002Falexeygrigorev\u002Fclothing-dataset)\n- [IBB开放门户](https:\u002F\u002Fdata.ibb.gov.tr\u002Fen\u002F)\n- [人道主义数据交换平台](https:\u002F\u002Fdata.humdata.org\u002F)\n- [25万+份职位招聘信息](https:\u002F\u002Faws.amazon.com\u002Fmarketplace\u002Fpp\u002Fprodview-p2554p3tczbes) - 一份不断扩充的历史职位数据集，涵盖2020年至今的卢森堡地区招聘信息。AWS数据交换平台上托管了超过25万份职位信息，可免费获取。\n- [FinancialData.Net](https:\u002F\u002Ffinancialdata.net\u002Fdocumentation) - 金融数据集（股市数据、财务报表、可持续发展数据等）。\n- [谷歌数据集搜索](https:\u002F\u002Fdatasetsearch.research.google.com\u002F) – 在全网查找各类数据集。\n- [notesjor语料库集合](https:\u002F\u002Fnotes.jan-oliver-ruediger.de\u002Fkorpora\u002F) - 免费语料库（超过60亿词素），以德语为主，涵盖历史与现代德语。\n- [CLARIN存储库](https:\u002F\u002Flindat.mff.cuni.cz\u002Frepository\u002Fhome) - CLARIN是欧洲的科学数据存储库。\n- [GBIF](https:\u002F\u002Fwww.gbif.org\u002F) - 全球生物多样性信息设施：24亿+物种出现记录。免费开放API，适用于生态建模和机器学习研究。\n- [FAOSTAT](https:\u002F\u002Fwww.fao.org\u002Ffaostat\u002Fen\u002F) - 联合国粮农组织关于食品生产、贸易、土地利用和排放的统计数据，覆盖245多个国家。提供免费API和批量下载功能。\n- [FirstData](https:\u002F\u002Fgithub.com\u002FMLT-OSS\u002FFirstData) - 全球最全面的权威数据源知识库。收录来自各国政府、国际组织和研究机构的210余种精选来源。支持MCP集成，适用于AI智能体。采用MIT许可证。\n- [latamdata-py](https:\u002F\u002Fgithub.com\u002Fjuanmoisesd\u002Flatamdata-py) - 一个Python软件包，可一键访问来自拉丁美洲的38个开放研究数据集（健康、神经科学、心理健康、经济学等）。使用pip install latamdata-py即可安装。\n- [ZipCheckup](https:\u002F\u002Fgithub.com\u002Fartakulov\u002Fus-water-quality-data) - 面向美国42,000多个邮政编码区的免费ZIP级别环境安全数据：水质、空气质量、PFAS污染、氡气、铅含量、洪水风险等11个领域。提供公共REST API、npm\u002FPyPI软件包，并采用CC BY 4.0许可。\n\n### 漫画\n**[`^        返回顶部        ^`](#awesome-data-science)**\n\n- 
[漫画合集](https:\u002F\u002Fmedium.com\u002F@nikhil_garg\u002Fa-compilation-of-comics-explaining-statistics-data-science-and-machine-learning-eeefbae91277)\n- [卡通](https:\u002F\u002Fwww.kdnuggets.com\u002Fwebsites\u002Fcartoons.html)\n- [数据科学卡通](https:\u002F\u002Fwww.cartoonstock.com\u002Fdirectory\u002Fd\u002Fdata_science.asp)\n- [数据科学：XKCD版](https:\u002F\u002Fdavidlindelof.com\u002Fdata-science-the-xkcd-edition\u002F)\n\n## 其他超赞列表\n\n- 更多令人惊叹的列表可以在 [awesome-awesomeness](https:\u002F\u002Fgithub.com\u002Fbayandin\u002Fawesome-awesomeness) 中找到。\n- [超赞机器学习](https:\u002F\u002Fgithub.com\u002Fjosephmisiti\u002Fawesome-machine-learning)\n- [列表](https:\u002F\u002Fgithub.com\u002Fjnv\u002Flists)\n- [超赞数据可视化](https:\u002F\u002Fgithub.com\u002Fjavierluraschi\u002Fawesome-dataviz)\n- [超赞 Python](https:\u002F\u002Fgithub.com\u002Fvinta\u002Fawesome-python)\n- [数据科学 IPython 笔记本。](https:\u002F\u002Fgithub.com\u002Fdonnemartin\u002Fdata-science-ipython-notebooks)\n- [超赞 R](https:\u002F\u002Fgithub.com\u002Fqinwf\u002Fawesome-R)\n- [超赞数据集](https:\u002F\u002Fgithub.com\u002Fawesomedata\u002Fawesome-public-datasets)\n- [超赞机器学习与深度学习教程](https:\u002F\u002Fgithub.com\u002Fujjwalkarn\u002FMachine-Learning-Tutorials\u002Fblob\u002Fmaster\u002FREADME.md)\n- [超赞数据科学创意](https:\u002F\u002Fgithub.com\u002FJosPolfliet\u002Fawesome-ai-usecases)\n- [面向软件工程师的机器学习](https:\u002F\u002Fgithub.com\u002FZuzooVn\u002Fmachine-learning-for-software-engineers)\n- [社区精选的数据科学资源](https:\u002F\u002Fhackr.io\u002Ftutorials\u002Flearn-data-science)\n- [源代码上的超赞机器学习](https:\u002F\u002Fgithub.com\u002Fsrc-d\u002Fawesome-machine-learning-on-source-code)\n- [超赞社区发现](https:\u002F\u002Fgithub.com\u002Fbenedekrozemberczki\u002Fawesome-community-detection)\n- [超赞图分类](https:\u002F\u002Fgithub.com\u002Fbenedekrozemberczki\u002Fawesome-graph-classification)\n- [超赞决策树论文](https:\u002F\u002Fgithub.com\u002Fbenedekrozemberczki\u002Fawesome-decision-tree-papers)\n- 
[超赞欺诈检测论文](https:\u002F\u002Fgithub.com\u002Fbenedekrozemberczki\u002Fawesome-fraud-detection-papers)\n- [超赞梯度提升论文](https:\u002F\u002Fgithub.com\u002Fbenedekrozemberczki\u002Fawesome-gradient-boosting-papers)\n- [超赞计算机视觉模型](https:\u002F\u002Fgithub.com\u002Fnerox8664\u002Fawesome-computer-vision-models)\n- [超赞蒙特卡洛树搜索](https:\u002F\u002Fgithub.com\u002Fbenedekrozemberczki\u002Fawesome-monte-carlo-tree-search-papers)\n- [常见统计与机器学习术语表](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fglossary-of-common-statistics-and-machine-learning-terms\u002F)\n- [100 篇自然语言处理论文](https:\u002F\u002Fgithub.com\u002Fmhagiwara\u002F100-nlp-papers)\n- [超赞游戏数据集](https:\u002F\u002Fgithub.com\u002Fleomaurodesenv\u002Fgame-datasets#readme)\n- [数据科学面试题](https:\u002F\u002Fgithub.com\u002Falexeygrigorev\u002Fdata-science-interviews)\n- [超赞可解释图推理](https:\u002F\u002Fgithub.com\u002FAstraZeneca\u002Fawesome-explainable-graph-reasoning)\n- [顶级数据科学面试题](https:\u002F\u002Fwww.interviewbit.com\u002Fdata-science-interview-questions\u002F)\n- [超赞药物协同、相互作用及多药联用预测](https:\u002F\u002Fgithub.com\u002FAstraZeneca\u002Fawesome-drug-pair-scoring)\n- [深度学习面试题](https:\u002F\u002Fwww.adaface.com\u002Fblog\u002Fdeep-learning-interview-questions\u002F)\n- [2023 年数据科学未来趋势](https:\u002F\u002Fmedium.com\u002Fthe-modern-scientist\u002Ftop-future-trends-in-data-science-in-2023-3e616c8998b8)\n- [生成式 AI 如何改变创意工作](https:\u002F\u002Fhbr.org\u002F2022\u002F11\u002Fhow-generative-ai-is-changing-creative-work)\n- [什么是生成式 AI？](https:\u002F\u002Fwww.techtarget.com\u002Fsearchenterpriseai\u002Fdefinition\u002Fgenerative-AI)\n- [100 多道机器学习面试题（从入门到高级）](https:\u002F\u002Fwww.appliedaicourse.com\u002Fblog\u002Fmachine-learning-interview-questions\u002F)\n- [数据科学项目](https:\u002F\u002Fgithub.com\u002Fveb-101\u002FData-Science-Projects)\n- [数据科学是好职业吗？](https:\u002F\u002Fwww.scaler.com\u002Fblog\u002Fis-data-science-a-good-career\u002F)\n- 
[数据科学的未来：预测与趋势](https:\u002F\u002Fwww.appliedaicourse.com\u002Fblog\u002Ffuture-of-data-science\u002F)\n- [数据科学与机器学习：有什么区别？](https:\u002F\u002Fwww.appliedaicourse.com\u002Fblog\u002Fdata-science-and-machine-learning-whats-the-difference\u002F)\n- [数据科学中的 AI：用途、角色和工具](https:\u002F\u002Fwww.scaler.com\u002Fblog\u002Fai-in-data-science\u002F)\n- [顶级 13 种数据科学编程语言](https:\u002F\u002Fwww.appliedaicourse.com\u002Fblog\u002Fdata-science-programming-languages\u002F)\n- [40 多个数据分析项目创意](https:\u002F\u002Fwww.appliedaicourse.com\u002Fblog\u002Fdata-analytics-projects-ideas\u002F)\n- [最佳带证书的数据科学课程](https:\u002F\u002Fwww.appliedaicourse.com\u002Fblog\u002Fbest-data-science-courses\u002F)\n- [生成式 AI 模型](https:\u002F\u002Fwww.appliedaicourse.com\u002Fblog\u002Fgenerative-ai-models\u002F)\n- [超赞数据分析](https:\u002F\u002Fgithub.com\u002FPavelGrigoryevDS\u002Fawesome-data-analysis) - 一个精心挑选的数据分析工具、库和资源列表。\n- [超赞证据综合](https:\u002F\u002Fgithub.com\u002Fevidencesynthesis-tools\u002Fawesome-evidence-synthesis) - 一个精选的开源工具列表，用于系统综述、荟萃分析和证据综合。\n\n\n### 爱好\n- [超赞音乐制作](https:\u002F\u002Fgithub.com\u002Fad-si\u002Fawesome-music-production)","# Awesome Data Science 快速上手指南\n\n`awesome-datascience` 并非一个可直接安装的软件包，而是一个 curated（精选）的开源资源列表，旨在为数据科学初学者和从业者提供学习路径、工具库、教程及社区资源。本指南将帮助你利用该仓库构建本地数据科学开发环境，并开启学习之旅。\n\n## 环境准备\n\n在开始之前，请确保你的系统满足以下要求：\n\n*   **操作系统**：Windows, macOS 或 Linux (推荐 Ubuntu\u002FCentOS)。\n*   **编程语言**：推荐安装 **Python 3.8+** (数据科学领域最主流语言)，也可选择 R。\n*   **包管理器**：\n    *   **pip**: Python 自带。\n    *   **Anaconda\u002FMiniconda** (强烈推荐): 用于管理复杂的科学计算依赖，避免环境冲突。\n*   **开发工具**：Git (用于克隆仓库), Jupyter Notebook\u002FLab 或 VS Code。\n\n### 国内加速方案\n由于网络原因，建议配置国内镜像源以提升下载速度：\n*   **PyPI 镜像**: 清华大学 (`https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple`) 或 阿里云 (`https:\u002F\u002Fmirrors.aliyun.com\u002Fpypi\u002Fsimple\u002F`)\n*   **Conda 镜像**: 清华大学 (`https:\u002F\u002Fmirrors.tuna.tsinghua.edu.cn\u002Fanaconda\u002Fpkgs\u002Fmain\u002F`)\n\n## 安装步骤\n\n### 1. 
获取资源列表\n克隆 `awesome-datascience` 仓库到本地，以便随时查阅分类资源。\n\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience.git\ncd awesome-datascience\n```\n\n### 2. 构建基础数据科学环境\n根据仓库推荐的 \"The Data Science Toolbox\"，使用 Conda 创建一个隔离环境并安装核心库（Pandas, NumPy, Scikit-Learn, Matplotlib, Seaborn）。\n\n**使用 Conda (推荐):**\n```bash\n# 创建名为 ds-env 的环境，指定 Python 版本\nconda create -n ds-env python=3.9\n\n# 激活环境\nconda activate ds-env\n\n# 配置清华镜像源 (可选，但推荐国内用户执行)\nconda config --add channels https:\u002F\u002Fmirrors.tuna.tsinghua.edu.cn\u002Fanaconda\u002Fpkgs\u002Fmain\u002F\nconda config --set show_channel_urls yes\n\n# 安装核心数据科学栈\nconda install pandas numpy scikit-learn matplotlib seaborn jupyterlab\n```\n\n**或使用 Pip:**\n```bash\npython -m pip install --upgrade pip\npip install pandas numpy scikit-learn matplotlib seaborn jupyterlab -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n```\n\n## 基本使用\n\n安装完成后，你可以立即开始实践仓库中提到的基础项目。以下是一个基于 `Scikit-Learn` 和 `Pandas` 的最小化示例，演示数据加载、预处理及模型训练流程。\n\n### 启动 Jupyter Lab\n```bash\njupyter lab\n```\n\n### 代码示例：泰坦尼克号生存预测简化版\n在新建的 Notebook 中运行以下代码：\n\n```python\nimport pandas as pd\nimport numpy as np\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.metrics import accuracy_score\n\n# 1. 构造简易数据集 (实际使用中可从 Kaggle 下载完整 CSV)\ndata = {\n    'Age': [22, 38, 26, 35, 28, 45, 31, 19],\n    'Fare': [7.25, 71.28, 7.92, 53.10, 8.45, 13.00, 27.72, 7.89],\n    'Sex': ['male', 'female', 'female', 'female', 'male', 'male', 'female', 'male'],\n    'Survived': [0, 1, 1, 1, 0, 0, 1, 0]\n}\ndf = pd.DataFrame(data)\n\n# 2. 数据预处理\n# 将性别转换为数值 (male=0, female=1)\ndf['Sex'] = df['Sex'].map({'male': 0, 'female': 1})\n\n# 特征与标签分离\nX = df[['Age', 'Fare', 'Sex']]\ny = df['Survived']\n\n# 处理缺失值 (填充平均值)\nX = X.fillna(X.mean())\n\n# 3. 划分训练集和测试集\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n\n# 4. 
训练模型 (随机森林)\nmodel = RandomForestClassifier(n_estimators=100, random_state=42)\nmodel.fit(X_train, y_train)\n\n# 5. 评估模型\npredictions = model.predict(X_test)\naccuracy = accuracy_score(y_test, predictions)\n\nprint(f\"模型准确率：{accuracy:.2f}\")\nprint(\"恭喜！你已完成第一个数据科学工作流程。\")\n```\n\n### 下一步学习\n参考本地 `awesome-datascience\u002FREADME.md` 文件中的目录结构：\n*   查看 **Training Resources** 章节寻找免费课程。\n*   前往 **Datasets** 章节寻找真实数据进行练习。\n*   探索 **Algorithms** 章节深入学习监督学习与非监督学习算法。","某初创公司的数据分析师李明刚转岗负责构建用户流失预测模型，面对海量且分散的技术资源感到无从下手。\n\n### 没有 awesome-datascience 时\n- **学习路径迷茫**：在知乎、谷歌和各类博客间反复跳转，花费数天仍无法理清“数据科学到底该学什么”的核心知识体系。\n- **工具选型困难**：面对 PyTorch、TensorFlow 等众多深度学习框架及评估工具，缺乏权威的对比指南，担心选错技术栈导致后期重构。\n- **资源质量参差不齐**：找到的免费课程和教程大多过时或缺乏实战案例，难以直接应用于解决公司真实的业务问题。\n- **社区融入缓慢**：不知道有哪些活跃的 Slack 群组或 GitHub 组织，遇到报错只能独自摸索，严重拖慢项目进度。\n\n### 使用 awesome-datascience 后\n- **路线清晰明确**：直接参照\"Where do I Start\"和分类详细的目录，快速建立起从基础算法到高级架构的系统化学习地图。\n- **高效决策工具**：利用\"The Data Science Toolbox\"中整理好的生态列表（如 Evidently AI 用于模型监控），迅速锁定最适合当前场景的开源库。\n- **实战资源直达**：通过精选的 MOOCs、书籍和真实数据集链接，直接获取经过验证的高质量内容，将理论快速转化为代码实现。\n- **融入专业圈子**：一键访问推荐的社交媒体账号和竞赛平台，迅速连接行业专家，在遇到瓶颈时能及时获得社区支持。\n\nawesome-datascience 将原本需要数周的信息搜集与筛选工作压缩至几小时，让数据科学家能专注于解决真正的业务难题而非寻找入门钥匙。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Facademic_awesome-datascience_3237deef.jpg","academic","Academic.io","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Facademic_b9a42655.png","Open-Source applications and services for academic people, organizations and countries.",null,"hi@academic.io","https:\u002F\u002Facademic.io","https:\u002F\u002Fgithub.com\u002Facademic",28785,6447,"2026-04-07T23:10:38","MIT",1,"","未说明",{"notes":88,"python":89,"dependencies":90},"该项目是一个数据科学资源列表（Awesome List），而非单一的可执行软件工具，因此没有特定的运行环境、GPU 或内存硬性要求。文中建议初学者使用 Python 语言，并通过 Anaconda 或 Pip 安装 Scikit-Learn、Pandas、Numpy 和 Seaborn 等核心库开始学习。具体的环境需求取决于用户选择使用的列表中提到的某个特定子项目或算法。","未说明 (文中提及 Python 是主流语言，推荐使用 Anaconda 或 Pip 
安装)",[91,92,93,94,95],"Scikit-Learn","Pandas","Numpy","Seaborn","Matplotlib",[97,14,16],"其他",[99,100,101,102,103,104,105,106,107,108],"data-science","machine-learning","data-visualization","science","data-mining","awesome-list","deep-learning","analytics","data-scientists","hacktoberfest","2026-03-27T02:49:30.150509","2026-04-08T12:11:30.956566",[112,117,122,127,132,137,142,147],{"id":113,"question_zh":114,"answer_zh":115,"source_url":116},24269,"如何向列表贡献内容或建议新工具（如 Deepnote）？","维护者建议直接提交 Pull Request (PR)。如果您想添加新工具或资源，请先在 Issue 中提出建议，待维护者确认后，请创建相应的 PR 来合并更改。例如，有用户建议添加 Deepnote 后，维护者回复：'请为此提交一个 PR'，随后用户创建了 PR #499。","https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fissues\u002F497",{"id":118,"question_zh":119,"answer_zh":120,"source_url":121},24270,"列表中的内容（如 MOOCs、数据集等链接）是否按字母顺序排序？","为了更有条理，社区建议将各部分下的链接按字母顺序排列。维护者欢迎社区贡献者对此进行整理。您可以分类地重新排列每个部分下的内容，并提交 PR。维护者已确认同意此类重构：'当然可以'。","https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fissues\u002F190",{"id":123,"question_zh":124,"answer_zh":125,"source_url":126},24271,"可以在列表中推广我自己的数据科学仓库或课程吗？","可以请求添加，但必须严格遵守版权规定。维护者明确指出：'你不应该分享任何受版权保护的材料'。如果您的仓库包含来自 Codecademy 或 Google 等平台的材料，必须确保没有侵犯版权，否则不会被接受。","https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fissues\u002F207",{"id":128,"question_zh":129,"answer_zh":130,"source_url":131},24272,"是否有专门的数据科学家工具箱或环境配置列表？","是的，项目中已经合并了数据科学家环境工具的列表。贡献者已将相关项目添加到列表中，并经过维护者审核合并。您可以查看主仓库中关于 'Toolbox' 或环境配置的相关章节获取详细信息。","https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fissues\u002F2",{"id":133,"question_zh":134,"answer_zh":135,"source_url":136},24273,"哪里可以找到推荐的数据科学相关的 Twitter 账号？","项目已专门建立了一个文件来收录推荐的 Twitter 账号。该文件名为 `dataSci_twitter_accounts.md`，其中包含了经过筛选和添加的账号列表，您可以直接在仓库中查看该文件。","https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fissues\u002F4",{"id":138,"question_zh":139,"answer_zh":140,"source_url":141},24274,"是否有推荐的数据科学博客或博主列表？","有的，博客文件（Blogger 
file）已经更新。贡献者添加了带有描述的新链接，并已获得维护者确认。您可以查阅仓库中关于博客推荐的部分，那里列出了经过审核的优质数据科学博客。","https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fissues\u002F6",{"id":143,"question_zh":144,"answer_zh":145,"source_url":146},24275,"是否有数据科学相关的 Facebook 群组推荐？","是的，一些 Facebook 群组已经被添加到了列表中。贡献者已将多个群组信息合并，并得到了维护者的批准。您可以在相关章节找到这些社群链接。","https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fissues\u002F5",{"id":148,"question_zh":149,"answer_zh":150,"source_url":151},24276,"项目是否提供邮件列表（Mail Lists）供社区交流？","目前由于足够的邮件列表数量不足以单独创建一个页面，因此暂未设立专门的邮件列表章节。不过，维护者表示可以将少量的电子邮件组直接添加到主 README 文件中，或者未来考虑为社区创建邮件列表。","https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fissues\u002F3",[153,158,163,168,173,178,183,188,193,198,203,208,213,218,223,228,233,238,243],{"id":154,"version":155,"summary_zh":156,"released_at":157},149006,"v2026.04.01.1","## 变更内容\n* @CarlosAlbertoFurtado 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F542 中将 Polars 添加到“其他工具”部分。\n* @onestardao 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F543 中添加了 WFGY ProblemMap——针对 LLM 和 RAG 流程的 16 种失败模式。\n* @TerryFYL 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F545 中将 MetaReview 添加到“可视化工具”部分。\n* @anki-code 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F546 中将 xonsh shell 添加到工具列表。\n* @CarlosAlbertoFurtado 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F548 中添加了 DuckDB 和 Metabase。\n* @noahaust2 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F547 中添加了 AI Displacement Tracker 数据集。\n* @shmlkv 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F549 中将 dna-claude-analysis 添加到“其他工具”部分。\n* @stuckvgn 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F550 中添加了 GBIF 和 FAOSTAT 开放数据集。\n* @nicalevras 在 
https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F552 中添加了 Deploybase。\n* @Robocular 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F553 中将 Frostbyte MCP 添加到“智能体 > 工具”部分。\n* @danielnichiata96 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F554 中将 CiteMe 添加到“其他工具”部分。\n* @firstdata-dev 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F557 中将 FirstData 添加到“数据集”部分。\n* @Oshgig 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F555 中修复：更新 GitHub Actions 并修复已弃用的 CDN 链接。\n* @connerlambden 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F544 中将 BGPT MCP 添加到“智能体 > 研究与知识检索”部分。\n* @Godswill-code 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F556 中修复：更新 CI 配置并移除分析脚本。\n* @Deesmo 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F558 中将 Arch Tools 添加到“工具”部分。\n* @zhangshaolei1998 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F559 中将 DeepAnalyze 添加到 README 的工具列表。\n* @aurumz-rgb 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F567 中将 Awesome Evidence Synthesis 添加到“其他 Awesome 列表”。\n* @alexisdufresne 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F568 中将 AI Weekly 添加到新闻简报\u002F资源列表。\n* @Diyago 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F569 中添加了 TabGAN——基于 GAN、扩散模型和 LLM 的合成表格数据生成工具。\n* @conorbronsdon 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F570 中添加了 Chain of Thought 播客。\n* @juanmoisesd 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F573 中添加了 latamdata-py——用于拉丁美洲研究数据集的 Python 包。\n* @Nadeus 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F571 
中添加了 Techpresso 新闻简报。\n\n## 新贡献者\n* @CarlosAlbertoFurtado 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F542 中完成了首次贡献。\n* @onestardao 完成了首次 c","2026-04-01T11:11:33",{"id":159,"version":160,"summary_zh":161,"released_at":162},149007,"v2026.02.03.1","## 变更内容\n* @tiagomonteiro0715 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F540 中将《人工智能背后的数学》添加到“书籍”部分\n\n## 新贡献者\n* @tiagomonteiro0715 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F540 中完成了首次贡献\n\n**完整变更日志**: https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fcompare\u002Fv2026.01.24.1...v2026.02.03.1","2026-02-03T18:13:05",{"id":164,"version":165,"summary_zh":166,"released_at":167},149008,"v2026.01.24.1","## 变更内容\n* 由 @notesjor 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F537 中向 README.md 添加了新的资源和链接\n\n**完整变更日志**: https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fcompare\u002Fv2025.12.31.1...v2026.01.24.1","2026-01-23T23:00:33",{"id":169,"version":170,"summary_zh":171,"released_at":172},149009,"v2025.12.31.1","\u003Cimg width=\"1024\" height=\"1024\" alt=\"图片\" src=\"https:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Fassets\u002Fae2242d8-09ec-4714-846b-aa5edf9d3d1b\" \u002F>\n\n\n## 变更内容\n* @paulbkoch 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F526 中将 InterpretML 添加到机器学习工具列表。\n* @amarjithanand 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F527 中更新了 README，加入了现代数据挖掘算法。\n* @chernishev 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F528 中将 Desbordante 添加到机器学习工具列表。\n* @finrunsfar 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F530 中将 RunMat 添加到杂项工具部分。\n* @jkmaina 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F532 中将 ADK-Rust 
添加到智能体框架部分。\n* @manasmudbari 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F534 中将 Turbostream 添加到杂项工具列表。\n* @23f2004661 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F531 中在 README 文件中添加了一本新书及其链接。\n* @kamakshipal1-tech 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F533 中修复了 README 的格式和拼写错误。\n* @hissain 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F535 中将 jSciPy 添加到通用机器学习软件包部分。\n\n## 新贡献者\n* @paulbkoch 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F526 中完成了首次贡献。\n* @amarjithanand 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F527 中完成了首次贡献。\n* @chernishev 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F528 中完成了首次贡献。\n* @finrunsfar 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F530 中完成了首次贡献。\n* @jkmaina 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F532 中完成了首次贡献。\n* @manasmudbari 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F534 中完成了首次贡献。\n* @23f2004661 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F531 中完成了首次贡献。\n* @kamakshipal1-tech 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F533 中完成了首次贡献。\n* @hissain 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F535 中完成了首次贡献。\n\n**完整变更日志**: https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fcompare\u002Fv2025.11.03.1...v2025.12.31.1","2025-12-31T14:21:11",{"id":174,"version":175,"summary_zh":176,"released_at":177},149010,"v2025.11.03.1","## 变更内容\n* 在“数据集”部分添加 Hugging Face 数据集，由 @Aaditya-Chunekar 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F516 中完成\n* hacktoberfest：在 README 中添加 
DataCamp 备忘单链接，由 @Aaditya-Chunekar 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F517 中完成\n* great learning，由 @rahulguggilam 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F519 中完成\n* Code acadamey 课程，由 @rahulguggilam 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F520 中完成\n* 添加 mlu 网站，由 @rahulguggilam 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F521 中完成\n* 在 README 中添加 CampusX YouTube 频道链接，由 @shail-patel-321 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F522 中完成\n* 添加 evidently-ai，由 @harshverma4028 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F509 中完成\n* 在 MOOCs 部分添加 Google 高级数据分析证书资源链接，由 @mayenbk 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F523 中完成\n* 将 WorldQuant University 添加到强化项目列表中，由 @Aaditya-Chunekar 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F524 中完成\n\n## 新贡献者\n* @Aaditya-Chunekar 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F516 中完成了首次贡献\n* @rahulguggilam 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F519 中完成了首次贡献\n* @shail-patel-321 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F522 中完成了首次贡献\n* @harshverma4028 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F509 中完成了首次贡献\n* @mayenbk 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F523 中完成了首次贡献\n\n**完整变更日志**：https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fcompare\u002Fv2025.10.21.1...v2025.11.03.1","2025-11-03T10:35:48",{"id":179,"version":180,"summary_zh":181,"released_at":182},149011,"v2025.10.21.1","## 变更内容\n* 在 @stjepanjurekovic 的贡献下，将《Grokking 
Bayes》添加到书籍列表中，详情见：https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F500\n* 在 @vitoriapena 的贡献下，将《Machine Learning Q and AI》添加到书籍列表中，详情见：https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F501\n* 在 @fakturk 的贡献下，新增了 Hugging Face 相关条目，详情见：https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F506\n* 在 @fakturk 的贡献下，新增了 Google 数据集搜索工具，详情见：https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F504\n* 在 @fakturk 的贡献下，清理了重复条目，详情见：https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F505\n* 在 @fakturk 的贡献下，修复了 t-SNE 的失效链接，详情见：https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F507\n* 在 @fakturk 的贡献下，移除了 https:\u002F\u002Fusgovxml.com\u002F，详情见：https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F503\n* 在 @fakturk 的贡献下，移除了 https:\u002F\u002Fspenczar.com\u002F，详情见：https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F502\n* 在 @Jvictorvieira 的贡献下，向 README 添加了 Channel Serrano.Academy 的链接，详情见：https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F508\n* 在 @hans-r7 的贡献下，添加了更多数据科学相关资源的链接，详情见：https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F510\n* 在 @anonym-g 的贡献下，新增了“Chinese-Elite”条目，详情见：https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F511\n* 在 @PavelGrigoryevDS 的贡献下，将“Awesome Data Analysis”仓库添加到“其他 Awesome 列表”部分，详情见：https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F512\n\n## 新贡献者\n* @vitoriapena 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F501 中完成了首次贡献\n* @Jvictorvieira 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F508 中完成了首次贡献\n* @hans-r7 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F510 中完成了首次贡献\n* 
@anonym-g 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F511 中完成了首次贡献\n* @PavelGrigoryevDS 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F512 中完成了首次贡献\n\n**完整变更日志**：https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fcompare\u002Fv2025.09.13.1...v2025.10.21.1","2025-10-21T14:54:08",{"id":184,"version":185,"summary_zh":186,"released_at":187},149012,"v2025.09.13.1","## 变更内容\n* 由 @financialdatanet 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F493 中将 FinancialData.Net 添加到数据集列表中\n* 由 @somya-codeflash 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F494 中添加 Codeflash\n\n## 新贡献者\n* @financialdatanet 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F493 中完成了首次贡献\n* @somya-codeflash 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F494 中完成了首次贡献\n\n**完整变更日志**: https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fcompare\u002Fv2025.08.18.1...v2025.09.13.1","2025-09-12T20:59:20",{"id":189,"version":190,"summary_zh":191,"released_at":192},149013,"v2025.08.18.1","## 变更内容\n* @Paras-96 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F470 中新增了一项实用资源\n* @Paras-96 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F471 中新增了两项实用资源\n* @Paras-96 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F472 中新增了两项实用资源\n* 新增数据集：“25万+ 招聘信息”——由 @joergrech 提供，链接为 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F480\n* @Paras-96 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F478 中新增了一项实用资源\n* @deadsoul44 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F484 中添加了 PerpetualBooster\n* @Paras-96 在 
https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F483 中新增了一项实用资源\n* @rcap107 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F489 中添加了 skrub 包\n* 更新 README.md——新增用于智能体仿真、评估和可观测性的工具，由 @akmadan 提供，链接为 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F490\n* @Paras-96 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F488 中新增了三项实用资源\n* @solegalli 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F492 中将某库添加到通用机器学习列表\n\n## 新贡献者\n* @joergrech 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F480 中完成了首次贡献\n* @deadsoul44 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F484 中完成了首次贡献\n* @rcap107 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F489 中完成了首次贡献\n* @akmadan 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F490 中完成了首次贡献\n* @solegalli 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F492 中完成了首次贡献\n\n**完整变更日志**：https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fcompare\u002Fv2025.01.02.1...v2025.08.18.1","2025-08-18T19:20:24",{"id":194,"version":195,"summary_zh":196,"released_at":197},149014,"v2025.01.02.1","## 变更内容\n* @anmorgan24 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F446 中添加了 Opik\n* @sjtushi 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F450 中将数据科学技能树添加到 MOOCs 部分\n* @kevin1kevin1k 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F451 中添加了机器学习相关包和杂项工具\n* @zilto 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F453 中添加了 Hamilton\n* @Paras-96 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F452 中添加了一条实用资源\n* 
@rashi07dashore 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F454 中添加了一条新的重要资源\n* 关于 Java 中的数据科学——@rashi07dashore 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F455 中添加了新资源\n* @makifdb 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F456 中将“数据分析工程播客”添加到播客部分\n* @makifdb 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F457 中添加了新的数据科学漫画资源\n* @sailxjx 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F460 中添加了两条 MOOC 资源\n* @Paras-96 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F463 中添加了两条实用资源\n* @Paras-96 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F465 中添加了一条实用资源\n* @fakturk 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F467 中检查并修复了已移除或更改的链接\n* @JasonnnW3000 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F468 中更新了 LICENSE 文件，并修正了版权许可年份\n\n## 新贡献者\n* @sjtushi 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F450 中完成了首次贡献\n* @kevin1kevin1k 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F451 中完成了首次贡献\n* @zilto 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F453 中完成了首次贡献\n* @rashi07dashore 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F454 中完成了首次贡献\n* @makifdb 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F456 中完成了首次贡献\n* @sailxjx 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F460 中完成了首次贡献\n* @JasonnnW3000 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F468 中完成了首次贡献\n\n**完整变更日志**: 
https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fcompare\u002Fv2024.08.29.1...v2025.01.02.1","2025-01-02T19:23:13",{"id":199,"version":200,"summary_zh":201,"released_at":202},149015,"v2024.08.29.1","## 变更内容\n* 书籍：《整合业务、数据与代码》（O'Reilly），作者为 @jviotti，详情见 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F444\n\n## 新贡献者\n* @jviotti 在 https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F444 中完成了首次贡献\n\n**完整变更日志**：https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fcompare\u002Fv2024.07.01.1...v2024.08.29.1","2024-08-29T14:27:06",{"id":204,"version":205,"summary_zh":206,"released_at":207},149016,"v2024.07.01.1","**Full Changelog**: https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fcompare\u002Fv2024.06.02.1...v2024.07.01.1","2024-07-01T11:26:42",{"id":209,"version":210,"summary_zh":211,"released_at":212},149017,"v2024.06.02.1","## What's Changed\r\n* Yolo models by @SannketNikam in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F427\r\n* added hum data by @rasitds in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F428\r\n* add generative ai by @alikvkli in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F429\r\n* List data visualization workflow tool by @mmore500 in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F430\r\n* Added Resseract Lite by @abistarun in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F431\r\n* Add free course by @anmorgan24 in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F435\r\n* Update README.md by @rraadd88 in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F437\r\n* Add free course by @anmorgan24 in 
https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F441\r\n* Added one new resource by @Paras-96 in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F442\r\n\r\n## New Contributors\r\n* @rasitds made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F428\r\n* @alikvkli made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F429\r\n* @mmore500 made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F430\r\n* @abistarun made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F431\r\n* @rraadd88 made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F437\r\n* @Paras-96 made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F442\r\n\r\n**Full Changelog**: https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fcompare\u002Fv2023.10.31...v2024.06.02.1","2024-06-02T17:45:40",{"id":214,"version":215,"summary_zh":216,"released_at":217},149018,"v2023.10.31","## What's Changed\r\n* Update README.md by @mixeden in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F407\r\n* Update README.md by @akhil-maker in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F411\r\n* Update README.md by @akhil-maker in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F412\r\n* Update README.md by @Archit-Kohli in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F414\r\n* Added @WeAreINEVITABLE to Twitter Accounts by @Sean12697 in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F413\r\n* Fixed Some 
Grammatical mistakes in  README.md by @JanumalaAkhilendra in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F416\r\n* Updated README.md with clickable links for AdaBoost and Bagging by @RuchNas-Pottah in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F417\r\n* Rectified text formatting issue by @dellucifer in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F418\r\n* add clickable links for ID3 and C4.5 algorithms by @goyalpramod in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F419\r\n* Update README.md by @SannketNikam in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F420\r\n* Updated README.md by @15diksha in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F421\r\n* Updated README.md by @15diksha in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F422\r\n\r\n## New Contributors\r\n* @mixeden made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F407\r\n* @akhil-maker made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F411\r\n* @Archit-Kohli made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F414\r\n* @JanumalaAkhilendra made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F416\r\n* @RuchNas-Pottah made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F417\r\n* @dellucifer made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F418\r\n* @goyalpramod made their first contribution in 
https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F419\r\n* @SannketNikam made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F420\r\n* @15diksha made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F421\r\n\r\n**Full Changelog**: https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fcompare\u002Fv2023.09.29...v2023.10.31","2023-10-31T06:59:31",{"id":219,"version":220,"summary_zh":221,"released_at":222},149019,"v2023.09.29","## What's Changed\r\n* Updated the repos. by @letscode-17 in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F394\r\n* Added Software Engineering for Data Scientists by @stjepanjurekovic in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F395\r\n* Added Julia for Data Science by @stjepanjurekovic in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F396\r\n* Added my udemy course and twitter address by @ilkerarslan in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F398\r\n* feat: Add arize-phoenix by @JackyxCS in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F399\r\n* 12 ready-to-solve Python Pandas projects by @martinzugnoni in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F401\r\n* Adding my blog by @santiagobasulto in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F400\r\n* fixed mistyping, removed extra ']' by @xaviduds in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F402\r\n* Update README.md by @bikashdaga in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F404\r\n* Fixed one formatting issue by @bikashdaga in 
https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F405\r\n* Add comet to miscellaneous tools by @anmorgan24 in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F409\r\n* Adding Book: \"An Introduction to Statistical Learning, With Applications in Python (ISLP)\" by @manideepreddym in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F410\r\n* Link created for Python Introduction by @CarlosDeL3on in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F408\r\n\r\n## New Contributors\r\n* @letscode-17 made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F394\r\n* @ilkerarslan made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F398\r\n* @JackyxCS made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F399\r\n* @martinzugnoni made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F401\r\n* @santiagobasulto made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F400\r\n* @xaviduds made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F402\r\n* @bikashdaga made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F404\r\n* @anmorgan24 made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F409\r\n* @manideepreddym made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F410\r\n* @CarlosDeL3on made their first contribution in 
https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F408\r\n\r\n**Full Changelog**: https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fcompare\u002Fv2023.03.13...v2023.09.29","2023-09-29T19:37:31",{"id":224,"version":225,"summary_zh":226,"released_at":227},149020,"v2023.04.3","## What's Changed\r\n* Added Regular Expression Puzzles and AI Coding Assistants by @stjepanjurekovic in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F387\r\n* Update README.md by @ugursaricam in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F391\r\n* Update README.md by @ugursaricam in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F392\r\n* Update README.md by @KeskinHakan in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F393\r\n\r\n## New Contributors\r\n* @ugursaricam made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F391\r\n* @KeskinHakan made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F393\r\n\r\n**Full Changelog**: https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fcompare\u002Fv2023.02.21...v2023.04.3","2023-04-03T19:22:28",{"id":229,"version":230,"summary_zh":231,"released_at":232},149021,"v2023.03.13","## What's Changed\r\n* Added Regular Expression Puzzles and AI Coding Assistants by @stjepanjurekovic in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F387\r\n* Update README.md by @ugursaricam in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F391\r\n* Update README.md by @ugursaricam in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F392\r\n* Update README.md by @KeskinHakan in 
https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F393\r\n\r\n## New Contributors\r\n* @ugursaricam made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F391\r\n* @KeskinHakan made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F393\r\n\r\n**Full Changelog**: https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fcompare\u002Fv2023.02.21...v2023.03.13","2023-03-13T18:47:25",{"id":234,"version":235,"summary_zh":236,"released_at":237},149022,"v2023.02.21","## Real World\r\n\r\n*TÜRKIYE AND SYRIA EARTHQUAKES*\r\n\r\nThe devastating 7.7 magnitude earthquake which has killed thousands of people and injured many more in the region. [AYA: Açık Yazılım Ağı](https:\u002F\u002Flinktr.ee\u002Facikyazilimagi) (+25k developers) is trying to help disaster response using artificial intelligence. Everything is open-sourced [afet.org](https:\u002F\u002Fafet.org\u002F). 
Please contribute to solve problems of next disasters with open source software and data science.\r\n\r\n## What's Changed\r\n* Update README.md by @nerdyespresso in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F376\r\n* Add my favorite tools  by @jwmueller in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F378\r\n* Added a new link for Other Awesome Lists by @SooRya2 in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F380\r\n* add free course + cool new tool by @axiomofjoy in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F383\r\n* add W&B free course by @ekamioka in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F384\r\n* Add Aureo.io to tools by @bkawakami in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F385\r\n* add imodels by @csinva in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F386\r\n* tool made for developers by @shah743 in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F388\r\n\r\n## New Contributors\r\n* @nerdyespresso made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F376\r\n* @jwmueller made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F378\r\n* @SooRya2 made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F380\r\n* @axiomofjoy made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F383\r\n* @ekamioka made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F384\r\n* @bkawakami made their first contribution in 
https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F385\r\n* @csinva made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F386\r\n* @shah743 made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F388\r\n\r\n**Full Changelog**: https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fcompare\u002Fv2022.12.01...v2023.02.21","2023-02-21T15:47:11",{"id":239,"version":240,"summary_zh":241,"released_at":242},149023,"v2022.12.01","## What's Changed\r\n* Update README.md by @iamkunalpitale in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F356\r\n* Updated the Readme by @sharmaanj200 in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F357\r\n* Add Elements of Statistical Learning book by @jrinder42 in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F358\r\n* Add Survival Analysis ML Package by @jrinder42 in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F359\r\n* Remove duplicate MOOC by @osinkolu in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F361\r\n* Added ADS-B Exchange by @ajaj895 in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F362\r\n* added data science book by @dawoodwasif in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F363\r\n* Update README.md by @fakturk in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F364\r\n* Update README.md by @fakturk in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F365\r\n* Update README.md by @fakturk in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F366\r\n* Update README.md by @ilketaha in 
https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F367\r\n* Update README.md by @ilketaha in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F368\r\n* Update README.md by @ilketaha in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F369\r\n* Update README.md by @ilketaha in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F370\r\n* Added links to dimensionality reduction algorithms by @RaInta in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F374\r\n* Added links to clustering algorithms by @RaInta in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F373\r\n* Added link to MLflow by @RaInta in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F372\r\n* Added link to ensemble methods by @RaInta in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F371\r\n\r\n## New Contributors\r\n* @iamkunalpitale made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F356\r\n* @sharmaanj200 made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F357\r\n* @jrinder42 made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F358\r\n* @osinkolu made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F361\r\n* @ajaj895 made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F362\r\n* @dawoodwasif made their first contribution in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F363\r\n* @ilketaha made their first contribution in 
https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F367\r\n\r\n**Full Changelog**: https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fcompare\u002F2022.10.14...v2022.12.01","2022-12-01T16:36:20",{"id":244,"version":245,"summary_zh":246,"released_at":247},149024,"2022.10.14","# Your participation in this project brought huge value to the community. Thank you for your contribution.\r\n\r\n## What's Changed\r\n* Update README.MD by @Rogerh91 in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F67\r\n* Update README.md by @jaytaylopub in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F70\r\n* Update README.md by @jaytaylopub in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F71\r\n* Added one more university in Sweden by @SunnyBingoMe in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F72\r\n* Added one Facebook account and one awesome list by @ujjwalkarn in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F75\r\n* Add new Data sets: New Zealand Institute of Economic Research Data1850 by @thibaudcolas in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F76\r\n* Updated README.md by @jaytaylopub in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F77\r\n* add Data School to Bloggers section by @justmarkham in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F78\r\n* add Kevin Markham to Twitter section by @justmarkham in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F79\r\n* remove duplicate Google data set by @fredkelly in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F80\r\n* Update link Data Science Degree @ Berkeley #81 by @Devinsuit in 
https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F82\r\n* Add \"Neural Networks video series\" by @jbenjos in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F83\r\n* Add \"Awesome Data Science Ideas\" list by @JosPolfliet in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F85\r\n* Added \"Machine Learning for Software Engineers\" by @ZuzooVn in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F86\r\n* Adding competition section. by @sara-02 in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F73\r\n* Add skale, high performance distributed data processing in NodeJS by @mvertes in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F87\r\n* NumPy and SciPy python libraries by @gokhanm in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F88\r\n* Add Hydrosphere Mist by @spushkarev in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F89\r\n* Update links by @Devinsuit in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F90\r\n* Add of LITS Dataset by @PatrickChrist in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F91\r\n* Added  blogs for understanding Neural Networks ! 
by @sarthusarth in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F93\r\n* small correction with the line break by @danklotz in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F92\r\n* Remove dot by @techmexdev in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F95\r\n* Minor grammar correction by @techmexdev in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F94\r\n* Add Trello Board and Datatau News by @murat in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F96\r\n* Added Podcasts Section by @kofoide in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F97\r\n* Added Data Science Medium Topic by @murat in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F98\r\n* Add zeppelin to tools and remove some dots by @sravan-s in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F99\r\n* Enigma.io to Enigma by @indiakerle in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F100\r\n* Add a books section by @jwood803 in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F101\r\n* Update README.md with one MOOC and a bunch of books by @Charismatron in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F102\r\n* One more book to add :) by @Charismatron in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F103\r\n* Fixed some small Markdown issues by @sheilnaik in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F104\r\n* adding skikit-learn choosing estimator infographic by @lhayhurst in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F105\r\n* fixed bad link by @lhayhurst in 
https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F106\r\n* Add Featuretools to tools by @kmax12 in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F107\r\n* Add Data Fallacies poster to the infographic section by @BRMatt in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F109\r\n* Typo fixes in readme by @ferhatelmas in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F111\r\n* Add Optimus into Toolboxes section by @FavioVazquez in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F110\r\n* Fixed link to \"Sweden, Statistics\" by @gostaj in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F112\r\n* Add Syracuse MS in Applied Data Science to COLLEGES by @justinclarkhome in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F115\r\n* Added curated data science resources by @qualityjacks in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F116\r\n* Add tutorial section by @vivekimsit in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascience\u002Fpull\u002F117\r\n* General Data Science Channel on YouTube: Introduction, NLP, and Practice by @tomer-ben-david in https:\u002F\u002Fgithub.com\u002Facademic\u002Fawesome-datascien","2022-10-15T20:25:58"]