[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-rbhatia46--Data-Science-Interview-Resources":3,"tool-rbhatia46--Data-Science-Interview-Resources":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 
代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",153609,2,"2026-04-13T11:34:59",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",108322,"2026-04-10T11:39:34",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 
协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[52,13,15,14],"插件",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":32,"last_commit_at":59,"category_tags":60,"status":17},4721,"markitdown","microsoft\u002Fmarkitdown","MarkItDown 是一款由微软 AutoGen 团队打造的轻量级 Python 工具，专为将各类文件高效转换为 Markdown 格式而设计。它支持 PDF、Word、Excel、PPT、图片（含 OCR）、音频（含语音转录）、HTML 乃至 YouTube 链接等多种格式的解析，能够精准提取文档中的标题、列表、表格和链接等关键结构信息。\n\n在人工智能应用日益普及的今天，大语言模型（LLM）虽擅长处理文本，却难以直接读取复杂的二进制办公文档。MarkItDown 恰好解决了这一痛点，它将非结构化或半结构化的文件转化为模型“原生理解”且 Token 效率极高的 Markdown 格式，成为连接本地文件与 AI 分析 pipeline 的理想桥梁。此外，它还提供了 MCP（模型上下文协议）服务器，可无缝集成到 Claude Desktop 等 LLM 应用中。\n\n这款工具特别适合开发者、数据科学家及 AI 研究人员使用，尤其是那些需要构建文档检索增强生成（RAG）系统、进行批量文本分析或希望让 AI 助手直接“阅读”本地文件的用户。虽然生成的内容也具备一定可读性，但其核心优势在于为机器",93400,"2026-04-06T19:52:38",[52,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":66,"readme_en":67,"readme_zh":68,"quickstart_zh":69,"use_case_zh":70,"hero_image_url":71,"owner_login":72,"owner_name":73,"owner_avatar_url":74,"owner_bio":75,"owner_company":76,"owner_location":77,"owner_email":78,"owner_twitter":79,"owner_website":79,"owner_url":80,"languages":79,"stars":81,"forks":82,"last_commit_at":83,"license":84,"difficulty_score":85,"env_os":86,"env_gpu":87,"env_ram":87,"env_deps":88,"category_tags":91,"github_topics":93,"view_count":32,"oss_zip_url":79,"oss_zip_packed_at":79,"status":17,"created_at":102,"updated_at":103,"faqs":104,"releases":105},7185,"rbhatia46\u002FData-Science-Interview-Resources","Data-Science-Interview-Resources","A repository listing out the potential sources which will help you in preparing for a Data Science\u002FMachine Learning interview. 
New resources added frequently.","Data-Science-Interview-Resources 是一个专为数据科学和机器学习求职者打造的开源资源库。它系统性地整理了经过作者亲身验证的学习资料，涵盖从简历筛选到最终面试通关的全流程指南。\n\n在数据科学岗位竞争日益激烈的当下，许多求职者难以通过初筛或缺乏系统的备考方向。该仓库直击这一痛点，不仅提供了丰富的学习链接，更明确了核心技能图谱：包括商业理解、SQL 与数据库、Python 编程、数学基础（概率统计与线性代数）、机器学习建模、数据结构与算法，以及至关重要的论文阅读能力和沟通技巧。特别针对 FAANG 等顶尖科技公司，强调了算法与工程能力的重要性。\n\n这份资源适合所有阶段的数据科学从业者使用，无论是希望转行入门的新手，还是拥有多年经验想要跳槽资深岗位的工程师，亦或是从事相关研究的学者。其独特之处在于内容并非简单的链接堆砌，而是基于作者在大型产品公司和初创企业的大量面试经验提炼而成，兼具理论深度与实战针对性。仓库保持高频更新，致力于成为一站式备考指南，帮助用户夯实基础，从容应对各类技术挑战。","[![HitCount](http:\u002F\u002Fhits.dwyl.io\u002F{username}\u002F{repo}.svg)](http:\u002F\u002Fhits.dwyl.io\u002F{rbhatia46}\u002F{Data-Science-Interview-Resources})\n![Star this repository](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Frbhatia46\u002FData-Science-Interview-Resources?style=social)\n\n# Data-Science-Interview-Resources\n\n**Update : Drawing from extensive experience in interviews over the past few years, I recently decided to launch a dedicated channel to help individuals excel in Data Science. My goal is to create a comprehensive resource for anyone looking to revisit the basics before an upcoming interview or master the skills and in-depth knowledge required for both succeeding in Data Science interviews and applying Data Science in practice. This channel aims to provide a clear understanding of various techniques used on a day-to-day basis, covering a vast range of Machine Learning topics. 
Feel free to explore it here : \u003Cbr\u002F>[\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frbhatia46_Data-Science-Interview-Resources_readme_c1c7be8f5306.jpg\" width=70 height=70>](https:\u002F\u002Fwww.youtube.com\u002Fchannel\u002FUC2vHbm_9kzjrRamX8ArGdkA)**\n\n\nFirst of all, thanks for visiting this repo, congratulations on making a great career choice, I aim to help you land an amazing Data Science job that you have been dreaming for, by sharing my experience, interviewing heavily at both large product-based companies and fast-growing startups, hope you find it useful.\n\nWith an increase in demand for so many Data Scientists, it's really hard to successfully get screened and accepted for an interview. In this repo, I include everything from getting successfully screened and rocking that interview to land that amazing position, make sure to nail it with the following resources.\n\nEvery Resource I list here is personally verified by me and most of them I have used personally, which have helped me a lot.\n\n**Word of Caution:** Data Science\u002FMachine Learning has a very big domain and there are a lot of things to learn. This by no means is an exhaustive list and is just for helping you out if you are struggling to find some good resources to start your preparation. However, I try to cover and update this frequently and my goal is to cover and unify everything into one resource that you can use to rock those interviews! 
\n\n**Please leave a star if you appreciate the effort.**\n\n**Note:** For contribution, refer [Contribution.md](https:\u002F\u002Fgithub.com\u002Frbhatia46\u002FData-Science-Interview-Resources\u002Fblob\u002Fmaster\u002FContribution.md)\n\n## How to get an interview ?\n\n* First and foremost, **develop the necessary skills and be sound with the fundamentals**, these are some of the horizons you should be extremely comfortable with - \n  - Business Understanding(this is extremely critical across all seniority levels, but specifically for people with more than 3 years of experience)\n  - SQL and Databases(very crucial)\n  - Programming Skills(preferably in Python, if you know Scala, extra brownie points for some specific roles)\n  - Mathematics(Probability, Statistics, Linear Algebra and Calculus) - https:\u002F\u002Fmedium.com\u002F@rbhatia46\u002Fessential-probability-statistics-concepts-before-data-science-bb787b7a5aef \n  - Machine Learning(this includes Deep Learning) and Model building\n  - Data Structures and Algorithms(must and mandatory for top product based companies like FAANG)\n  - Domain Understanding(Optional for most openings, though very critical for some roles based on company's requirement)\n  - Literature Review(must for Research based roles) : Being able to read and understand a new research paper is one of the most essential and demanding skills needed in the industry today, as the culture of Research and Development, and innovation grows across most good organizations.\n  - Communication Skills - Being able to explain the analysis and results to business stakeholders and executives is becoming a really important skill for Data Scientists these days\n  - Some Engineering knowledge(Not mandatory, but good to have) - Being able to develop a RESTful API, writing clean and elegant code, Object Oriented programming are some of the things you can focus on for some extra brownie points.\n  - Big data knowledge(not mandatory for most openings, but good 
to have) - Spark, Hive, Hadoop, Sqoop.\n\n* **Build a personal Brand** \n  - Develop a good GitHub\u002Fportfolio of use-cases you have solved, always strive for solving end-to-end use cases, which demonstrate the entire Data Science lifecycle, from business understanding to model deployment. \n  - Write blogs, start a YouTube channel if you enjoy teaching, write a book.\n  - Work on a digital, easy-to-open, easy-to-read, clean, concise and easily customizable Resume\u002FCV, always include your demo links and source code of every use-case you have solved.\n  - Participate in Kaggle competitions, build a good Kaggle profile and send them to potential employers for increasing the chances of getting an interview call real-quick.\n\n* **Develop good connections**, through LinkedIn, by attending conferences, and doing everything you can, it's very important to land referrals and get yourself started with the interview process through good connections. Connect regularly with Data Scientists working at top product-based organizations, fast-growing startups, build a network, slowly and steadily, it's very important.`\n\n## Some Tips on Resume\u002FCV:\n* Describe past roles and an impact you made in a **quantifiable** way, be concise and I repeat, **quantify** the impact, rather than talking with facts that have no relevance. 
According to Google Recruiters, use the XYZ formula - \n```Accomplished [X] as measured by [Y], by doing [Z]```\n\n* Keep it short, ideally not more than 2 pages, as you might know, an average recruiter scans your resume only for 6 seconds, and makes a decision based on that.\n\n* If you are a fresher and don't have experience, try to solve end-to-end use-cases and mention them in your CV, preferably with the demo link(makes it easy for the recruiter) and the link to source code on GitHub.\n\n* Avoid too much technical jargon, and this goes without saying, do not mention anything you are not confident about, this might become a major bottleneck during your interview.\n\n* Some helpful links :\n  * [Advice on building Data Portfolio Projects](https:\u002F\u002Fmedium.com\u002F@jasonkgoodman\u002Fadvice-on-building-data-portfolio-projects-c5f96d8a0627) 📘\n  * [How to write a killer Software Engineering Resume](https:\u002F\u002Fwww.freecodecamp.org\u002Fnews\u002Fwriting-a-killer-software-engineering-resume-b11c91ef699d\u002F) 📘\n  * [Get your Data Science Resume past the ATS](https:\u002F\u002Ftowardsdatascience.com\u002Fup-level-your-data-science-resume-getting-past-ats-64322f0cbb73) 📘\n  * [How to write a developer résumé that hiring managers will actually read](https:\u002F\u002Fwww.freecodecamp.org\u002Fnews\u002Fhow-to-write-a-resume-that-works\u002F) 📘\n\n***\n\n* If you want to quickly revise your math basics, follow this : https:\u002F\u002Fmedia-exp2.licdn.com\u002Fdms\u002Fdocument\u002FC4D1FAQFzFmR919-Erw\u002Ffeedshare-document-pdf-analyzed\u002F0\u002F1655384106479?e=1656547200&v=beta&t=9bm4OUyWfM1dQR8LWXsLrGDqYz_Yr_e7TJxHXLXe36I\n\n* If you want to quick revise you Stats and ML basics, follow this : https:\u002F\u002Fmedia-exp2.licdn.com\u002Fdms\u002Fdocument\u002FC4D1FAQFLvzVgVxYAAA\u002Ffeedshare-document-pdf-analyzed\u002F0\u002F1656265480370?e=1657152000&v=beta&t=RD90ZEx3x2VLUGSthO-1uYKadzwTRixKRg3s8j2nvOc\n\n## Probability, Statistics and Linear 
Algebra\n* [Understand the basics of Descriptive Statistics(Really Important for an interview)](https:\u002F\u002Ftowardsdatascience.com\u002Funderstanding-descriptive-statistics-c9c2b0641291) 📘\n* [40 Question on **probability** for a Data Science Interview](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2017\u002F04\u002F40-questions-on-probability-for-all-aspiring-data-scientists\u002F) 📘\n* [40 Statistics Interview Problems and Answers for Data Scientists](https:\u002F\u002Ftowardsdatascience.com\u002F40-statistics-interview-problems-and-answers-for-data-scientists-6971a02b7eee) 📘\n* [Probability and Statistics in the context of Deep Learning](https:\u002F\u002Ftowardsdatascience.com\u002Fprobability-and-statistics-explained-in-the-context-of-deep-learning-ed1509b2eb3f) 📘\n* [Probability v\u002Fs Likelihood](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=pYxNSUDSFH4) 📹\n* [Bootstrap Methods - The Swiss Army Knife of any Data Scientist](https:\u002F\u002Fmedium.com\u002Fdata-science-journal\u002Fthe-bootstrap-the-swiss-army-knife-of-any-data-scientist-acd6e592be13) 📘\n* [Confidence Intervals Explained Simply for Data Scientists](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2020\u002F02\u002F21\u002Fci\u002F) 📘\n* [P-value Explained Simply for Data Scientists](https:\u002F\u002Ftowardsdatascience.com\u002Fp-value-explained-simply-for-data-scientists-4c0cd7044f14) 📘\n* [PDF is not a probability](https:\u002F\u002Ftowardsdatascience.com\u002Fpdf-is-not-a-probability-5a4b8a5d9531) 📘\n* [5 Sampling algorithms every Data Scientist should know](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2019\u002F07\u002F30\u002Fsampling\u002F) 📘\n* [The 10 Statistical Techniques Data Scientists Need to Master](https:\u002F\u002Fwww.kdnuggets.com\u002F2017\u002F11\u002F10-statistical-techniques-data-scientists-need-master.html) 📘\n* [Crash Course in Applied Linear Algebra](https:\u002F\u002Fyoutu.be\u002FwkxgZirbCr4?si=6jk888FeJQYDzIgy) 📹\n\n***\n\n## SQL and Data 
Acquisition\nThis is probably the entry point of your Data Science project, SQL is one of the most important skills for any Data Scientist.\n\n* [5 Common SQL Interview Problems for Data Scientists](https:\u002F\u002Ftowardsdatascience.com\u002F5-common-sql-interview-problems-for-data-scientists-1bfa02d8bae6) 📘\n* [46 Questions to test a Data Scientist on SQL](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2017\u002F01\u002F46-questions-on-sql-to-test-a-data-science-professional-skilltest-solution\u002F) 📘\n* [30 SQL Interview Questions curated for FAANG by an Ex-Facebook Data Scientist](https:\u002F\u002Fwww.nicksingh.com\u002Fposts\u002F30-sql-and-database-design-questions-from-real-data-science-interviews) 📘\n* [SQL Interview Questions](https:\u002F\u002F365datascience.com\u002Fsql-interview-questions\u002F) 📘\n* [How to ace Data Science Interviews - SQL](https:\u002F\u002Ftowardsdatascience.com\u002Fhow-to-ace-data-science-interviews-sql-b71de212e433) 📘\n* [3 Must Know SQL Questions to pass your Data Science Interview](https:\u002F\u002Fmedium.com\u002F@jayfeng\u002Fthree-must-know-sql-questions-to-pass-your-data-science-interview-463311c7eaea) 📘\n* [10 frequently asked SQL Queries in Interviews](https:\u002F\u002Fwww.java67.com\u002F2013\u002F04\u002F10-frequently-asked-sql-query-interview-questions-answers-database.html) 📘\n* [Technical Data Science Interview Questions: SQL and Coding](https:\u002F\u002Fhackernoon.com\u002Ftechnical-data-science-interview-questions-sql-and-coding-jv1k32bf) 📘\n* [How to optimize SQL Queries - Datacamp](https:\u002F\u002Fwww.datacamp.com\u002Fcommunity\u002Ftutorials\u002Fsql-tutorial-query) 📘\n* [Ten SQL Concepts You Should Know for Data Science Interviews](https:\u002F\u002Ftowardsdatascience.com\u002Ften-sql-concepts-you-should-know-for-data-science-interviews-7acf3e428185) 📘\n\n***\n## Data Preparation and Visualization\n\n* [5 Feature Selection Algorithms every Data Scientist should 
know](https:\u002F\u002Ftowardsdatascience.com\u002Fthe-5-feature-selection-algorithms-every-data-scientist-need-to-know-3a6b566efd2) 📘 \n* [6 Different Ways to Compensate for Missing Values In a Dataset ](https:\u002F\u002Ftowardsdatascience.com\u002F6-different-ways-to-compensate-for-missing-values-data-imputation-with-examples-6022d9ca0779) 📘 \n* [A Brief Overview of Outlier Detection Techniques](https:\u002F\u002Ftowardsdatascience.com\u002Fa-brief-overview-of-outlier-detection-techniques-1e0b2c19e561) 📘 \n* [Cleaning and Prepping Data with Python for Data Science — Best Practices and Helpful Packages](https:\u002F\u002Fmedium.com\u002F@rrfd\u002Fcleaning-and-prepping-data-with-python-for-data-science-best-practices-and-helpful-packages-af1edfbe2a3) 📘 \n* [When to use which plot for visualization](https:\u002F\u002Ftowardsdatascience.com\u002Fwhat-plot-why-this-plot-and-why-not-9508a0cb35ea) 📘\n* [Ways to detect and remove Outliers](https:\u002F\u002Ftowardsdatascience.com\u002Fways-to-detect-and-remove-the-outliers-404d16608dba) 📘\n* [Dealing with Class Imbalances in Machine Learning](https:\u002F\u002Ftowardsdatascience.com\u002Fdealing-with-imbalanced-classes-in-machine-learning-d43d6fa19d2) 📘\n* [Smarter ways to encode categorical data](https:\u002F\u002Ftowardsdatascience.com\u002Fsmarter-ways-to-encode-categorical-data-for-machine-learning-part-1-of-3-6dca2f71b159)\n* [Numpy and Pandas Cheatsheet](https:\u002F\u002Fgithub.com\u002Fjessicayung\u002Fdata-analyst-nd\u002Fblob\u002Fmaster\u002F2-intro-to-data-analysis\u002Fnumpy_pandas_cheatsheet.pdf) 📘\n* [3 Methods to deal with outliers](https:\u002F\u002Fwww.kdnuggets.com\u002F2017\u002F01\u002F3-methods-deal-outliers.html) 📘\n* [Feature Selection Techniques](https:\u002F\u002Ftowardsdatascience.com\u002Ffeature-selection-techniques-in-machine-learning-with-python-f24e7da3f36e) 📘\n* [Why, how and When to scale your 
features](https:\u002F\u002Fmedium.com\u002Fgreyatom\u002Fwhy-how-and-when-to-scale-your-features-4b30ab09db5e) 📘\n* [Everything you need to know about Scatter plots](https:\u002F\u002Ftowardsdatascience.com\u002Feverything-you-need-to-know-about-scatter-plots-for-data-visualisation-924144c0bc5) 📘\n* [How to Select Features for Machine Learning](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=YaKMeAlHgqQ) 📹\n* [10 ways for Feature Selection](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=Erp0bjEPoM8) 📹\n\n\n\n***\n## Classic Machine Learning Algorithms\n\n* [All Learning Algorithms Explained in 14 Minutes](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=BT6Aw6Q75Yg)\n\n### 1. Logistic Regression\n\n* [All about Logistic Regression in one article](https:\u002F\u002Ftowardsdatascience.com\u002Flogistic-regression-b0af09cdb8ad) 📘 \n* [Understanding Logistic Regression step-by-step](https:\u002F\u002Ftowardsdatascience.com\u002Funderstanding-logistic-regression-step-by-step-704a78be7e0a) 📘 \n* [Logistic Regression - Short and Clear Explanation - 9 Mins](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=yIYKR4sgzI8) 📹\n* [Linear Regression vs Logistic Regression](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=OCwZyYH14uw) 📹\n* [30 Questions to test a Data Scientist on Logistic Regression](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2017\u002F08\u002Fskilltest-logistic-regression\u002F) 📘 \n* [Logistic Regression - Understand Everything (Theory + Maths + Coding) in 1 video](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=VCJdg7YBbAQ) 📹\n* [Lasso, Ridge and Logistic Regression all in one video](https:\u002F\u002Fwww.youtube.com\u002Flive\u002FvaQxdBEcBzU?si=3judBH9xcRefRwGP) 📹\n\n\n### 2. 
Linear Regression\n\n* [30 Questions to test a Data Scientist on Linear Regression](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2017\u002F07\u002F30-questions-to-test-a-data-scientist-on-linear-regression\u002F) 📘 \n* [Linear Regression - Understand Everything (Theory + Maths + Coding) in 1 video](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=E5RjzSK0fvY) 📹\n* [5 Types of Regression and their properties](https:\u002F\u002Ftowardsdatascience.com\u002F5-types-of-regression-and-their-properties-c5e1fa12d55e) 📘\n* [Ridge Regression - Clearly Explained](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=Q81RR3yKn30) 📹\n* [Lasso Regression - Clearly Explained](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=NGf0voTMlcs) 📹\n\n\n### 3. Tree Based\u002FEnsemble Algorithms\n\n* [30 Questions to test a Data Scientist on Tree based models](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2017\u002F09\u002F30-questions-test-tree-based-models\u002F) 📘\n* [Gini-index v\u002Fs Information Entropy](https:\u002F\u002Ftowardsdatascience.com\u002Fgini-index-vs-information-entropy-7a7e4fed3fcb) 📘\n* [Decision Tree vs. Random Forest – Which Algorithm Should you Use?](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2020\u002F05\u002Fdecision-tree-vs-random-forest-algorithm\u002F) 📘\n* [Why Random Forest doesn't work well for Time-Series?](https:\u002F\u002Fmedium.com\u002Fdatadriveninvestor\u002Fwhy-wont-time-series-data-and-random-forests-work-very-well-together-3c9f7b271631) 📘\n* [Comprehensive guide to Ensemble Models](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2018\u002F06\u002Fcomprehensive-guide-for-ensemble-models\u002F) 📘\n* [The Simple Math behind 3 Decision Tree Splitting criterions](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2019\u002F11\u002F12\u002Fdtsplits\u002F) 📘\n\n\n### 4. 
K-Nearest-Neighbors\n\n* [Fundamental Interview Questions on KNN - A Quick refresh](http:\u002F\u002Ftheprofessionalspoint.blogspot.com\u002F2019\u002F01\u002Fknn-algorithm-in-machine-learning.html) 📘\n* [30 Questions to test a Data Scientist on KNN](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2017\u002F09\u002F30-questions-test-k-nearest-neighbors-algorithm\u002F) 📘\n* [Pros and Cons of KNN](https:\u002F\u002Fwww.fromthegenesis.com\u002Fpros-and-cons-of-k-nearest-neighbors\u002F) 📘\n* [KNN Algorithm - Understand Everything (Theory + Maths + Coding) in 1 video](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=6kZ-OPLNcgE) 📹\n\n### 5. Support Vector Machines\n\n* [All about SVMs - Math, Terminology, Intuition, Kernels in one article](https:\u002F\u002Ftowardsdatascience.com\u002Fsupport-vector-machines-svm-c9ef22815589) 📘\n* [25 Questions to test a Data Scientist on SVMs](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2017\u002F10\u002Fsvm-skilltest\u002F) 📘\n\n### 6. 
Naive Bayes\n\n* [12 tips to make most out of Naive Bayes](https:\u002F\u002Fmachinelearningmastery.com\u002Fbetter-naive-bayes\u002F) 📘\n* [Naive Bayes - Understand Everything (Theory + Maths + Coding) in 1 video](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=vz_xuxYS2PM) 📹\n* [6 easy steps to learn Naive Bayes](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2017\u002F09\u002Fnaive-bayes-explained\u002F) 📘\n\n\n***\n## Time Series\n* [40 Questions to test a Data Scientist on Time Series](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2017\u002F04\u002F40-questions-on-time-series-solution-skillpower-time-series-datafest-2017\u002F) 📘\n* [11 Classical Time Series Forecasting Methods](https:\u002F\u002Fmachinelearningmastery.com\u002Ftime-series-forecasting-methods-in-python-cheat-sheet\u002F) 📘\n* [Interview Questions on ARIMA](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=654SjiZO5ks) 📹\n\n***\n## Unsupervised Learning\n* [The DOs and DONTs of PCA(Principal Component Analysis)](https:\u002F\u002Fmedium.com\u002F@sadatnazrul\u002Fthe-dos-and-donts-of-principal-component-analysis-7c2e9dc8cc48) 📘\n* [An introduction to t-SNE : DataCamp](https:\u002F\u002Fwww.datacamp.com\u002Fcommunity\u002Ftutorials\u002Fintroduction-t-sne) 📘\n* [Dimensionally Reducing Squeezing out the good stuff](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=4QMFNg7tjbk) 📘\n* [Dimensionality Reduction for Dummies : Part 1 - Intuition](https:\u002F\u002Ftowardsdatascience.com\u002Fhttps-medium-com-abdullatif-h-dimensionality-reduction-for-dummies-part-1-a8c9ec7b7e79) 📘\n* [In-depth Explanation of DBSCAN Algorithm](https:\u002F\u002Ftowardsdatascience.com\u002Fexplaining-dbscan-clustering-18eaf5c83b31) 📘\n\n***\n## Recommender Systems\n* [Recommender Systems in a Nutshell](https:\u002F\u002Fwww.kdnuggets.com\u002F2020\u002F07\u002Frecommender-systems-nutshell.html)\n\n***\n## Deep Learning\n* [Why Regularization reduces overfitting in Deep Neural 
Networks](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=4nqD5TBlOWU) 📹\n* [Pros and Cons of Neural Networks](https:\u002F\u002Ftowardsdatascience.com\u002Fhype-disadvantages-of-neural-networks-6af04904ba5b) 📘\n* [When not to use Neural Networks](https:\u002F\u002Fmedium.com\u002Fdatadriveninvestor\u002Fwhen-not-to-use-neural-networks-89fb50622429) 📘\n* [40 Questions to test a Data Scientist on Deep learning](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2017\u002F04\u002F40-questions-test-data-scientist-deep-learning\u002F) 📘\n* [21 Popular Deep Learning Interview Questions](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2020\u002F04\u002Fcomprehensive-popular-deep-learning-interview-questions-answers\u002F) 📘\n* [Deep Learning Interview Questions - Edureka](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=HGXlFG_Rz4E) 📹\n* [Activation Functions in a Neural Network - Explained](https:\u002F\u002Ftowardsdatascience.com\u002Factivation-functions-neural-networks-1cbd9f8d91d6) 📘\n* [Vanishing and Exploding Gradient - Clearly Explained](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=qO_NLVjD6zE) 📹\n* [Bias and Variance - Very clearly explained](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=EuBBz3bI-aA) 📹\n* [Why use ReLU over Sigmoid](https:\u002F\u002Fstats.stackexchange.com\u002Fquestions\u002F126238\u002Fwhat-are-the-advantages-of-relu-over-sigmoid-function-in-deep-neural-networks) 📘\n* [25 Deep Learning Interview Qurstions to test your knowledge](https:\u002F\u002Ftowardsdatascience.com\u002F50-deep-learning-interview-questions-part-1-2-8bbc8a00ec61) 📘\n* [10 Deep Learning Best Practices to Keep in Mind in 2020](https:\u002F\u002Fnanonets.com\u002Fblog\u002F10-best-practices-deep-learning\u002F) 📘\n\n\n***\n## GenAI and LLMs\n\n* [LoRA Explained](https:\u002F\u002Fyoutu.be\u002FhcAx89iwkMU) 📹\n* [RAG v\u002Fs Fine-tuning v\u002Fs Prompt Engineering](https:\u002F\u002Fyoutu.be\u002F-ZwhMnNwvxA)📹\n* [Cross-encoders vs Bi-encoders : A deep-dive 
into text encoding methods](https:\u002F\u002Fmedium.com\u002F@rbhatia46\u002Fcross-encoders-vs-bi-encoders-a-deep-dive-into-text-encoding-methods-d9aa890d6ca4)📘\n* [RAG 101](https:\u002F\u002Fdeveloper.nvidia.com\u002Fblog\u002Frag-101-retrieval-augmented-generation-questions-answered\u002F)\n* [Generative AI in a Nutshell](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=2IK3DFHRFfw)📹\n* [BERT Theory in-depth explanation in one video](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=90mGPxR2GgY)📹\n* [Transformers Theory in-depth explanation in one video](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=bCz4OMemCcA)📹\n* [The math behind Attention: Keys, Queries, and Values matrices](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=UPtG_38Oq8o)📹\n  \n\n\n***\n## Machine Learning System Design\n\n* [How To Answer Any Machine Learning System Design Interview Question](https:\u002F\u002Ftowardsdatascience.com\u002Fhow-to-answer-any-machine-learning-system-design-interview-question-a98656bb7ff0) 📘\n\n\n\n\n***\n## Machine Learning Interpretability\n* [Four Questions on Deciphering the World of Machine Learning Models](https:\u002F\u002Fnarrativescience.com\u002Fresource\u002Fblog\u002Fmachine-learning-models\u002F) 📘\n* [Machine Learning Explanaibility - Crash Course by Kaggle](https:\u002F\u002Fwww.kaggle.com\u002Flearn\u002Fmachine-learning-explainability) 📘\n* [SHAP Values explained simply](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=VB9uV-x0gtg) 📹\n\n***\n## Case Studies\nCase studies are extremely important for interviews, below are some resources to practice, think first before looking at the solutions.\n* [Dawn of Taxi Aggregators](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2016\u002F04\u002Fcase-study-analytics-interviews-dawn-taxi-aggregators\u002F) 📘\n* [Optimizing product prices for an online vendor](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2016\u002F07\u002Fsolving-case-study-optimize-products-price-online-vendor-level-hard\u002F) 
📘\n* [Tips for a Case-Study Interview](https:\u002F\u002Fworkera.ai\u002Fresources\u002Fdata-science-case-study-interview\u002F) 📘\n* [Mercari Price Prediction](https:\u002F\u002Ftowardsdatascience.com\u002Fa-data-science-case-study-with-python-mercari-price-prediction-4e852d95654) 📘\n* [End-to-End multiclass Text Classification pipeline](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2020\u002F05\u002F24\u002Fmultitextclass\u002F) 📘\n* [End-to-End multiclass Image Classification pipeline](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2020\u002F06\u002F06\u002Fmulticlass_image_classification_pytorch\u002F) 📘\n* [Large Scale Forecasting for 1000+ products - Nagarro](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=8jfDBD6xlFM) 📹\n* [Clustering and Classification in E-Commerce](https:\u002F\u002Flucidworks.com\u002Fpost\u002Fclustering-classification-supervised-unsupervised-learning-ecommerce\u002F) 📘\n* [The ABCs of Learning to Rank](https:\u002F\u002Flucidworks.com\u002Fpost\u002Fabcs-learning-to-rank\u002F) 📘\n* [Data Science Case Study: Optimizing Product Placement in Retail ](https:\u002F\u002Ftowardsdatascience.com\u002Fdata-science-case-study-optimizing-product-placement-in-retail-part-1-2e8b27e16e8d) 📘\n\n***\n## NLP\n* [30 Questions to test a Data Scientist on NLP](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2017\u002F07\u002F30-questions-test-data-scientist-natural-language-processing-solution-skilltest-nlp\u002F)\n* [11 Most Commonly Asked NLP Interview Questions For Beginners](https:\u002F\u002Fanalyticsindiamag.com\u002F11-most-commonly-asked-nlp-interview-questions-for-beginners\u002F)\n* [How to solve 90% of NLP Problems](https:\u002F\u002Fblog.insightdatascience.com\u002Fhow-to-solve-90-of-nlp-problems-a-step-by-step-guide-fda605278e4e)\n* [Questions asked for NLP Roles at Companies](https:\u002F\u002Fmedium.com\u002Fmodern-nlp\u002Fnlp-interview-questions-f062040f32f7)\n* [Vector-based Methods for Similarity Search (TF-IDF, BM25, 
SBERT)](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=ziiF1eFM3_4) 📹\n* [Understanding BERT in detail - one of the best playlists to understand the fundamentals and inner workings of BERT, huge shoutout to Chris McCormick\n](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PLam9sigHPGwOBuH4_4fr-XvDbe5uneaf6) 📹\n* [Word Embeddings, CBoW and Skipgram](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=Q95SIG4g7SA) 📹\n* [CBoW v\u002Fs Skipgram : Data Science Interview Question](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=WssUXhPoELE) 📹\n\n\n***\n## Data Science Interviews at FAANG and Similar Companies\n* [Amazon’s Data Scientist Interview Practice Problems](https:\u002F\u002Ftowardsdatascience.com\u002Famazon-data-scientist-interview-practice-problems-15b9b86e86c6) 📘\n* [Microsoft Data Science Interview Questions and Answers](https:\u002F\u002Ftowardsdatascience.com\u002Fmicrosoft-data-science-interview-questions-and-answers-69ccac16bd9b) 📘\n* [Problem Solving Questions for Data Science interview at Google](https:\u002F\u002Ftowardsdatascience.com\u002Fgoogles-data-science-interview-brain-teasers-7f3c1dc4ea7f) 📘\n\n***\n## Becoming a Rockstar Data Scientist(read if you have extra time)\nGoing through these will definitely add extra brownie points, so don't miss these if you have time.\n\n* [Top 13 Skills To Become a Rockstar Data Scientist](https:\u002F\u002Ftowardsdatascience.com\u002Ftop-13-skills-to-become-a-rockstar-data-scientist-faf2f97e655d) 📘 \n* [Understand these 4 ML concepts to sound like a master](https:\u002F\u002Ftowardsdatascience.com\u002Funderstand-these-4-advanced-concepts-to-sound-like-a-machine-learning-master-d32843840b52) 📘\n* [12 things I wish I knew before starting as a Data Scientist](https:\u002F\u002Fmedium.com\u002Fdeliberate-data-science\u002F12-things-i-wish-id-known-before-starting-as-a-data-scientist-45989be6300e) 📘\n* [Understand the Data Science 
pipeline](https:\u002F\u002Ftowardsdatascience.com\u002Fa-beginners-guide-to-the-data-science-pipeline-a4904b2d8ad3) 📘\n* [Kaggle Data Science Glossary](https:\u002F\u002Fwww.kaggle.com\u002Fshivamb\u002Fdata-science-glossary-on-kaggle) 📘\n* [Google Machine Learning Glossary](https:\u002F\u002Fdevelopers.google.com\u002Fmachine-learning\u002Fglossary\u002F) 📘\n* [Running your ML Predictions 50 times faster - Hummingbird](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2020\u002F06\u002F06\u002Fhummingbird_faster_ml_preds\u002F) 📘\n* [3 Mistakes you should not make in a Data Science Interview](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2019\u002F12\u002F24\u002Fmistakes\u002F) 📘\n* [How to find Feature importances for BlackBox Models?](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2019\u002F12\u002F04\u002Fblackbox\u002F) 📘\n\n***\n\n## Data Structures and Algorithms(Optional)\nAlthough this might be optional, but do not miss this if the Job Description explicitly asks for this, and especially never miss this if you are interviewing at FAANG and similar organizations, or if you have a CS Background. 
You don't have to be as good as an SDE at this, but at least know the basics.\n\n* [A Data Scientist's guide to Data Structures and Algorithms](https:\u002F\u002Ftowardsdatascience.com\u002Fa-data-scientists-guide-to-data-structures-algorithms-1176395015a0) 📘\n* [Handling Trees in Data Science Algorithmic Interview](https:\u002F\u002Ftowardsdatascience.com\u002Fhandling-trees-in-data-science-algorithmic-interview-ea14dd1b6236) 📘\n* [A simple introduction to Linked Lists for Data Scientists](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2020\u002F01\u002F28\u002Fll\u002F) 📘\n* [Dynamic Programming for Data Scientists](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2020\u002F01\u002F28\u002Fdp\u002F) 📘\n* [3 Programming concepts for Data Scientists](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2019\u002F12\u002F09\u002Fpc\u002F) 📘\n* [Data Scientists, The 5 Graph Algorithms that you should know](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2019\u002F09\u002F02\u002Fgraph_algs\u002F) 📘\n\n***\n## Engineering and Deployment\n* [A Layman’s Guide for Data Scientists to create APIs in minutes](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2020\u002F06\u002F06\u002Ffastapi_for_data_scientists\u002F) 📘\n* [Take your Machine Learning Models to Production with these 5 simple steps](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2019\u002F12\u002F25\u002Fprod\u002F) 📘\n* [2 ways to deploy your ML models](https:\u002F\u002Ftowardsdatascience.com\u002Fthere-are-two-very-different-ways-to-deploy-ml-models-heres-both-ce2e97c7b9b1) 📘\n* [How to deploy a Keras model as a web app through Flask](https:\u002F\u002Ftowardsdatascience.com\u002Fdeploying-a-keras-deep-learning-model-as-a-web-application-in-p-fc0f2354a7ff) 📘\n* [How to write Web apps using simple Python for Data Scientists?](https:\u002F\u002Ftowardsdatascience.com\u002Fhow-to-write-web-apps-using-simple-python-for-data-scientists-a227a1a01582) 📘\n\n***\n## Big Data and Spark\n* [55 Apache Spark Interview 
Questions](https:\u002F\u002Fwww.edureka.co\u002Fblog\u002Finterview-questions\u002Ftop-apache-spark-interview-questions-2016\u002F) 📘\n* [10 Questions you can expect in a Spark Interview](https:\u002F\u002Fmedium.com\u002Fanalytics-vidhya\u002F10-questions-you-can-expect-in-spark-interview-24b89b807dfb) 📘\n* [Hive Interview Questions](https:\u002F\u002Fwww.tutorialspoint.com\u002Fhive\u002Fhive_interview_questions.htm) 📘\n* [Top 20 Apache Spark Interview Questions](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=Y8LKEDyA5iY) 📹\n* [Spark Interview Questions - The entire playlist](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PLtfmIPhU2DkNjQjL08kR3cd4kUzWqS0vg) 📹\n* [Another fabulous Playlist for Spark Interview Questions](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PL9sbKmQTkW05mXqnq1vrrT8pCsEa53std) 📹\n* [Practical PySpark tips for Data Scientists](https:\u002F\u002Ftowardsdatascience.com\u002Fpractical-spark-tips-for-data-scientists-145d85e9b2d8) 📘\n* [3 Ways to parallelize your code using Spark](https:\u002F\u002Ftowardsdatascience.com\u002F3-methods-for-parallelization-in-spark-6a1a4333b473) 📘\n* [Datashader - Revealing the Structure of Genuinely Big Data](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=6m3CFbKmK_c) 📹\n* [Lightning Talk: What one should know about Spark-MLlib](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=DBxcua0Vmvk) 📹\n* [Solving “Container Killed by Yarn For Exceeding Memory Limits” Exception in Apache Spark](https:\u002F\u002Fmedium.com\u002Fanalytics-vidhya\u002Fsolving-container-killed-by-yarn-for-exceeding-memory-limits-exception-in-apache-spark-b3349685df16) 📘\n\n***\n## Some amazing stuff on Python and Spark \nYou can't afford to miss this if you are interviewing for a Big data role.\n* [Improving Python and Spark performance](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=qIKImANLFtE) 📹\n* [High Performance Python on Spark](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=abZ0f5ug18U) 📹\n* [Vectorized UDFs: Scalable 
Analysis with Python and PySpark](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=Til-StSDvfA) 📹\n\n***\n## General Interview Questions across the Spectrum (Video)\n\n* [Common Data Science Interview Questions - Edureka](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=tTAieUcNHdY)\n* [Common Machine Learning Interview Question - Edureka](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=t6gOpFLt-Ks)\n* [Top 5 algorithms used in Data Science](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=BfowBtIxNu4) \n* [Common Data Science Interview Questions - Analytics University](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=BfowBtIxNu4) \n* [3 types of Data Science Interview Questions](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=4Z6lxfglvUU)\n* [Lessons learned the hard way - Hacking the Data Science Interview](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=3BRLGRqj8p)\n* [What it's like to Interview as a Data Scientist](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=0HmAEWPfMnM)\n* [5 Tips for getting a Data Science Job](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=MfP-P8EHGBo)\n* [8 Frequently used Data Science Algorithms](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=z3wMgOTSE5s) \n* [Scenario Based Practical Interview](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=bJPhEa3mbwo)\n* [KNN v\u002Fs K Means](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=OClrEI_5Ri4)\n\n\n## General Interview Questions across the Spectrum (Reading)\n* [The Data Science Interview Guide](https:\u002F\u002Ftowardsdatascience.com\u002Fdata-science-interview-guide-4ee9f5dc778)\n* [Top 30 Data Science Interview Questions](https:\u002F\u002Ftowardsdatascience.com\u002Ftop-30-data-science-interview-questions-7dd9a96d3f5c)\n* [35 Important Data Science Interview Questions](https:\u002F\u002Fwww.edureka.co\u002Fblog\u002Finterview-questions\u002Fdata-science-interview-questions\u002F)\n* [100 Data Science Interview Questions across 
FAANG](https:\u002F\u002Fmedium.com\u002F@e22aafa7d95\u002Fc5a66186769a)\n* [The Most Comprehensive Data Science Interview Guide](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2018\u002F06\u002Fcomprehensive-data-science-machine-learning-interview-guide\u002F)\n* [41 essential ML interview questions - Springboard](https:\u002F\u002Fwww.springboard.com\u002Fblog\u002Fmachine-learning-interview-questions\u002F)\n* [30 days of Data Science Interview Preparation - iNeuron](https:\u002F\u002Fgithub.com\u002FiNeuronai\u002Finterview-question-data-science-)\n* [109 Data Science Interview Questions - Springboard](https:\u002F\u002Fwww.springboard.com\u002Fblog\u002Fdata-science-interview-questions\u002F)\n* [Most asked Data Science interview questions in India - Springboard](https:\u002F\u002Fin.springboard.com\u002Fblog\u002Fmost-asked-data-science-interview-questions-in-india\u002F)\n* [List of AI Startups in India and resources for preparing for the interview](https:\u002F\u002Fgithub.com\u002Ftheainerd\u002FMLInterview)\n* [5 interview questions to predict a good Data Scientist](https:\u002F\u002Fmedium.com\u002Fpredict\u002Ffive-interview-questions-to-predict-a-good-data-scientist-40d310cdcd68)\n* [8 proven ways to improve the accuracy of your ML model ](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2015\u002F12\u002Fimprove-machine-learning-results\u002F)\n* [60 Interview Questions on Machine Learning - AnalyticsIndiaMag](https:\u002F\u002Fanalyticsindiamag.com\u002F60-interview-questions-on-machine-learning\u002F)\n* [The Big List of DS and ML interview Resources](https:\u002F\u002Ftowardsdatascience.com\u002Fthe-big-list-of-ds-ml-interview-resources-2db4f651bd63)\n* [100 Basic Data Science Interview Questions along with answers](https:\u002F\u002Fwww.dezyre.com\u002Farticle\u002F100-data-science-interview-questions-and-answers-general-for-2018\u002F184)\n* [40 interview questions asked at Startups in ML\u002FDS 
Interview](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2016\u002F09\u002F40-interview-questions-asked-at-startups-in-machine-learning-data-science\u002F)\n* [My Data Science\u002FMachine Learning Job Interview Experience : List of DS\u002FML\u002FDL Questions &#8211; Machine Learning in Action](https:\u002F\u002Fappliedmachinelearning.blog\u002F2018\u002F04\u002F13\u002Fmy-data-science-machine-learning-job-interview-experience-list-of-ds-ml-dl-questions\u002F)\n* [How do I prepare for a Data Science phone interview at Airbnb](https:\u002F\u002Fwww.quora.com\u002FHow-do-I-prepare-for-a-phone-interview-for-a-data-scientist-position-with-Airbnb)\n* [Best ML algorithm for regression problems](https:\u002F\u002Ftowardsdatascience.com\u002Fselecting-the-best-machine-learning-algorithm-for-your-regression-problem-20c330bad4ef)\n* [How to ace the In person Data Science Interview](https:\u002F\u002Ftowardsdatascience.com\u002Fhow-to-ace-the-in-person-data-science-interview-584ca11df08a)\n* [How to land a Data Scientist job at Airbnb](https:\u002F\u002Ftowardsdatascience.com\u002Fhow-to-land-a-data-scientist-job-at-your-dream-company-my-journey-to-airbnb-f6a1e99892e8)\n* [120 Data Science Interview Questions(from all domains)](https:\u002F\u002Fgithub.com\u002Fkojino\u002F120-Data-Science-Interview-Questions)\n* [Understanding the Bias-Variance Tradeoff](https:\u002F\u002Ftowardsdatascience.com\u002Funderstanding-the-bias-variance-tradeoff-165e6942b229)\n* [You Need these Cheatsheets if you are tackling ML algorithms](https:\u002F\u002Fmedium.freecodecamp.org\u002Fyou-need-these-cheat-sheets-if-youre-tackling-machine-learning-algorithms-45da5743888e)\n* [Red Flags in a Data Science Interview](https:\u002F\u002Ftowardsdatascience.com\u002Fred-flags-in-data-science-interviews-4f492bbed4c4)\n* [A Data Scientist's take on Interview Questions](https:\u002F\u002Ftowardsdatascience.com\u002Fmy-take-on-data-scientist-interview-questions-part-1-6df22252b2e8)\n* [What is 
Cross Entropy(Nice and Short Explanation)](https:\u002F\u002Fstackoverflow.com\u002Fquestions\u002F41990250\u002Fwhat-is-cross-entropy\u002F41990932#41990932)\n* [What does an ideal Data Scientist's profile look like](https:\u002F\u002Ftowardsdatascience.com\u002Fwhat-does-an-ideal-data-scientists-profile-look-like-7d7bd78ff7ab)\n* [25 Fun Questions for a Machine Learning interview](https:\u002F\u002Fmedium.com\u002Fanalytics-vidhya\u002F25-fun-questions-for-a-machine-learning-interview-373b744a4faa)\n* [How to Prepare for Machine Learning Interviews](https:\u002F\u002Ftowardsdatascience.com\u002Fhow-to-prepare-for-machine-learning-interviews-5fac3db58168)\n* [How to develop a Machine Learning Model from scratch](https:\u002F\u002Ftowardsdatascience.com\u002Fmachine-learning-general-process-8f1b510bd8af)\n* [End to End guide for a Machine Learning Project](https:\u002F\u002Fmedium.com\u002Ffintechexplained\u002Fend-to-end-guide-for-machine-learning-project-146c288186dc)\n* [Classification v\u002Fs Regression](https:\u002F\u002Fmedium.com\u002Ffintechexplained\u002Fsupervised-machine-learning-regression-vs-classification-18b2f97708de)\n* [Must Know mathematical measures for Every Data Scientist](https:\u002F\u002Fmedium.com\u002Ffintechexplained\u002Fmust-know-mathematical-measures-for-data-scientist-15bfc4f7f39c)\n* [Where did the least square come from](https:\u002F\u002Ftowardsdatascience.com\u002Fwhere-did-the-least-square-come-from-3f1abc7f7caf)\n* [Regularization in Machine Learning - Explained](https:\u002F\u002Ftowardsdatascience.com\u002Fregularization-in-machine-learning-76441ddcf99a)\n\n\n***\n## Interesting Reads\n* [3 Common Data Science Career Transitions and how to make them happen](https:\u002F\u002Ftowardsdatascience.com\u002F3-common-data-science-career-transitions-and-how-to-make-them-happen-588c3618942f)\n* [Navigating the Data Science Career 
Landscape](https:\u002F\u002Fhackernoon.com\u002Fnavigating-the-data-science-career-landscape-db746a61ac62)\n* [Which model and how much data](https:\u002F\u002Ftowardsdatascience.com\u002Fwhich-model-and-how-much-data-75a3999660f3)\n","[![HitCount](http:\u002F\u002Fhits.dwyl.io\u002F{username}\u002F{repo}.svg)](http:\u002F\u002Fhits.dwyl.io\u002F{rbhatia46}\u002F{Data-Science-Interview-Resources})\n![给这个仓库点个赞](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Frbhatia46\u002FData-Science-Interview-Resources?style=social)\n\n# 数据科学面试资源\n\n**更新：** 基于过去几年丰富的面试经验，我最近决定开设一个专门的频道，帮助大家在数据科学领域脱颖而出。我的目标是为所有希望在即将到来的面试前重温基础知识，或掌握成功通过数据科学面试并将其应用于实际工作所需的技能和深入知识的人们，打造一份全面的资源指南。该频道旨在清晰地讲解日常工作中使用的各种技术，涵盖广泛的机器学习主题。欢迎在此探索：\u003Cbr\u002F>[\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frbhatia46_Data-Science-Interview-Resources_readme_c1c7be8f5306.jpg\" width=70 height=70>](https:\u002F\u002Fwww.youtube.com\u002Fchannel\u002FUC2vHbm_9kzjrRamX8ArGdkA)\n\n\n首先，感谢您访问本仓库！恭喜您做出了一个出色的职业选择。我希望通过分享自己在大型产品型公司和快速成长的初创企业中多次参与面试的经验，帮助您获得梦寐以求的数据科学职位。希望这些内容对您有所帮助。\n\n随着对数据科学家需求的不断增加，成功通过简历筛选并获得面试机会变得愈发困难。在这个仓库中，我整理了从如何顺利通过简历筛选到如何在面试中表现出色、最终拿到理想职位的所有相关内容，请务必充分利用以下资源。\n\n我在这里列出的每项资源都经过亲自验证，其中大多数是我个人使用过的，对我帮助很大。\n\n**温馨提示：** 数据科学\u002F机器学习领域非常广泛，需要学习的内容也很多。这份清单绝非详尽无遗，而只是为那些正在寻找优质备考资源的朋友提供一些参考。不过，我会定期更新并不断完善，力求将所有内容整合到一处，让您能够轻松应对各类面试！\n\n**如果您认可我的努力，请为本项目点亮一颗星吧！**\n\n**注：** 如需贡献内容，请参阅 [Contribution.md](https:\u002F\u002Fgithub.com\u002Frbhatia46\u002FData-Science-Interview-Resources\u002Fblob\u002Fmaster\u002FContribution.md)。\n\n## 如何获得面试机会？\n\n* 首先，也是最重要的，**培养必要的技能并扎实掌握基础知识**。以下是一些您应该非常熟悉的领域：\n  - 业务理解（这一点在各个职级都至关重要，尤其是对于拥有三年以上经验的专业人士）\n  - SQL与数据库（非常重要）\n  - 编程能力（首选Python；如果会Scala，在某些特定岗位上会更有优势）\n  - 数学（概率论、统计学、线性代数和微积分）—— https:\u002F\u002Fmedium.com\u002F@rbhatia46\u002Fessential-probability-statistics-concepts-before-data-science-bb787b7a5aef\n  - 机器学习（包括深度学习）及模型构建\n  - 数据结构与算法（对于FAANG等顶级产品型公司而言，这是必须掌握的内容）\n  - 行业知识（对于大多数岗位来说是可选的，但某些根据公司需求设定的岗位则极为重要）\n  - 
文献综述（研究型岗位必备）：如今，能够在行业中阅读并理解新的研究论文，已成为一项极其关键且备受重视的能力，因为各大优秀企业的研发与创新文化正日益蓬勃发展。\n  - 沟通能力——如今，能够向业务相关方和高管清晰解释分析结果，已经成为数据科学家的一项重要技能。\n  - 一定的工程知识（非强制要求，但具备会有加分）：例如开发RESTful API、编写简洁优雅的代码以及掌握面向对象编程等，都可以为您在面试中增添亮点。\n  - 大数据相关知识（并非大多数岗位的硬性要求，但具备会有帮助）：Spark、Hive、Hadoop、Sqoop。\n\n* **打造个人品牌**\n  - 构建优秀的GitHub\u002F作品集，展示您解决过的实际案例。尽量选择端到端的完整用例，能够体现从业务理解到模型部署的整个数据科学生命周期。\n  - 撰写博客，如果您喜欢教学，可以开通YouTube频道或撰写书籍。\n  - 制作一份数字化、易于打开、清晰易读、简洁明了且便于定制的简历\u002FCV，务必附上每个案例的演示链接和源代码。\n  - 参与Kaggle竞赛，建立良好的Kaggle个人主页，并将其推荐给潜在雇主，以提高快速获得面试邀请的机会。\n\n* **拓展人脉关系**，通过LinkedIn、参加行业会议等方式，尽一切可能获取内部推荐，借助良好的人脉关系开启您的面试之旅。定期与在顶尖产品型企业或快速成长的初创公司工作的数据科学家保持联系，逐步建立起自己的职业网络，这一步至关重要。\n\n## 简历\u002FCV 一些技巧：\n* 以**可量化**的方式描述过往角色及你所产生的影响，务必简洁明了，并再次强调——**量化**你的成果，而非泛泛而谈无关紧要的事实。根据谷歌招聘官的建议，可以使用 XYZ 公式：\n```通过 [Z] 的方式，实现了 [X]，其效果以 [Y] 衡量```\n\n* 简历篇幅宜短，最好不超过两页。众所周知，平均每位招聘人员仅用6秒钟浏览你的简历，并据此做出判断。\n\n* 如果你是应届毕业生且缺乏工作经验，不妨尝试完成端到端的项目案例，并在简历中提及，最好附上演示链接（便于招聘人员快速了解）以及 GitHub 上的源代码链接。\n\n* 避免过多使用技术术语；同时毋庸赘言，切勿提及你不熟悉或不自信的内容，这可能会在面试中成为重大障碍。\n\n* 一些实用链接：\n  * [构建数据项目作品集的建议](https:\u002F\u002Fmedium.com\u002F@jasonkgoodman\u002Fadvice-on-building-data-portfolio-projects-c5f96d8a0627) 📘\n  * [如何撰写一份出色的软件工程师简历](https:\u002F\u002Fwww.freecodecamp.org\u002Fnews\u002Fwriting-a-killer-software-engineering-resume-b11c91ef699d\u002F) 📘\n  * [让你的数据科学简历顺利通过 ATS 系统](https:\u002F\u002Ftowardsdatascience.com\u002Fup-level-your-data-science-resume-getting-past-ats-64322f0cbb73) 📘\n  * [如何写出一份招聘经理愿意认真阅读的开发者简历](https:\u002F\u002Fwww.freecodecamp.org\u002Fnews\u002Fhow-to-write-a-resume-that-works\u002F) 📘\n\n***\n\n* 若想快速复习数学基础知识，请参考以下链接：https:\u002F\u002Fmedia-exp2.licdn.com\u002Fdms\u002Fdocument\u002FC4D1FAQFzFmR919-Erw\u002Ffeedshare-document-pdf-analyzed\u002F0\u002F1655384106479?e=1656547200&v=beta&t=9bm4OUyWfM1dQR8LWXsLrGDqYz_Yr_e7TJxHXLXe36I\n\n* 
若想快速复习统计学和机器学习基础，请参考以下链接：https:\u002F\u002Fmedia-exp2.licdn.com\u002Fdms\u002Fdocument\u002FC4D1FAQFLvzVgVxYAAA\u002Ffeedshare-document-pdf-analyzed\u002F0\u002F1656265480370?e=1657152000&v=beta&t=RD90ZEx3x2VLUGSthO-1uYKadzwTRixKRg3s8j2nvOc\n\n## 概率、统计与线性代数\n* [理解描述性统计的基础知识（面试中非常重要）](https:\u002F\u002Ftowardsdatascience.com\u002Funderstanding-descriptive-statistics-c9c2b0641291) 📘\n* [数据科学面试中的40道概率题](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2017\u002F04\u002F40-questions-on-probability-for-all-aspiring-data-scientists\u002F) 📘\n* [数据科学家统计学面试的40道问题与解答](https:\u002F\u002Ftowardsdatascience.com\u002F40-statistics-interview-problems-and-answers-for-data-scientists-6971a02b7eee) 📘\n* [深度学习背景下的概率与统计](https:\u002F\u002Ftowardsdatascience.com\u002Fprobability-and-statistics-explained-in-the-context-of-deep-learning-ed1509b2eb3f) 📘\n* [概率与似然的区别](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=pYxNSUDSFH4) 📹\n* [自助法——数据科学家的瑞士军刀](https:\u002F\u002Fmedium.com\u002Fdata-science-journal\u002Fthe-bootstrap-the-swiss-army-knife-of-any-data-scientist-acd6e592be13) 📘\n* [面向数据科学家的置信区间通俗解释](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2020\u002F02\u002F21\u002Fci\u002F) 📘\n* [面向数据科学家的 p 值通俗解释](https:\u002F\u002Ftowardsdatascience.com\u002Fp-value-explained-simply-for-data-scientists-4c0cd7044f14) 📘\n* [PDF 并非概率](https:\u002F\u002Ftowardsdatascience.com\u002Fpdf-is-not-a-probability-5a4b8a5d9531) 📘\n* [每个数据科学家都应掌握的5种采样算法](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2019\u002F07\u002F30\u002Fsampling\u002F) 📘\n* [数据科学家必须掌握的10种统计技术](https:\u002F\u002Fwww.kdnuggets.com\u002F2017\u002F11\u002F10-statistical-techniques-data-scientists-need-master.html) 📘\n* [应用线性代数速成课程](https:\u002F\u002Fyoutu.be\u002FwkxgZirbCr4?si=6jk888FeJQYDzIgy) 📹\n\n***\n\n## SQL 与数据获取\nSQL 很可能是你数据科学项目的起点，也是每位数据科学家最重要的技能之一。\n\n* [数据科学家常见的5道SQL面试题](https:\u002F\u002Ftowardsdatascience.com\u002F5-common-sql-interview-problems-for-data-scientists-1bfa02d8bae6) 📘\n* 
[测试数据科学家SQL能力的46道题目](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2017\u002F01\u002F46-questions-on-sql-to-test-a-data-science-professional-skilltest-solution\u002F) 📘\n* [前Facebook数据科学家精选的FAANG公司常见30道SQL面试题](https:\u002F\u002Fwww.nicksingh.com\u002Fposts\u002F30-sql-and-database-design-questions-from-real-data-science-interviews) 📘\n* [SQL面试题库](https:\u002F\u002F365datascience.com\u002Fsql-interview-questions\u002F) 📘\n* [如何在数据科学面试中攻克SQL难关](https:\u002F\u002Ftowardsdatascience.com\u002Fhow-to-ace-data-science-interviews-sql-b71de212e433) 📘\n* [通过数据科学面试必知的3道SQL题](https:\u002F\u002Fmedium.com\u002F@jayfeng\u002Fthree-must-know-sql-questions-to-pass-your-data-science-interview-463311c7eaea) 📘\n* [面试中常考的10条SQL查询语句](https:\u002F\u002Fwww.java67.com\u002F2013\u002F04\u002F10-frequently-asked-sql-query-interview-questions-answers-database.html) 📘\n* [数据科学专业面试中的技术问题：SQL与编程](https:\u002F\u002Fhackernoon.com\u002Ftechnical-data-science-interview-questions-sql-and-coding-jv1k32bf) 📘\n* [如何优化SQL查询——Datacamp](https:\u002F\u002Fwww.datacamp.com\u002Fcommunity\u002Ftutorials\u002Fsql-tutorial-query) 📘\n* [数据科学面试中应掌握的10个SQL概念](https:\u002F\u002Ftowardsdatascience.com\u002Ften-sql-concepts-you-should-know-for-data-science-interviews-7acf3e428185) 📘\n\n***\n\n## 数据准备与可视化\n\n* [每位数据科学家都应掌握的5种特征选择算法](https:\u002F\u002Ftowardsdatascience.com\u002Fthe-5-feature-selection-algorithms-every-data-scientist-need-to-know-3a6b566efd2) 📘 \n* [填补数据集中缺失值的6种不同方法](https:\u002F\u002Ftowardsdatascience.com\u002F6-different-ways-to-compensate-for-missing-values-data-imputation-with-examples-6022d9ca0779) 📘 \n* [异常值检测技术简述](https:\u002F\u002Ftowardsdatascience.com\u002Fa-brief-overview-of-outlier-detection-techniques-1e0b2c19e561) 📘 \n* [使用Python进行数据清洗与预处理——最佳实践与实用工具包](https:\u002F\u002Fmedium.com\u002F@rrfd\u002Fcleaning-and-prepping-data-with-python-for-data-science-best-practices-and-helpful-packages-af1edfbe2a3) 📘 \n* 
[何时使用哪种图表进行可视化](https:\u002F\u002Ftowardsdatascience.com\u002Fwhat-plot-why-this-plot-and-why-not-9508a0cb35ea) 📘\n* [检测并移除异常值的方法](https:\u002F\u002Ftowardsdatascience.com\u002Fways-to-detect-and-remove-the-outliers-404d16608dba) 📘\n* [机器学习中类别不平衡问题的处理](https:\u002F\u002Ftowardsdatascience.com\u002Fdealing-with-imbalanced-classes-in-machine-learning-d43d6fa19d2) 📘\n* [更智能的分类数据编码方式](https:\u002F\u002Ftowardsdatascience.com\u002Fsmarter-ways-to-encode-categorical-data-for-machine-learning-part-1-of-3-6dca2f71b159)\n* [Numpy和Pandas速查表](https:\u002F\u002Fgithub.com\u002Fjessicayung\u002Fdata-analyst-nd\u002Fblob\u002Fmaster\u002F2-intro-to-data-analysis\u002Fnumpy_pandas_cheatsheet.pdf) 📘\n* [处理异常值的3种方法](https:\u002F\u002Fwww.kdnuggets.com\u002F2017\u002F01\u002F3-methods-deal-outliers.html) 📘\n* [特征选择技术](https:\u002F\u002Ftowardsdatascience.com\u002Ffeature-selection-techniques-in-machine-learning-with-python-f24e7da3f36e) 📘\n* [为何、如何以及何时对特征进行标准化](https:\u002F\u002Fmedium.com\u002Fgreyatom\u002Fwhy-how-and-when-to-scale-your-features-4b30ab09db5e) 📘\n* [关于散点图你需要知道的一切](https:\u002F\u002Ftowardsdatascience.com\u002Feverything-you-need-to-know-about-scatter-plots-for-data-visualisation-924144c0bc5) 📘\n* [如何为机器学习选择特征](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=YaKMeAlHgqQ) 📹\n* [特征选择的10种方法](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=Erp0bjEPoM8) 📹\n\n\n\n***\n## 经典机器学习算法\n\n* [14分钟内讲解所有机器学习算法](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=BT6Aw6Q75Yg)\n\n### 1. 
逻辑回归\n\n* [一篇文章讲透逻辑回归](https:\u002F\u002Ftowardsdatascience.com\u002Flogistic-regression-b0af09cdb8ad) 📘 \n* [循序渐进理解逻辑回归](https:\u002F\u002Ftowardsdatascience.com\u002Funderstanding-logistic-regression-step-by-step-704a78be7e0a) 📘 \n* [逻辑回归——简明清晰讲解，9分钟](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=yIYKR4sgzI8) 📹\n* [线性回归与逻辑回归的区别](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=OCwZyYH14uw) 📹\n* [检验数据科学家逻辑回归知识的30道题](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2017\u002F08\u002Fskilltest-logistic-regression\u002F) 📘 \n* [逻辑回归——理论+数学+代码，一视频全搞定](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=VCJdg7YBbAQ) 📹\n* [Lasso、Ridge与逻辑回归，三者合一](https:\u002F\u002Fwww.youtube.com\u002Flive\u002FvaQxdBEcBzU?si=3judBH9xcRefRwGP) 📹\n\n\n### 2. 线性回归\n\n* [检验数据科学家线性回归知识的30道题](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2017\u002F07\u002F30-questions-to-test-a-data-scientist-on-linear-regression\u002F) 📘 \n* [线性回归——理论+数学+代码，一视频全搞定](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=E5RjzSK0fvY) 📹\n* [5种回归类型及其特性](https:\u002F\u002Ftowardsdatascience.com\u002F5-types-of-regression-and-their-properties-c5e1fa12d55e) 📘\n* [Ridge回归——清晰讲解](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=Q81RR3yKn30) 📹\n* [Lasso回归——清晰讲解](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=NGf0voTMlcs) 📹\n\n\n### 3. 
基于树\u002F集成算法\n\n* [检验数据科学家树模型知识的30道题](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2017\u002F09\u002F30-questions-test-tree-based-models\u002F) 📘\n* [基尼指数 vs 信息熵](https:\u002F\u002Ftowardsdatascience.com\u002Fgini-index-vs-information-entropy-7a7e4fed3fcb) 📘\n* [决策树 vs 随机森林——该用哪种算法？](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2020\u002F05\u002Fdecision-tree-vs-random-forest-algorithm\u002F) 📘\n* [为什么随机森林不适用于时间序列？](https:\u002F\u002Fmedium.com\u002Fdatadriveninvestor\u002Fwhy-wont-time-series-data-and-random-forests-work-very-well-together-3c9f7b271631) 📘\n* [集成模型全面指南](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2018\u002F06\u002Fcomprehensive-guide-for-ensemble-models\u002F) 📘\n* [3种决策树分裂准则背后的简单数学原理](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2019\u002F11\u002F12\u002Fdtsplits\u002F) 📘\n\n\n### 4. K近邻算法\n\n* [KNN基础面试题——快速回顾](http:\u002F\u002Ftheprofessionalspoint.blogspot.com\u002F2019\u002F01\u002Fknn-algorithm-in-machine-learning.html) 📘\n* [检验数据科学家KNN知识的30道题](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2017\u002F09\u002F30-questions-test-k-nearest-neighbors-algorithm\u002F) 📘\n* [KNN的优缺点](https:\u002F\u002Fwww.fromthegenesis.com\u002Fpros-and-cons-of-k-nearest-neighbors\u002F) 📘\n* [KNN算法——理论+数学+代码，一视频全搞定](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=6kZ-OPLNcgE) 📹\n\n### 5. 支持向量机\n\n* [一篇讲透SVM——数学、术语、直观理解、核函数](https:\u002F\u002Ftowardsdatascience.com\u002Fsupport-vector-machines-svm-c9ef22815589) 📘\n* [检验数据科学家SVM知识的25道题](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2017\u002F10\u002Fsvm-skilltest\u002F) 📘\n\n### 6. 
朴素贝叶斯\n\n* [充分利用朴素贝叶斯的12个技巧](https:\u002F\u002Fmachinelearningmastery.com\u002Fbetter-naive-bayes\u002F) 📘\n* [朴素贝叶斯——理论+数学+代码，一视频全搞定](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=vz_xuxYS2PM) 📹\n* [轻松掌握朴素贝叶斯的6个步骤](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2017\u002F09\u002Fnaive-bayes-explained\u002F) 📘\n\n\n***\n## 时间序列\n* [检验数据科学家时间序列知识的40道题](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2017\u002F04\u002F40-questions-on-time-series-solution-skillpower-time-series-datafest-2017\u002F) 📘\n* [11种经典的时间序列预测方法](https:\u002F\u002Fmachinelearningmastery.com\u002Ftime-series-forecasting-methods-in-python-cheat-sheet\u002F) 📘\n* [ARIMA相关面试题](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=654SjiZO5ks) 📹\n\n***\n\n## 无监督学习\n* [PCA（主成分分析）的注意事项](https:\u002F\u002Fmedium.com\u002F@sadatnazrul\u002Fthe-dos-and-donts-of-principal-component-analysis-7c2e9dc8cc48) 📘\n* [t-SNE简介：DataCamp](https:\u002F\u002Fwww.datacamp.com\u002Fcommunity\u002Ftutorials\u002Fintroduction-t-sne) 📘\n* [降维：提取精华](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=4QMFNg7tjbk) 📘\n* [面向小白的降维技术：第一部分——直觉](https:\u002F\u002Ftowardsdatascience.com\u002Fhttps-medium-com-abdullatif-h-dimensionality-reduction-for-dummies-part-1-a8c9ec7b7e79) 📘\n* [DBSCAN算法深度解析](https:\u002F\u002Ftowardsdatascience.com\u002Fexplaining-dbscan-clustering-18eaf5c83b31) 📘\n\n***\n## 推荐系统\n* [推荐系统概览](https:\u002F\u002Fwww.kdnuggets.com\u002F2020\u002F07\u002Frecommender-systems-nutshell.html)\n\n***\n## 深度学习\n* [为什么正则化可以减少深度神经网络的过拟合](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=4nqD5TBlOWU) 📹\n* [神经网络的优缺点](https:\u002F\u002Ftowardsdatascience.com\u002Fhype-disadvantages-of-neural-networks-6af04904ba5b) 📘\n* [何时不应使用神经网络](https:\u002F\u002Fmedium.com\u002Fdatadriveninvestor\u002Fwhen-not-to-use-neural-networks-89fb50622429) 📘\n* [测试数据科学家深度学习知识的40道题](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2017\u002F04\u002F40-questions-test-data-scientist-deep-learning\u002F) 📘\n* 
[21道热门深度学习面试题](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2020\u002F04\u002Fcomprehensive-popular-deep-learning-interview-questions-answers\u002F) 📘\n* [Edureka深度学习面试题](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=HGXlFG_Rz4E) 📹\n* [神经网络中的激活函数详解](https:\u002F\u002Ftowardsdatascience.com\u002Factivation-functions-neural-networks-1cbd9f8d91d6) 📘\n* [梯度消失与爆炸现象清晰解析](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=qO_NLVjD6zE) 📹\n* [偏差与方差非常清晰的解释](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=EuBBz3bI-aA) 📹\n* [为什么ReLU比Sigmoid更好](https:\u002F\u002Fstats.stackexchange.com\u002Fquestions\u002F126238\u002Fwhat-are-the-advantages-of-relu-over-sigmoid-function-in-deep-neural-networks) 📘\n* [25道深度学习面试题，检验你的知识](https:\u002F\u002Ftowardsdatascience.com\u002F50-deep-learning-interview-questions-part-1-2-8bbc8a00ec61) 📘\n* [2020年需牢记的10条深度学习最佳实践](https:\u002F\u002Fnanonets.com\u002Fblog\u002F10-best-practices-deep-learning\u002F) 📘\n\n\n***\n## 生成式AI与大模型\n\n* [LoRA详解](https:\u002F\u002Fyoutu.be\u002FhcAx89iwkMU) 📹\n* [RAG vs 微调 vs 提示工程](https:\u002F\u002Fyoutu.be\u002F-ZwhMnNwvxA) 📹\n* [交叉编码器与双向编码器：文本编码方法深度解析](https:\u002F\u002Fmedium.com\u002F@rbhatia46\u002Fcross-encoders-vs-bi-encoders-a-deep-dive-into-text-encoding-methods-d9aa890d6ca4) 📘\n* [RAG入门](https:\u002F\u002Fdeveloper.nvidia.com\u002Fblog\u002Frag-101-retrieval-augmented-generation-questions-answered\u002F)\n* [生成式AI概览](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=2IK3DFHRFfw) 📹\n* [BERT理论深度解析视频](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=90mGPxR2GgY) 📹\n* [Transformer理论深度解析视频](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=bCz4OMemCcA) 📹\n* [注意力机制背后的数学：键、查询和值矩阵](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=UPtG_38Oq8o) 📹\n\n\n***\n## 机器学习系统设计\n\n* [如何回答任何机器学习系统设计面试题](https:\u002F\u002Ftowardsdatascience.com\u002Fhow-to-answer-any-machine-learning-system-design-interview-question-a98656bb7ff0) 📘\n\n\n\n\n***\n## 机器学习可解释性\n* 
[解读机器学习模型世界的四个问题](https:\u002F\u002Fnarrativescience.com\u002Fresource\u002Fblog\u002Fmachine-learning-models\u002F) 📘\n* [Kaggle提供的机器学习可解释性速成课程](https:\u002F\u002Fwww.kaggle.com\u002Flearn\u002Fmachine-learning-explainability) 📘\n* [SHAP值简单解释](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=VB9uV-x0gtg) 📹\n\n***\n## 案例研究\n案例研究对面试至关重要，以下是一些练习资源，请先思考再查看答案。\n* [出租车聚合平台的兴起](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2016\u002F04\u002Fcase-study-analytics-interviews-dawn-taxi-aggregators\u002F) 📘\n* [优化在线商家的产品价格](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2016\u002F07\u002Fsolving-case-study-optimize-products-price-online-vendor-level-hard\u002F) 📘\n* [案例研究面试技巧](https:\u002F\u002Fworkera.ai\u002Fresources\u002Fdata-science-case-study-interview\u002F) 📘\n* [Mercari价格预测](https:\u002F\u002Ftowardsdatascience.com\u002Fa-data-science-case-study-with-python-mercari-price-prediction-4e852d95654) 📘\n* [端到端多分类文本分类流程](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2020\u002F05\u002F24\u002Fmultitextclass\u002F) 📘\n* [端到端多分类图像分类流程](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2020\u002F06\u002F06\u002Fmulticlass_image_classification_pytorch\u002F) 📘\n* [Nagarro公司1000多种产品的规模化预测](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=8jfDBD6xlFM) 📹\n* [电子商务中的聚类与分类](https:\u002F\u002Flucidworks.com\u002Fpost\u002Fclustering-classification-supervised-unsupervised-learning-ecommerce\u002F) 📘\n* [排序学习ABC](https:\u002F\u002Flucidworks.com\u002Fpost\u002Fabcs-learning-to-rank\u002F) 📘\n* [数据科学案例研究：优化零售业产品陈列](https:\u002F\u002Ftowardsdatascience.com\u002Fdata-science-case-study-optimizing-product-placement-in-retail-part-1-2e8b27e16e8d) 📘\n\n***\n## 自然语言处理\n* [测试数据科学家NLP能力的30道题](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2017\u002F07\u002F30-questions-test-data-scientist-natural-language-processing-solution-skilltest-nlp\u002F)\n* 
[初学者最常见的11道NLP面试题](https:\u002F\u002Fanalyticsindiamag.com\u002F11-most-commonly-asked-nlp-interview-questions-for-beginners\u002F)\n* [如何解决90%的NLP问题](https:\u002F\u002Fblog.insightdatascience.com\u002Fhow-to-solve-90-of-nlp-problems-a-step-by-step-guide-fda605278e4e)\n* [企业NLP岗位常见面试问题](https:\u002F\u002Fmedium.com\u002Fmodern-nlp\u002Fnlp-interview-questions-f062040f32f7)\n* [基于向量的相似度搜索方法（TF-IDF、BM25、SBERT）](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=ziiF1eFM3_4) 📹\n* [深入理解BERT——最好的系列课程之一，帮助你掌握BERT的基础原理和内部机制，特别感谢Chris McCormick](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PLam9sigHPGwOBuH4_4fr-XvDbe5uneaf6) 📹\n* [词嵌入、CBoW和Skipgram](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=Q95SIG4g7SA) 📹\n* [CBoW vs Skipgram：数据科学面试题](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=WssUXhPoELE) 📹\n\n\n***\n\n## FAANG及类似公司的数据科学面试\n* [亚马逊数据科学家面试练习题](https:\u002F\u002Ftowardsdatascience.com\u002Famazon-data-scientist-interview-practice-problems-15b9b86e86c6) 📘\n* [微软数据科学面试题与答案](https:\u002F\u002Ftowardsdatascience.com\u002Fmicrosoft-data-science-interview-questions-and-answers-69ccac16bd9b) 📘\n* [谷歌数据科学面试中的脑筋急转弯题](https:\u002F\u002Ftowardsdatascience.com\u002Fgoogles-data-science-interview-brain-teasers-7f3c1dc4ea7f) 📘\n\n***\n## 成为顶尖数据科学家（有空时阅读）\n认真研读这些内容无疑会为你加分，所以如果有时间，千万不要错过。\n\n* [成为顶尖数据科学家的13项必备技能](https:\u002F\u002Ftowardsdatascience.com\u002Ftop-13-skills-to-become-a-rockstar-data-scientist-faf2f97e655d) 📘 \n* [掌握这4个机器学习概念，让你听起来像专家](https:\u002F\u002Ftowardsdatascience.com\u002Funderstand-these-4-advanced-concepts-to-sound-like-a-machine-learning-master-d32843840b52) 📘\n* [我作为数据科学家起步前最希望知道的12件事](https:\u002F\u002Fmedium.com\u002Fdeliberate-data-science\u002F12-things-i-wish-id-known-before-starting-as-a-data-scientist-45989be6300e) 📘\n* [理解数据科学流程](https:\u002F\u002Ftowardsdatascience.com\u002Fa-beginners-guide-to-the-data-science-pipeline-a4904b2d8ad3) 📘\n* [Kaggle数据科学术语表](https:\u002F\u002Fwww.kaggle.com\u002Fshivamb\u002Fdata-science-glossary-on-kaggle) 📘\n* 
[谷歌机器学习术语表](https:\u002F\u002Fdevelopers.google.com\u002Fmachine-learning\u002Fglossary\u002F) 📘\n* [让你的机器学习预测速度提升50倍——Hummingbird](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2020\u002F06\u002F06\u002Fhummingbird_faster_ml_preds\u002F) 📘\n* [数据科学面试中不应犯的3个错误](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2019\u002F12\u002F24\u002Fmistakes\u002F) 📘\n* [如何为黑盒模型计算特征重要性？](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2019\u002F12\u002F04\u002Fblackbox\u002F) 📘\n\n***\n\n## 数据结构与算法（可选）\n虽然这部分内容可以视情况而定，但如果职位描述明确要求掌握相关知识，或者你正在参加FAANG等知名企业的面试，又或是你本身具有计算机科学背景，那就绝对不能忽视。你不必达到软件工程师级别的水平，但至少要掌握基础知识。\n\n* [数据科学家的数据结构与算法指南](https:\u002F\u002Ftowardsdatascience.com\u002Fa-data-scientists-guide-to-data-structures-algorithms-1176395015a0) 📘\n* [数据科学算法面试中树结构的处理](https:\u002F\u002Ftowardsdatascience.com\u002Fhandling-trees-in-data-science-algorithmic-interview-ea14dd1b6236) 📘\n* [面向数据科学家的链表入门](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2020\u002F01\u002F28\u002Fll\u002F) 📘\n* [数据科学家的动态规划](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2020\u002F01\u002F28\u002Fdp\u002F) 📘\n* [数据科学家需要掌握的3种编程概念](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2019\u002F12\u002F09\u002Fpc\u002F) 📘\n* [数据科学家必知的5种图算法](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2019\u002F09\u002F02\u002Fgraph_algs\u002F) 📘\n\n***\n## 工程与部署\n* [数据科学家快速创建API的简易指南](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2020\u002F06\u002F06\u002Ffastapi_for_data_scientists\u002F) 📘\n* [只需5步，将机器学习模型投入生产环境](https:\u002F\u002Fmlwhiz.com\u002Fblog\u002F2019\u002F12\u002F25\u002Fprod\u002F) 📘\n* [部署机器学习模型的两种方式](https:\u002F\u002Ftowardsdatascience.com\u002Fthere-are-two-very-different-ways-to-deploy-ml-models-heres-both-ce2e97c7b9b1) 📘\n* [如何通过Flask将Keras模型部署为Web应用](https:\u002F\u002Ftowardsdatascience.com\u002Fdeploying-a-keras-deep-learning-model-as-a-web-application-in-p-fc0f2354a7ff) 📘\n* [数据科学家如何用简单Python编写Web应用？](https:\u002F\u002Ftowardsdatascience.com\u002Fhow-to-write-web-apps-using-simple-python-for-data-scientists-a227a1a01582) 📘\n\n***\n## 
大数据与Spark\n* [55道Apache Spark面试题](https:\u002F\u002Fwww.edureka.co\u002Fblog\u002Finterview-questions\u002Ftop-apache-spark-interview-questions-2016\u002F) 📘\n* [Spark面试中常见的10个问题](https:\u002F\u002Fmedium.com\u002Fanalytics-vidhya\u002F10-questions-you-can-expect-in-spark-interview-24b89b807dfb) 📘\n* [Hive面试题集](https:\u002F\u002Fwww.tutorialspoint.com\u002Fhive\u002Fhive_interview_questions.htm) 📘\n* [Apache Spark面试Top 20题](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=Y8LKEDyA5iY) 📹\n* [Spark面试题全集播放列表](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PLtfmIPhU2DkNjQjL08kR3cd4kUzWqS0vg) 📹\n* [另一套精彩的Spark面试题播放列表](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PL9sbKmQTkW05mXqnq1vrrT8pCsEa53std) 📹\n* [数据科学家实用PySpark技巧](https:\u002F\u002Ftowardsdatascience.com\u002Fpractical-spark-tips-for-data-scientists-145d85e9b2d8) 📘\n* [使用Spark实现代码并行化的3种方法](https:\u002F\u002Ftowardsdatascience.com\u002F3-methods-for-parallelization-in-spark-6a1a4333b473) 📘\n* [Datashader——揭示真正大数据的结构](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=6m3CFbKmK_c) 📹\n* [闪电演讲：关于Spark MLlib你需要了解什么](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=DBxcua0Vmvk) 📹\n* [解决Apache Spark中“容器因内存超限被Yarn杀死”的异常](https:\u002F\u002Fmedium.com\u002Fanalytics-vidhya\u002Fsolving-container-killed-by-yarn-for-exceeding-memory-limits-exception-in-apache-spark-b3349685df16) 📘\n\n***\n## 关于Python和Spark的一些精彩内容\n如果你正在应聘大数据相关岗位，这些内容绝对不容错过。\n* [提升Python和Spark性能](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=qIKImANLFtE) 📹\n* [在Spark上实现高性能Python](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=abZ0f5ug18U) 📹\n* [向量化UDF：利用Python和PySpark进行可扩展分析](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=Til-StSDvfA) 📹\n\n***\n## 跨领域的通用面试问题（视频）\n* [常见数据科学面试题——Edureka](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=tTAieUcNHdY)\n* [常见机器学习面试题——Edureka](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=t6gOpFLt-Ks)\n* [数据科学中常用的5大算法](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=BfowBtIxNu4) \n* [Analytics 
University的常见数据科学面试题](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=BfowBtIxNu4) \n* [数据科学面试的3种类型](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=4Z6lxfglvUU)\n* [血泪教训——破解数据科学面试](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=3BRLGRqj8p)\n* [数据科学家面试是什么体验？](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=0HmAEWPfMnM)\n* [获得数据科学工作的5个技巧](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=MfP-P8EHGBo)\n* [8种常用的数据科学算法](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=z3wMgOTSE5s) \n* [情景模拟式实战面试](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=bJPhEa3mbwo)\n* [KNN与K均值对比](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=OClrEI_5Ri4)\n\n## 跨领域通用面试题（阅读）\n* [数据科学面试指南](https:\u002F\u002Ftowardsdatascience.com\u002Fdata-science-interview-guide-4ee9f5dc778)\n* [顶级30道数据科学面试题](https:\u002F\u002Ftowardsdatascience.com\u002Ftop-30-data-science-interview-questions-7dd9a96d3f5c)\n* [35个重要的数据科学面试问题](https:\u002F\u002Fwww.edureka.co\u002Fblog\u002Finterview-questions\u002Fdata-science-interview-questions\u002F)\n* [FAANG公司常见的100道数据科学面试题](https:\u002F\u002Fmedium.com\u002F@e22aafa7d95\u002Fc5a66186769a)\n* [最全面的数据科学面试指南](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2018\u002F06\u002Fcomprehensive-data-science-machine-learning-interview-guide\u002F)\n* [Springboard整理的41个机器学习核心面试问题](https:\u002F\u002Fwww.springboard.com\u002Fblog\u002Fmachine-learning-interview-questions\u002F)\n* [iNeuron推出的30天数据科学面试备考计划](https:\u002F\u002Fgithub.com\u002FiNeuronai\u002Finterview-question-data-science-)\n* [Springboard整理的109道数据科学面试题](https:\u002F\u002Fwww.springboard.com\u002Fblog\u002Fdata-science-interview-questions\u002F)\n* [Springboard总结的印度最常问的数据科学面试题](https:\u002F\u002Fin.springboard.com\u002Fblog\u002Fmost-asked-data-science-interview-questions-in-india\u002F)\n* [印度人工智能初创企业列表及面试备考资源](https:\u002F\u002Fgithub.com\u002Ftheainerd\u002FMLInterview)\n* [预测优秀数据科学家的5个面试问题](https:\u002F\u002Fmedium.com\u002Fpredict\u002Ffive-interview-questions-to-predict-a-good-data-scientist-40d310cdcd68)\n* 
[提升机器学习模型准确性的8种有效方法](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2015\u002F12\u002Fimprove-machine-learning-results\u002F)\n* [AnalyticsIndiaMag整理的60道机器学习面试题](https:\u002F\u002Fanalyticsindiamag.com\u002F60-interview-questions-on-machine-learning\u002F)\n* [数据科学与机器学习面试资源大全](https:\u002F\u002Ftowardsdatascience.com\u002Fthe-big-list-of-ds-ml-interview-resources-2db4f651bd63)\n* [100道基础数据科学面试题及答案](https:\u002F\u002Fwww.dezyre.com\u002Farticle\u002F100-data-science-interview-questions-and-answers-general-for-2018\u002F184)\n* [Analyticsvidhya整理的40道在初创公司机器学习\u002F数据科学面试中常考的问题](https:\u002F\u002Fwww.analyticsvidhya.com\u002Fblog\u002F2016\u002F09\u002F40-interview-questions-asked-at-startups-in-machine-learning-data-science\u002F)\n* [我的数据科学\u002F机器学习求职面试经历：DS\u002FML\u002FDL问题清单 —— 机器学习实战](https:\u002F\u002Fappliedmachinelearning.blog\u002F2018\u002F04\u002F13\u002Fmy-data-science-machine-learning-job-interview-experience-list-of-ds-ml-dl-questions\u002F)\n* [如何准备Airbnb的数据科学家电话面试？](https:\u002F\u002Fwww.quora.com\u002FHow-do-I-prepare-for-a-phone-interview-for-a-data-scientist-position-with-Airbnb)\n* [回归问题的最佳机器学习算法](https:\u002F\u002Ftowardsdatascience.com\u002Fselecting-the-best-machine-learning-algorithm-for-your-regression-problem-20c330bad4ef)\n* [如何成功通过面对面的数据科学面试？](https:\u002F\u002Ftowardsdatascience.com\u002Fhow-to-ace-the-in-person-data-science-interview-584ca11df08a)\n* [如何进入Airbnb担任数据科学家？](https:\u002F\u002Ftowardsdatascience.com\u002Fhow-to-land-a-data-scientist-job-at-your-dream-company-my-journey-to-airbnb-f6a1e99892e8)\n* [120道涵盖各领域的数据科学面试题](https:\u002F\u002Fgithub.com\u002Fkojino\u002F120-Data-Science-Interview-Questions)\n* [理解偏差-方差权衡](https:\u002F\u002Ftowardsdatascience.com\u002Funderstanding-the-bias-variance-tradeoff-165e6942b229)\n* [如果你正在研究机器学习算法，这些速查表必不可少](https:\u002F\u002Fmedium.freecodecamp.org\u002Fyou-need-these-cheat-sheets-if-youre-tackling-machine-learning-algorithms-45da5743888e)\n* 
[数据科学面试中的危险信号](https:\u002F\u002Ftowardsdatascience.com\u002Fred-flags-in-data-science-interviews-4f492bbed4c4)\n* [一位数据科学家对面试题的看法](https:\u002F\u002Ftowardsdatascience.com\u002Fmy-take-on-data-scientist-interview-questions-part-1-6df22252b2e8)\n* [什么是交叉熵（简洁明了的解释）](https:\u002F\u002Fstackoverflow.com\u002Fquestions\u002F41990250\u002Fwhat-is-cross-entropy\u002F41990932#41990932)\n* [理想的数据科学家画像是什么样的？](https:\u002F\u002Ftowardsdatascience.com\u002Fwhat-does-an-ideal-data-scientists-profile-look-like-7d7bd78ff7ab)\n* [25个有趣的机器学习面试问题](https:\u002F\u002Fmedium.com\u002Fanalytics-vidhya\u002F25-fun-questions-for-a-machine-learning-interview-373b744a4faa)\n* [如何准备机器学习面试？](https:\u002F\u002Ftowardsdatascience.com\u002Fhow-to-prepare-for-machine-learning-interviews-5fac3db58168)\n* [从零开始构建机器学习模型的方法](https:\u002F\u002Ftowardsdatascience.com\u002Fmachine-learning-general-process-8f1b510bd8af)\n* [机器学习项目全流程指南](https:\u002F\u002Fmedium.com\u002Ffintechexplained\u002Fend-to-end-guide-for-machine-learning-project-146c288186dc)\n* [分类与回归的区别](https:\u002F\u002Fmedium.com\u002Ffintechexplained\u002Fsupervised-machine-learning-regression-vs-classification-18b2f97708de)\n* [每位数据科学家必须掌握的数学指标](https:\u002F\u002Fmedium.com\u002Ffintechexplained\u002Fmust-know-mathematical-measures-for-data-scientist-15bfc4f7f39c)\n* [最小二乘法源自何处？](https:\u002F\u002Ftowardsdatascience.com\u002Fwhere-did-the-least-square-come-from-3f1abc7f7caf)\n* [机器学习中的正则化——详解](https:\u002F\u002Ftowardsdatascience.com\u002Fregularization-in-machine-learning-76441ddcf99a)\n\n\n***\n## 精彩阅读\n* [3种常见的数据科学职业转型及其实现方法](https:\u002F\u002Ftowardsdatascience.com\u002F3-common-data-science-career-transitions-and-how-to-make-them-happen-588c3618942f)\n* [探索数据科学职业发展图景](https:\u002F\u002Fhackernoon.com\u002Fnavigating-the-data-science-career-landscape-db746a61ac62)\n* [选择哪种模型？需要多少数据？](https:\u002F\u002Ftowardsdatascience.com\u002Fwhich-model-and-how-much-data-75a3999660f3)","# Data-Science-Interview-Resources 
快速上手指南\n\n**注意**：`Data-Science-Interview-Resources` 并非一个需要安装运行的软件工具或代码库，而是一个**精选（curated）的学习资源列表**。它汇集了数据科学面试所需的核心知识点、文章教程、视频讲解和练习题。因此，本指南将指导你如何高效地利用该仓库进行备考，而非执行安装命令。\n\n## 环境准备\n\n由于本仓库本质上是链接集合，你只需要具备以下基础环境即可开始学习：\n\n*   **操作系统**：Windows, macOS 或 Linux 均可。\n*   **浏览器**：现代浏览器（Chrome, Edge, Firefox 等），用于访问链接中的文章和视频。\n*   **前置知识储备**（根据仓库建议，面试前需掌握）：\n    *   **编程语言**：熟练掌握 Python（首选）或 Scala。\n    *   **数据库**：熟悉 SQL 及数据库基本原理。\n    *   **数学基础**：概率论、统计学、线性代数、微积分。\n    *   **机器学习**：理解常见算法模型及深度学习基础。\n    *   **工程能力**：了解数据结构与算法（特别是针对 FAANG 等大厂），具备基本的 RESTful API 开发及代码规范意识。\n    *   **业务理解**：能够将技术方案转化为商业价值。\n\n## 获取与浏览步骤\n\n无需复杂的安装过程，通过以下方式即可使用：\n\n1.  **访问仓库**：\n    直接在浏览器中打开项目主页：\n    ```text\n    https:\u002F\u002Fgithub.com\u002Frbhatia46\u002FData-Science-Interview-Resources\n    ```\n    *(注：若访问 GitHub 较慢，可尝试使用国内镜像站如 `https:\u002F\u002Fgithub.com.cnpmjs.org\u002Frbhatia46\u002FData-Science-Interview-Resources` 或直接阅读下方的分类指引)*\n\n2.  **克隆仓库（可选）**：\n    如果你希望离线浏览或在本地整理笔记，可以克隆该项目：\n    ```bash\n    git clone https:\u002F\u002Fgithub.com\u002Frbhatia46\u002FData-Science-Interview-Resources.git\n    cd Data-Science-Interview-Resources\n    ```\n\n3.  **查看贡献指南**：\n    在参与贡献或提交新资源前，请查阅：\n    ```text\n    Contribution.md\n    ```\n\n## 基本使用指南\n\n本仓库按面试考察领域进行了分类，建议按照以下路径进行系统性复习：\n\n### 1. 制定复习策略\n根据仓库作者的建议，复习应涵盖从“简历筛选”到“技术面试”的全流程：\n*   **核心技能**：重点攻克 SQL、Python 编程、数学基础（概率\u002F统计\u002F线代）和机器学习模型。\n*   **个人品牌**：整理你的 GitHub 作品集，确保包含端到端（从业务理解到模型部署）的案例，并优化简历（使用 XYZ 公式量化成果）。\n*   **人脉连接**：利用 LinkedIn 和行业会议建立联系，争取内推机会。\n\n### 2. 
分模块专项突破\n点击 README 中对应的链接进行深入阅读和练习：\n\n*   **数学与统计基础 (Probability, Statistics and Linear Algebra)**\n    *   重点复习：描述性统计、概率面试题（40 题）、统计推断、置信区间、P 值解释、采样算法。\n    *   推荐资源：阅读 *\"40 Question on probability\"* 和 *\"Crash Course in Applied Linear Algebra\"* 视频。\n\n*   **SQL 与数据获取 (SQL and Data Acquisition)**\n    *   重点复习：常见 SQL 面试题型、查询优化、数据库设计。\n    *   推荐资源：练习 *\"5 Common SQL Interview Problems\"* 及 *\"30 SQL Interview Questions curated for FAANG\"*。\n\n*   **数据预处理与可视化 (Data Preparation and Visualization)**\n    *   重点复习：特征选择算法、缺失值处理、异常值检测、类别数据编码、类别不平衡处理。\n    *   推荐资源：参考 *\"6 Different Ways to Compensate for Missing Values\"* 和 *\"Smarter ways to encode categorical data\"*。\n\n*   **机器学习与建模 (Machine Learning)**\n    *   （注：原文此处被截断，但通常包含回归、分类、聚类、评估指标等内容，请在原仓库中继续向下滚动查看完整列表）。\n\n### 3. 快速查漏补缺\n如果你时间紧迫，可以使用仓库中提供的快速复习文档（PDF\u002F链接）：\n*   **数学基础速查**：访问仓库中提供的 LinkedIn 文档链接复习数学基础。\n*   **统计与 ML 速查**：访问对应的统计与机器学习核心概念总结文档。\n\n### 4. 实战演练\n*   **刷题**：利用仓库中提供的 Analytics Vidhya 和 Towards Data Science 的专题文章进行模拟面试答题。\n*   **作品集构建**：参照 *\"Advice on building Data Portfolio Projects\"* 指南，完善你的个人项目演示链接和源代码。\n\n通过以上步骤，你可以充分利用 `Data-Science-Interview-Resources` 构建完整的知识体系，为数据科学岗位面试做好充分准备。","拥有三年数据分析经验的李明正备战一家头部互联网大厂的数据科学岗位面试，面对庞杂的知识体系感到无从下手。\n\n### 没有 Data-Science-Interview-Resources 时\n- **资源筛选耗时巨大**：在海量博客、论坛和视频中盲目搜索，难以辨别哪些是面试高频考点，浪费了大量复习时间。\n- **知识盲区难以自查**：缺乏系统性的技能清单，容易忽略商业理解、文献综述或特定的数学基础（如线性代数），导致面试中被问住。\n- **实战经验缺失**：只关注算法理论，不清楚大厂实际考察的 SQL 复杂查询、RESTful API 构建或代码规范等工程化要求。\n- **备考方向分散**：不同来源的建议相互冲突，无法形成统一的复习逻辑，越准备越焦虑，难以建立自信。\n\n### 使用 Data-Science-Interview-Resources 后\n- **精准锁定核心考点**：直接依据仓库中经作者亲自验证的技能图谱（如概率统计、机器学习建模），快速聚焦高价值复习内容。\n- **全方位查漏补缺**：对照清单发现自己在“业务理解”和“论文阅读”方面的短板，及时补充了针对高级岗位的关键能力。\n- **对齐工业界标准**：通过推荐的工程知识资源，掌握了清洗代码、API 开发等实战技巧，完美匹配 FAANG 类公司的面试要求。\n- **构建系统化路径**：利用整合好的从简历筛选到最终拿 Offer 的全流程指南，制定了清晰的备考计划，心态从慌乱转为从容。\n\nData-Science-Interview-Resources 
将零散的面试情报转化为结构化的行动指南，帮助求职者用最短时间补齐短板，精准击中面试官的核心考察点。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frbhatia46_Data-Science-Interview-Resources_c3bedf24.png","rbhatia46","Rahul Bhatia","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Frbhatia46_8ba85558.jpg","Senior Applied Scientist @servicenow\r\nPreviously @fidelity @rakutentech @CRED-CLUB\r\n\r\nSpeaker @PyConMY 2019\r\nSpeaker @pyconid 2020\r\nGSoC 2019 Mentor @publiclab ","ServiceNow","Hyderabad, India","rbhatia46@gmail.com",null,"https:\u002F\u002Fgithub.com\u002Frbhatia46",3330,758,"2026-04-10T14:20:45","MIT",1,"","未说明",{"notes":89,"python":87,"dependencies":90},"该仓库并非可运行的软件工具或代码库，而是一份数据科学面试准备的学习资源清单（包含文章、视频链接等）。因此，它没有特定的操作系统、GPU、内存、Python 版本或依赖库要求。用户只需具备浏览器即可访问其中列出的外部学习资源。文中提到的 Python、SQL、Spark 等是面试需要掌握的技能，而非运行此仓库的环境需求。",[],[92,16,14],"其他",[94,95,96,97,98,99,100,101],"data-science-interview","data-science","machine-learning-interview","machine-learning","learning-resources","interview-resources","interview-questions","artificial-intelligence","2026-03-27T02:49:30.150509","2026-04-13T23:52:54.332360",[],[]]