[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-khanhnamle1994--cracking-the-data-science-interview":3,"tool-khanhnamle1994--cracking-the-data-science-interview":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",159636,2,"2026-04-17T23:33:34",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",108322,"2026-04-10T11:39:34",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[52,13,15,14],"插件",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":32,"last_commit_at":59,"category_tags":60,"status":17},4721,"markitdown","microsoft\u002Fmarkitdown","MarkItDown 是一款由微软 AutoGen 团队打造的轻量级 Python 工具，专为将各类文件高效转换为 Markdown 格式而设计。它支持 PDF、Word、Excel、PPT、图片（含 OCR）、音频（含语音转录）、HTML 乃至 YouTube 链接等多种格式的解析，能够精准提取文档中的标题、列表、表格和链接等关键结构信息。\n\n在人工智能应用日益普及的今天，大语言模型（LLM）虽擅长处理文本，却难以直接读取复杂的二进制办公文档。MarkItDown 恰好解决了这一痛点，它将非结构化或半结构化的文件转化为模型“原生理解”且 Token 效率极高的 Markdown 格式，成为连接本地文件与 AI 分析 pipeline 的理想桥梁。此外，它还提供了 MCP（模型上下文协议）服务器，可无缝集成到 Claude Desktop 等 LLM 应用中。\n\n这款工具特别适合开发者、数据科学家及 AI 研究人员使用，尤其是那些需要构建文档检索增强生成（RAG）系统、进行批量文本分析或希望让 AI 助手直接“阅读”本地文件的用户。虽然生成的内容也具备一定可读性，但其核心优势在于为机器",93400,"2026-04-06T19:52:38",[52,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":66,"readme_en":67,"readme_zh":68,"quickstart_zh":69,"use_case_zh":70,"hero_image_url":71,"owner_login":72,"owner_name":73,"owner_avatar_url":74,"owner_bio":75,"owner_company":76,"owner_location":77,"owner_email":78,"owner_twitter":79,"owner_website":80,"owner_url":81,"languages":82,"stars":112,"forks":113,"last_commit_at":114,"license":115,"difficulty_score":116,"env_os":117,"env_gpu":117,"env_ram":117,"env_deps":118,"category_tags":131,"github_topics":133,"view_count":32,"oss_zip_url":115,"oss_zip_packed_at":115,"status":17,"created_at":144,"updated_at":145,"faqs":146,"releases":147},8997,"khanhnamle1994\u002Fcracking-the-data-science-interview","cracking-the-data-science-interview","A Collection of Cheatsheets, Books, Questions, and Portfolio For DS\u002FML Interview Prep","cracking-the-data-science-interview 是一个专为数据科学与机器学习求职者打造的开源备考资源库。面对行业面试中范围广、难度高的挑战，许多求职者往往难以系统性地梳理知识点或缺乏真实的实战演练机会。该项目通过整合速查表、经典电子书、真实面试题库、案例分析以及作品集范例，提供了一站式的解决方案，帮助用户高效填补知识盲区并提升应试能力。\n\n其内容覆盖极为全面，不仅包含 SQL、统计学、数学基础等核心理论速查指南，还深入涵盖了机器学习、深度学习、计算机视觉及自然语言处理等前沿技术概念。此外，资源库中收录了多本权威专业书籍，并整理了来自实际面试场景的高频问题与案例研究，甚至包含了数据新闻作品集的参考，极大地丰富了准备维度。\n\n无论是即将步入职场的数据科学初学者、希望转行进入 AI 领域的开发者，还是寻求进阶的研究人员，都能从中找到针对性的学习资料。凭借结构清晰的知识分类和贴近实战的题目收集，cracking-the-data-science-interview 成为了连接理论学习与职业面试之间的坚实桥梁，助您从容应对各类技术考核。","Here are the sections:\n\n* [Data Science Cheatsheets](#data-science-cheatsheets)\n* [Data Science EBooks](#data-science-ebooks)\n* [Data Science Question Bank](#data-science-question-bank)\n* [Data Science Case Studies](#data-science-case-studies)\n* [Data Science Portfolio](#data-science-portfolio)\n* [Data Journalism Portfolio](#data-journalism-portfolio)\n* [Downloadable Cheatsheets](#downloadable-cheatsheets)\n\n## Data Science Cheatsheets\n\n[This section](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FCheatsheets) contains cheatsheets of basic concepts in data science that will be asked in interviews:\n\n* [SQL](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FCheatsheets#sql)\n* [Statistics and Probability](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FCheatsheets#statistics-and-probability)\n* [Mathematics](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FCheatsheets#mathematics)\n* [Machine Learning Concepts](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FCheatsheets#machine-learning-concepts)\n* [Deep Learning Concepts](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FCheatsheets#deep-learning-concepts)\n* [Supervised Learning](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FCheatsheets#supervised-learning)\n* [Unsupervised Learning](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FCheatsheets#unsupervised-learning)\n* [Computer Vision](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FCheatsheets#computer-vision)\n* [Natural Language Processing](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FCheatsheets#natural-language-processing)\n* [Stanford Materials](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FCheatsheets#stanford-materials)\n\n## Data Science EBooks\n\n[This section](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FEBooks) contains books that I have read about data science and machine learning:\n\n* [Intro To Machine Learning with Python](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FEBooks\u002FIntro-To-ML-with-Python)\n* [Machine Learning In Action](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FEBooks\u002FMachine-Learning-In-Action)\n* [Python Data Science Handbook](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FEBooks\u002FPython-DataScience-Handbook)\n* [Doing Data Science - Straight Talk From The Front Line](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FEBooks\u002FDoing-Data-Science-Straight-Talk-From-The-Front-Line)\n* [Machine Learning For Finance](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FEBooks\u002FMachine-Learning-For-Finance)\n* [Practical Statistics for Data Science](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FEBooks\u002FPractical-Statistics-For-Data-Science)\n* [A\u002FB Testing](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FEBooks\u002FAB-Testing)\n\n## Data Science Question Bank\n\n[This section](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FQuestion-Bank) contains sample questions that were asked in actual data science interviews:\n\n* [Data Interview Qs](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FQuestion-Bank\u002FData-Interview-Qs)\n* [Data Science Prep](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FQuestion-Bank\u002FData-Science-Prep)\n* [Interview Query](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FQuestion-Bank\u002FInterview-Query)\n* [Analytics Vidhya](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FQuestion-Bank\u002FAnalytics-Vidhya.md)\n* [Springboard](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FQuestion-Bank\u002FSpringboard.md)\n* [Elite Data Science](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FQuestion-Bank\u002FElite-Data-Science.md)\n* [Workera](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FQuestion-Bank\u002FWorkera)\n* [150 Essential Data Science Questions and Answers](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FQuestion-Bank\u002F150-Essential-Data-Science-Questions-and-Answers.pdf)\n\n## Data Science Case Studies\n[This section](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FCase-Studies) contains case study questions that concern designing machine learning systems to solve practical problems.\n\n## Data Science Portfolio\n\nThis section contains portfolio of data science projects completed by me for academic, self learning, and hobby purposes.\n\nFor a more visually pleasant experience for browsing the portfolio, check out [jameskle.com\u002Fdata-portfolio](https:\u002F\u002Fjameskle.com\u002Fdata-portfolio)\n\n- ### Recommendation Systems\n\n    - [Transfer Rec](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Ftransfer-rec): My ongoing research work that intersects deep learning and recommendation systems.\n\n    - [Movie Recommendation](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fmovielens): Designed 4 different models that recommend items on the MovieLens dataset.\n\n    _Tools: PyTorch, TensorBoard, Keras, Pandas, NumPy, SciPy, Matplotlib, Seaborn, Scikit-Learn, Surprise, Wordcloud_\n\n- ### Machine Learning\n\n    - [Trip Optimizer](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Ftrip-optimizer): Used XGBoost and evolutionary algorithms to optimize the travel time for taxi vehicles in New York City.\n\n    - [Instacart Market Basket Analysis](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Finstacart-orders): Tackled the Instacart Market Basket Analysis challenge to predict which products will be in a user's next order.\n\n    _Tools: Pandas, NumPy, Matplotlib, XGBoost, Geopy, Scikit-Learn_\n\n- ### Computer Vision\n\n    - [Fashion Recommendation](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Ffashion-recommendation): Built a ResNet-based model that classifies and recommends fashion images in the DeepFashion database based on semantic similarity.\n\n    - [Fashion Classification](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Ffashion-mnist): Developed 4 different Convolutional Neural Networks that classify images in the Fashion MNIST dataset.\n\n    - [Dog Breed Classification](https:\u002F\u002Fmedium.com\u002Fnanonets\u002Fhow-to-easily-build-a-dog-breed-image-classification-model-2fd214419cde): Designed a Convolutional Neural Network that identifies dog breed.\n\n    - [Road Segmentation](https:\u002F\u002Fmedium.com\u002Fnanonets\u002Fhow-to-do-image-segmentation-using-deep-learning-c673cc5862ef): Implemented a Fully-Convolutional Network for semantic segmentation task in the Kitty Road Dataset.\n\n    _Tools: TensorFlow, Keras, Pandas, NumPy, Matplotlib, Scikit-Learn, TensorBoard_\n\n- ### Natural Language Processing\n\n    - [Classifying Tweets with Weights & Biases](https:\u002F\u002Fwww.wandb.com\u002Farticles\u002Fclassifying-tweets-with-wandb): Developed 3 different neural network models that classify tweets on a crowdsourced dataset in Figure Eight.\n\n- ### Data Analysis and Visualization\n\n    - [World Cup 2018 Team Analysis](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fworld-cup-2018): Analysis and visualization of the FIFA 18 dataset to predict the best possible international squad lineups for 10 teams at the 2018 World Cup in Russia.\n\n    - [Spotify Artists Analysis](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fspotify-artists-analysis): Analysis and visualization of musical styles from 50 different artists with a wide range of genres on Spotify.\n\n    _Tools: Pandas, NumPy, Matplotlib, Rspotify, httr, dplyr, tidyr, radarchart, ggplot2_\n\n## Data Journalism Portfolio\n\nThis section contains portfolio of data journalism articles completed by me for freelance clients and self-learning purposes.\n\nFor a more visually pleasant experience for browsing the portfolio, check out [jameskle.com\u002Fdata-journalism](https:\u002F\u002Fjameskle.com\u002Fdata-journalism)\n\n- ### Statistics\n\n    - [The 10 Statistical Techniques Data Scientists Need to Master](https:\u002F\u002Fwww.kdnuggets.com\u002F2017\u002F11\u002F10-statistical-techniques-data-scientists-need-master.html)\n\n    - [Logistic Regression Tutorial](https:\u002F\u002Fwww.datacamp.com\u002Fcommunity\u002Ftutorials\u002Flogistic-regression-R)\n\n    - [Decision Trees Tutorial](https:\u002F\u002Fwww.datacamp.com\u002Fcommunity\u002Ftutorials\u002Fdecision-trees-R)\n\n    - [Support Vector Machines Tutorial](https:\u002F\u002Fwww.datacamp.com\u002Fcommunity\u002Ftutorials\u002Fsupport-vector-machines-r)\n\n    - [A Friendly Introduction to Data-Driven Marketing for Business Leaders](https:\u002F\u002Fwww.topbots.com\u002Fdata-driven-marketing-for-business-leaders\u002F)\n\n- ### Machine Learning\n\n    - [The 10 Algorithms Machine Learning Engineers Need to Know](https:\u002F\u002Fwww.kdnuggets.com\u002F2016\u002F08\u002F10-algorithms-machine-learning-engineers.html)\n\n    - [12 Useful Things to Know About Machine Learning](https:\u002F\u002Fwww.kdnuggets.com\u002F2018\u002F04\u002F12-useful-things-know-about-machine-learning.html)\n\n    - [A Tour of The Top 10 Algorithms for Machine Learning Newbie](https:\u002F\u002Fbuiltin.com\u002Fdata-science\u002Ftour-top-10-algorithms-machine-learning-newbies)\n\n    - [The 10 Data Mining Techniques Data Scientists Need For Their Toolbox](https:\u002F\u002Fbuiltin.com\u002Fdata-science\u002F10-data-mining-techniques-data-scientists-need-their-toolbox)\n\n    - [Clustering and Classification in E-Commerce](https:\u002F\u002Flucidworks.com\u002F2019\u002F01\u002F24\u002Fclustering-classification-supervised-unsupervised-learning-ecommerce\u002F)\n\n    - [The ABCs of Learning to Rank](https:\u002F\u002Flucidworks.com\u002Fpost\u002Fabcs-learning-to-rank\u002F)\n\n    - [6 Ways to Debug a Machine Learning Model](https:\u002F\u002Fwww.wandb.com\u002Farticles\u002Fdebug-ml-model)\n\n    - [8 Machine Learning Career Paths to Pursue Today](https:\u002F\u002Fwww.scaler.com\u002Fblog\u002Fmachine-learning-career\u002F#machine-learning-career-paths-and-specializations)\n\n- ### Deep Learning\n\n    - [The 10 Deep Learning Methods AI Practitioners Need to Apply](https:\u002F\u002Fwww.kdnuggets.com\u002F2017\u002F12\u002F10-deep-learning-methods-ai-practitioners-need-apply.html)\n\n    - [The 8 Neural Network Architectures ML Researchers Need to Learn](https:\u002F\u002Fwww.kdnuggets.com\u002F2018\u002F02\u002F8-neural-network-architectures-machine-learning-researchers-need-learn.html)\n\n    - [The 5 Deep Learning Frameworks Every Serious Machine Learner Should Be Familiar With](https:\u002F\u002Fheartbeat.fritz.ai\u002Fthe-5-deep-learning-frameworks-every-serious-machine-learner-should-be-familiar-with-93f4d469d24c)\n\n    - [The 5 Computer Vision Techniques That Will Change How You See The World](https:\u002F\u002Fheartbeat.fritz.ai\u002Fthe-5-computer-vision-techniques-that-will-change-how-you-see-the-world-1ee19334354b)\n\n    - [Convolutional Neural Networks: The Biologically-Inspired Model](https:\u002F\u002Fwww.codementor.io\u002F@james_aka_yale\u002Fconvolutional-neural-networks-the-biologically-inspired-model-iq6s48zms)\n\n    - [Recurrent Neural Networks: The Powerhouse of Language Modeling](https:\u002F\u002Fbuiltin.com\u002Fdata-science\u002Frecurrent-neural-networks-powerhouse-language-modeling)\n\n    - [The 7 NLP Techniques That Will Change How You Communicate in the Future](https:\u002F\u002Fheartbeat.fritz.ai\u002Fthe-7-nlp-techniques-that-will-change-how-you-communicate-in-the-future-part-i-f0114b2f0497)\n\n    - [The 5 Trends Dominating Computer Vision in 2018](https:\u002F\u002Fheartbeat.fritz.ai\u002Fthe-5-trends-that-dominated-computer-vision-in-2018-de38fbb9bd86)\n\n    - [The 3 Deep Learning Frameworks For End-to-End Speech Recognition That Power Your Devices](https:\u002F\u002Fheartbeat.fritz.ai\u002Fthe-3-deep-learning-frameworks-for-end-to-end-speech-recognition-that-power-your-devices-37b891ddc380)\n\n    - [The 5 Algorithms for Efficient Deep Learning Inference on Small Devices](https:\u002F\u002Fheartbeat.fritz.ai\u002Fthe-5-algorithms-for-efficient-deep-learning-inference-on-small-devices-bcc2d18aa806)\n\n    - [The 4 Research Techniques to Train Deep Neural Network Models More Efficiently](https:\u002F\u002Fheartbeat.fritz.ai\u002Fthe-4-research-techniques-to-train-deep-neural-network-models-more-efficiently-810ea2886205)\n\n    - [The 2 Hardware Architectures for Efficient Training and Inference of Deep Nets](https:\u002F\u002Fheartbeat.fritz.ai\u002Fthe-2-types-of-hardware-architectures-for-efficient-training-and-inference-of-deep-neural-networks-a034850e26dd)\n\n    - [10 Deep Learning Best Practices to Keep in Mind in 2020](https:\u002F\u002Fnanonets.com\u002Fblog\u002F10-best-practices-deep-learning\u002F)\n\n## Downloadable Cheatsheets\n\nThese PDF cheatsheets come from [BecomingHuman.AI](https:\u002F\u002Fbecominghuman.ai\u002Fcheat-sheets-for-ai-neural-networks-machine-learning-deep-learning-big-data-science-pdf-f22dc900d2d7).\n\n### 1 - Neural Network Basics\n\n![Neural Network Basics](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_147235f6509f.png)\n\n### 2 - Neural Network Graphs\n\n![Neural Network Graphs](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_04aa2667bfe0.png)\n\n### 3 - Machine Learning with Emojis\n\n![Machine Learning with Emojis](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_1412218f88fd.png)\n\n### 4 - Scikit-Learn With Python\n\n![Scikit-Learn With Python](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_16691a5eafe1.png)\n\n### 5 - Python Basics\n\n![Python Basics](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_e37b7347aaa7.png)\n\n### 6 - NumPy Basics\n\n![NumPy Basics](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_ae6d6a7592ee.png)\n\n### 7 - Pandas Basics\n\n![Pandas Basics](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_3458c9d13ce1.png)\n\n### 8 - Data Wrangling With Pandas\n\n![Data Wrangling With Pandas Part 1](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_c18cda67cdaa.png)\n\n![Data Wrangling With Pandas Part 2](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_eaa14e4bb8b0.png)\n\n### 9 - SciPy Linear Algebra\n\n![SciPy Linear Algebra](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_5632a1c72f33.png)\n\n### 10 - Matplotlib Basics\n\n![Matplotlib Basics](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_6c3686e780d4.png)\n\n### 11 - Keras\n\n![Keras](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_89edae9eaa18.png)\n\n### 12 - Big-O\n\n![Big-O](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_7eae760e3a78.png)\n","以下是各部分内容：\n\n* [数据科学速查表](#data-science-cheatsheets)\n* [数据科学电子书](#data-science-ebooks)\n* [数据科学题库](#data-science-question-bank)\n* [数据科学案例研究](#data-science-case-studies)\n* [数据科学作品集](#data-science-portfolio)\n* [数据新闻作品集](#data-journalism-portfolio)\n* [可下载速查表](#downloadable-cheatsheets)\n\n## 数据科学速查表\n\n[本部分](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FCheatsheets)包含面试中常考的数据科学基础概念速查表：\n\n* [SQL](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FCheatsheets#sql)\n* [统计与概率](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FCheatsheets#statistics-and-probability)\n* [数学](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FCheatsheets#mathematics)\n* [机器学习概念](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FCheatsheets#machine-learning-concepts)\n* [深度学习概念](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FCheatsheets#deep-learning-concepts)\n* [监督学习](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FCheatsheets#supervised-learning)\n* [无监督学习](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FCheatsheets#unsupervised-learning)\n* [计算机视觉](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FCheatsheets#computer-vision)\n* [自然语言处理](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FCheatsheets#natural-language-processing)\n* [斯坦福相关资料](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FCheatsheets#stanford-materials)\n\n## 数据科学电子书\n\n[本部分](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FEBooks)收录了我阅读过的关于数据科学和机器学习的书籍：\n\n* [Python机器学习入门](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FEBooks\u002FIntro-To-ML-with-Python)\n* [机器学习实战](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FEBooks\u002FMachine-Learning-In-Action)\n* [Python数据科学手册](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FEBooks\u002FPython-DataScience-Handbook)\n* [做数据科学——来自一线的直言不讳](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FEBooks\u002FDoing-Data-Science-Straight-Talk-From-The-Front-Line)\n* [金融领域的机器学习](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FEBooks\u002FMachine-Learning-For-Finance)\n* [数据科学实用统计学](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FEBooks\u002FPractical-Statistics-For-Data-Science)\n* [A\u002FB测试](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FEBooks\u002FAB-Testing)\n\n## 数据科学题库\n\n[本部分](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FQuestion-Bank)收录了实际数据科学面试中出现过的样题：\n\n* [数据面试题](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FQuestion-Bank\u002FData-Interview-Qs)\n* [数据科学备考](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FQuestion-Bank\u002FData-Science-Prep)\n* [Interview Query](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FQuestion-Bank\u002FInterview-Query)\n* [Analytics Vidhya](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FQuestion-Bank\u002FAnalytics-Vidhya.md)\n* [Springboard](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FQuestion-Bank\u002FSpringboard.md)\n* [Elite Data Science](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FQuestion-Bank\u002FElite-Data-Science.md)\n* [Workera](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FQuestion-Bank\u002FWorkera)\n* [150道数据科学必备问答](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FQuestion-Bank\u002F150-Essential-Data-Science-Questions-and-Answers.pdf)\n\n## 数据科学案例研究\n[本部分](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview\u002Ftree\u002Fmaster\u002FCase-Studies)包含有关设计机器学习系统以解决实际问题的案例研究题目。\n\n## 数据科学作品集\n\n本节收录了我为学术研究、自我学习及兴趣爱好而完成的数据科学项目。\n\n如需更直观的浏览体验，请访问 [jameskle.com\u002Fdata-portfolio](https:\u002F\u002Fjameskle.com\u002Fdata-portfolio)。\n\n- ### 推荐系统\n\n    - [Transfer Rec](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Ftransfer-rec)：我正在进行的一项研究工作，融合了深度学习与推荐系统。\n\n    - [电影推荐](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fmovielens)：设计并实现了4种不同的模型，用于在MovieLens数据集上进行物品推荐。\n\n    _工具：PyTorch、TensorBoard、Keras、Pandas、NumPy、SciPy、Matplotlib、Seaborn、Scikit-Learn、Surprise、Wordcloud_\n\n- ### 机器学习\n\n    - [行程优化器](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Ftrip-optimizer)：利用XGBoost和进化算法优化纽约市出租车的行驶时间。\n\n    - [Instacart购物篮分析](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Finstacart-orders)：参与Instacart购物篮分析挑战赛，预测用户下一次订单中可能包含的商品。\n\n    _工具：Pandas、NumPy、Matplotlib、XGBoost、Geopy、Scikit-Learn_\n\n- ### 计算机视觉\n\n    - [时尚推荐](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Ffashion-recommendation)：基于ResNet构建了一个模型，能够根据语义相似性对DeepFashion数据库中的时尚图像进行分类和推荐。\n\n    - [时尚分类](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Ffashion-mnist)：开发了4种不同的卷积神经网络，用于对Fashion MNIST数据集中的图像进行分类。\n\n    - [犬种分类](https:\u002F\u002Fmedium.com\u002Fnanonets\u002Fhow-to-easily-build-a-dog-breed-image-classification-model-2fd214419cde)：设计了一种卷积神经网络，用于识别犬种。\n\n    - [道路分割](https:\u002F\u002Fmedium.com\u002Fnanonets\u002Fhow-to-do-image-segmentation-using-deep-learning-c673cc5862ef)：在Kitty Road数据集上实现了一个全卷积网络，用于语义分割任务。\n\n    _工具：TensorFlow、Keras、Pandas、NumPy、Matplotlib、Scikit-Learn、TensorBoard_\n\n- ### 自然语言处理\n\n    - [使用Weights & Biases分类推文](https:\u002F\u002Fwww.wandb.com\u002Farticles\u002Fclassifying-tweets-with-wandb)：开发了3种不同的神经网络模型，用于对Figure Eight平台上众包收集的推文数据集进行分类。\n\n- ### 数据分析与可视化\n\n    - [2018年世界杯球队分析](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fworld-cup-2018)：对FIFA 18数据集进行分析与可视化，以预测2018年俄罗斯世界杯中10支球队的最佳国际阵容。\n\n    - [Spotify艺人分析](https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fspotify-artists-analysis)：对Spotify平台上50位风格迥异的艺人的音乐风格进行了分析与可视化。\n\n    _工具：Pandas、NumPy、Matplotlib、Rspotify、httr、dplyr、tidyr、radarchart、ggplot2_\n\n## 数据新闻作品集\n\n本部分收录了我为自由职业客户及自学目的所完成的数据新闻文章作品集。\n\n如需更直观的浏览体验，请访问 [jameskle.com\u002Fdata-journalism](https:\u002F\u002Fjameskle.com\u002Fdata-journalism)。\n\n- ### 统计学\n\n    - [数据科学家需要掌握的10种统计技术](https:\u002F\u002Fwww.kdnuggets.com\u002F2017\u002F11\u002F10-statistical-techniques-data-scientists-need-master.html)\n\n    - [逻辑回归教程](https:\u002F\u002Fwww.datacamp.com\u002Fcommunity\u002Ftutorials\u002Flogistic-regression-R)\n\n    - [决策树教程](https:\u002F\u002Fwww.datacamp.com\u002Fcommunity\u002Ftutorials\u002Fdecision-trees-R)\n\n    - [支持向量机教程](https:\u002F\u002Fwww.datacamp.com\u002Fcommunity\u002Ftutorials\u002Fsupport-vector-machines-r)\n\n    - [面向企业领导的数据驱动营销友好指南](https:\u002F\u002Fwww.topbots.com\u002Fdata-driven-marketing-for-business-leaders\u002F)\n\n- ### 机器学习\n\n    - [机器学习工程师需要了解的10大算法](https:\u002F\u002Fwww.kdnuggets.com\u002F2016\u002F08\u002F10-algorithms-machine-learning-engineers.html)\n\n    - [关于机器学习的12个实用知识点](https:\u002F\u002Fwww.kdnuggets.com\u002F2018\u002F04\u002F12-useful-things-know-about-machine-learning.html)\n\n    - [面向机器学习新手的十大算法导览](https:\u002F\u002Fbuiltin.com\u002Fdata-science\u002Ftour-top-10-algorithms-machine-learning-newbies)\n\n    - [数据科学家工具箱中必备的10种数据挖掘技术](https:\u002F\u002Fbuiltin.com\u002Fdata-science\u002F10-data-mining-techniques-data-scientists-need-their-toolbox)\n\n    - [电子商务中的聚类与分类](https:\u002F\u002Flucidworks.com\u002F2019\u002F01\u002F24\u002Fclustering-classification-supervised-unsupervised-learning-ecommerce\u002F)\n\n    - [排序学习ABC](https:\u002F\u002Flucidworks.com\u002Fpost\u002Fabcs-learning-to-rank\u002F)\n\n    - [调试机器学习模型的6种方法](https:\u002F\u002Fwww.wandb.com\u002Farticles\u002Fdebug-ml-model)\n\n    - [当下值得追求的8条机器学习职业发展路径](https:\u002F\u002Fwww.scaler.com\u002Fblog\u002Fmachine-learning-career\u002F#machine-learning-career-paths-and-specializations)\n\n- ### 深度学习\n\n    - [人工智能从业者需要应用的10种深度学习方法](https:\u002F\u002Fwww.kdnuggets.com\u002F2017\u002F12\u002F10-deep-learning-methods-ai-practitioners-need-apply.html)\n\n    - [机器学习研究人员需要掌握的8种神经网络架构](https:\u002F\u002Fwww.kdnuggets.com\u002F2018\u002F02\u002F8-neural-network-architectures-machine-learning-researchers-need-learn.html)\n\n    - [每个认真的机器学习从业者都应熟悉的5大深度学习框架](https:\u002F\u002Fheartbeat.fritz.ai\u002Fthe-5-deep-learning-frameworks-every-serious-machine-learner-should-be-familiar-with-93f4d469d24c)\n\n    - [将改变你看待世界方式的5种计算机视觉技术](https:\u002F\u002Fheartbeat.fritz.ai\u002Fthe-5-computer-vision-techniques-that-will-change-how-you-see-the-world-1ee19334354b)\n\n    - [卷积神经网络：受生物启发的模型](https:\u002F\u002Fwww.codementor.io\u002F@james_aka_yale\u002Fconvolutional-neural-networks-the-biologically-inspired-model-iq6s48zms)\n\n    - [循环神经网络：语言建模的强劲引擎](https:\u002F\u002Fbuiltin.com\u002Fdata-science\u002Frecurrent-neural-networks-powerhouse-language-modeling)\n\n    - [将改变你未来沟通方式的7种自然语言处理技术（上篇）](https:\u002F\u002Fheartbeat.fritz.ai\u002Fthe-7-nlp-techniques-that-will-change-how-you-communicate-in-the-future-part-i-f0114b2f0497)\n\n    - [2018年主导计算机视觉领域的5大趋势](https:\u002F\u002Fheartbeat.fritz.ai\u002Fthe-5-trends-that-dominated-computer-vision-in-2018-de38fbb9bd86)\n\n    - [驱动你设备的3大用于端到端语音识别的深度学习框架](https:\u002F\u002Fheartbeat.fritz.ai\u002Fthe-3-deep-learning-frameworks-for-end-to-end-speech-recognition-that-power-your-devices-37b891ddc380)\n\n    - [在小型设备上实现高效深度学习推理的5种算法](https:\u002F\u002Fheartbeat.fritz.ai\u002Fthe-5-algorithms-for-efficient-deep-learning-inference-on-small-devices-bcc2d18aa806)\n\n    - [更高效训练深度神经网络模型的4种研究技巧](https:\u002F\u002Fheartbeat.fritz.ai\u002Fthe-4-research-techniques-to-train-deep-neural-network-models-more-efficiently-810ea2886205)\n\n    - [用于深度神经网络高效训练和推理的2种硬件架构](https:\u002F\u002Fheartbeat.fritz.ai\u002Fthe-2-types-of-hardware-architectures-for-efficient-training-and-inference-of-deep-neural-networks-a034850e26dd)\n\n    - [2020年需牢记的10条深度学习最佳实践](https:\u002F\u002Fnanonets.com\u002Fblog\u002F10-best-practices-deep-learning\u002F)\n\n## 可下载速查表\n\n这些PDF速查表来自[BecomingHuman.AI](https:\u002F\u002Fbecominghuman.ai\u002Fcheat-sheets-for-ai-neural-networks-machine-learning-deep-learning-big-data-science-pdf-f22dc900d2d7)。\n\n### 1 - 神经网络基础\n\n![神经网络基础](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_147235f6509f.png)\n\n### 2 - 神经网络图谱\n\n![神经网络图谱](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_04aa2667bfe0.png)\n\n### 3 - 用表情符号学习机器学习\n\n![用表情符号学习机器学习](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_1412218f88fd.png)\n\n### 4 - 使用Python的Scikit-Learn\n\n![使用Python的Scikit-Learn](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_16691a5eafe1.png)\n\n### 5 - Python基础\n\n![Python基础](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_e37b7347aaa7.png)\n\n### 6 - NumPy基础\n\n![NumPy基础](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_ae6d6a7592ee.png)\n\n### 7 - Pandas基础\n\n![Pandas基础](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_3458c9d13ce1.png)\n\n### 8 - 使用Pandas进行数据清洗\n\n![使用Pandas进行数据清洗（第一部分）](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_c18cda67cdaa.png)\n\n![使用Pandas进行数据清洗（第二部分）](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_eaa14e4bb8b0.png)\n\n### 9 - SciPy线性代数\n\n![SciPy线性代数](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_5632a1c72f33.png)\n\n### 10 - Matplotlib基础\n\n![Matplotlib基础](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_6c3686e780d4.png)\n\n### 11 - Keras\n\n![Keras](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_89edae9eaa18.png)\n\n### 12 - 大O符号\n\n![大O符号](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_readme_7eae760e3a78.png)","# cracking-the-data-science-interview 快速上手指南\n\n`cracking-the-data-science-interview` 并非一个需要编译安装的可执行软件包，而是一个汇集了数据科学面试核心资源（速查表、电子书、题库、案例研究及项目作品集）的开源知识库。本指南将指导你如何获取并高效利用这些资源。\n\n## 环境准备\n\n由于本项目主要为文档和代码示例集合，对环境要求极低：\n\n*   **操作系统**：Windows, macOS, 或 Linux 均可。\n*   **前置依赖**：\n    *   **Git**：用于克隆仓库（推荐）。\n    *   **浏览器**：用于在线阅读或查看本地 Markdown\u002FHTML 文件。\n    *   **Python (可选)**：如果你打算运行 `Data Science Portfolio` 章节中的具体项目代码，需安装 Python 3.7+ 及相关库（如 `pandas`, `scikit-learn`, `tensorflow`, `pytorch` 等，具体依赖请参考各子项目的 `requirements.txt`）。\n\n## 安装步骤\n\n推荐使用 Git 克隆整个仓库到本地，以便离线查阅和运行示例代码。\n\n### 1. 克隆仓库\n\n打开终端（Terminal 或 CMD），执行以下命令：\n\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994\u002Fcracking-the-data-science-interview.git\n```\n\n> **国内加速建议**：\n> 如果直接克隆速度较慢，可以使用 Gitee 镜像（如果有）或通过代理加速。若无特定镜像，可尝试使用国内通用的 Git 加速服务，或在命令中替换为国内托管平台的同步地址。\n>\n> 替代方案（下载 ZIP）：\n> 访问项目 GitHub 页面，点击 \"Code\" -> \"Download ZIP\"，解压后即可使用。\n\n### 2. 进入目录\n\n```bash\ncd cracking-the-data-science-interview\n```\n\n### 3. 安装项目特定依赖（仅针对练习代码）\n\n如果你计划运行 `Case-Studies` 或 `Data Science Portfolio` 中的具体项目，请进入对应子目录安装依赖。例如：\n\n```bash\n# 以 Movie Recommendation 项目为例\ncd Data-Science-Portfolio\u002FRecommendation-Systems\u002Fmovielens\npip install -r requirements.txt\n```\n*(注：并非所有子目录都有 requirements.txt，请根据具体项目说明操作)*\n\n## 基本使用\n\n本项目主要通过浏览目录结构来获取面试准备材料。以下是核心资源的使用路径：\n\n### 1. 查阅核心概念速查表 (Cheatsheets)\n适合快速复习基础理论。\n*   **路径**：`\u002FCheatsheets`\n*   **内容**：涵盖 SQL、统计学、数学、机器学习、深度学习、NLP 等核心概念的简明笔记。\n*   **使用方式**：直接在 GitHub 网页端点击对应 `.md` 文件阅读，或在本地用 Markdown 编辑器打开。\n\n### 2. 刷题与模拟面试 (Question Bank)\n适合实战演练。\n*   **路径**：`\u002FQuestion-Bank`\n*   **内容**：包含来自真实面试的题目，如 `Data-Interview-Qs` 和 `150-Essential-Data-Science-Questions-and-Answers.pdf`。\n*   **使用方式**：\n    ```bash\n    # 在终端直接预览文本格式的题库\n    cat Question-Bank\u002FData-Interview-Qs\u002FREADME.md\n    ```\n\n### 3. 学习经典案例 (Case Studies)\n适合提升系统设计能力。\n*   **路径**：`\u002FCase-Studies`\n*   **内容**：关于设计机器学习系统解决实际问题的案例分析。\n\n### 4. 运行实战项目 (Portfolio)\n适合丰富简历和深入理解代码。\n*   **路径**：`\u002FData-Science-Portfolio`\n*   **内容**：包含推荐系统、计算机视觉、NLP 等完整项目代码。\n*   **示例：运行一个简单的数据分析脚本**\n    假设你要研究 `Trip Optimizer` 项目：\n    ```bash\n    cd Data-Science-Portfolio\u002FMachine-Learning\u002Ftrip-optimizer\n    # 确保已安装 pandas, xgboost 等库\n    python main.py \n    ```\n    *(具体入口文件名请以该子目录实际文件为准)*\n\n### 5. 阅读技术文章 (Data Journalism)\n*   **路径**：`\u002FData-Journalism-Portfolio`\n*   **内容**：作者撰写的关于统计算法、机器学习趋势等技术博客文章汇总。\n\n---\n**提示**：对于大多数用户，直接将此仓库作为**在线索引**，配合浏览器阅读各个章节的 Markdown 文件是最高效的上手方式。","刚毕业的数据科学专业学生小李正在备战多家互联网大厂的技术面试，面对庞杂的知识体系感到无从下手。\n\n### 没有 cracking-the-data-science-interview 时\n- **资料搜集碎片化**：需要在 GitHub、知乎、博客园等多个平台零散搜索 SQL 速查表或机器学习概念，耗费大量时间筛选低质内容。\n- **复习重点模糊**：不清楚面试官常考的具体题型，只能盲目刷题，导致对统计学原理和深度学习核心考点的掌握浮于表面。\n- **缺乏实战案例**：手头只有理论书本知识，缺少真实的数据新闻或商业案例分析，面试时被问到“如何解决具体业务问题”时无法展开。\n- **作品集构建困难**：不知道如何整理过往项目来展示能力，简历中的项目描述单薄，难以在初筛中脱颖而出。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkhanhnamle1994_cracking-the-data-science-interview_147235f6.png","khanhnamle1994","James Le","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fkhanhnamle1994_e54470bf.jpg","Data Journalist 📝  -> Data Scientist 📊 -> Machine Learning Researcher 🔍 -> Developer Advocate 🤝","Twelve Labs","San Francisco, CA","jl1165@rit.edu","le_james94","https:\u002F\u002Fjameskle.com\u002F","https:\u002F\u002Fgithub.com\u002Fkhanhnamle1994",[83,87,91,95,99,103,106,109],{"name":84,"color":85,"percentage":86},"Jupyter Notebook","#DA5B0B",98.3,{"name":88,"color":89,"percentage":90},"HTML","#e34c26",0.9,{"name":92,"color":93,"percentage":94},"Python","#3572A5",0.5,{"name":96,"color":97,"percentage":98},"R","#198CE7",0.2,{"name":100,"color":101,"percentage":102},"CSS","#663399",0,{"name":104,"color":105,"percentage":102},"Less","#1d365d",{"name":107,"color":108,"percentage":102},"Makefile","#427819",{"name":110,"color":111,"percentage":102},"Cython","#fedf5b",4605,1208,"2026-04-17T17:59:13",null,1,"未说明",{"notes":119,"python":117,"dependencies":120},"该项目主要是一个包含数据科学面试指南、速查表、电子书、题库和案例研究的资源仓库，本身不是一个需要特定环境运行的单一软件工具。其中列出的个人项目组合（Portfolio）涉及多种技术栈（包括 Python 和 R），各自依赖不同的库（如 PyTorch, TensorFlow, Keras, XGBoost 等）。具体的运行环境需求需参考各个子项目的独立 README 文件。",[121,122,123,124,125,126,127,128,129,130],"PyTorch","TensorFlow","Keras","Pandas","NumPy","Scikit-Learn","Matplotlib","Seaborn","XGBoost","R (dplyr, ggplot2)",[14,132,16],"其他",[134,135,136,137,138,139,140,141,142,143],"data-science","machine-learning","deep-learning","data-portfolio","downloadable-cheatsheets","statistics","python","data-journalism","concepts","data-wrangling","2026-03-27T02:49:30.150509","2026-04-18T17:03:35.549294",[],[]]