[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-dssg--hitchhikers-guide":3,"tool-dssg--hitchhikers-guide":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",156804,2,"2026-04-15T11:34:33",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",108322,"2026-04-10T11:39:34",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[52,13,15,14],"插件",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":32,"last_commit_at":59,"category_tags":60,"status":17},4721,"markitdown","microsoft\u002Fmarkitdown","MarkItDown 是一款由微软 AutoGen 团队打造的轻量级 Python 工具，专为将各类文件高效转换为 Markdown 格式而设计。它支持 PDF、Word、Excel、PPT、图片（含 OCR）、音频（含语音转录）、HTML 乃至 YouTube 链接等多种格式的解析，能够精准提取文档中的标题、列表、表格和链接等关键结构信息。\n\n在人工智能应用日益普及的今天，大语言模型（LLM）虽擅长处理文本，却难以直接读取复杂的二进制办公文档。MarkItDown 恰好解决了这一痛点，它将非结构化或半结构化的文件转化为模型“原生理解”且 Token 效率极高的 Markdown 格式，成为连接本地文件与 AI 分析 pipeline 的理想桥梁。此外，它还提供了 MCP（模型上下文协议）服务器，可无缝集成到 Claude Desktop 等 LLM 应用中。\n\n这款工具特别适合开发者、数据科学家及 AI 研究人员使用，尤其是那些需要构建文档检索增强生成（RAG）系统、进行批量文本分析或希望让 AI 助手直接“阅读”本地文件的用户。虽然生成的内容也具备一定可读性，但其核心优势在于为机器",93400,"2026-04-06T19:52:38",[52,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":66,"readme_en":67,"readme_zh":68,"quickstart_zh":69,"use_case_zh":70,"hero_image_url":71,"owner_login":72,"owner_name":73,"owner_avatar_url":74,"owner_bio":75,"owner_company":76,"owner_location":76,"owner_email":77,"owner_twitter":78,"owner_website":79,"owner_url":80,"languages":81,"stars":113,"forks":114,"last_commit_at":115,"license":76,"difficulty_score":32,"env_os":116,"env_gpu":116,"env_ram":116,"env_deps":117,"category_tags":127,"github_topics":129,"view_count":32,"oss_zip_url":76,"oss_zip_packed_at":76,"status":17,"created_at":134,"updated_at":135,"faqs":136,"releases":157},7746,"dssg\u002Fhitchhikers-guide","hitchhikers-guide","The Hitchhiker's Guide to Data Science for Social Good","hitchhikers-guide 是一份面向“数据科学促进社会公益”领域的开源学习指南与课程资源库。它源自芝加哥大学发起的 DSSG 奖学金项目，旨在系统性地培养能够利用机器学习、人工智能和数据科学技术解决教育、医疗、交通及刑事司法等社会问题的专业人才。\n\n这份指南主要解决了传统数据科学教育中往往缺乏社会视角、伦理考量及跨学科协作训练的痛点。它不仅涵盖编程与计算机科学基础，更强调将数据技术与社会科学深度融合，引导学习者深入探讨隐私保护、数据保密性以及技术应用背后的伦理影响，从而塑造兼具技术实力与人文关怀的“社会公益数据科学家”。\n\nhitchhikers-guide 特别适合准备参与 DSSG 项目的学员、希望在家自学相关技能的研究人员与开发者，以及有意在本地发起类似公益数据项目的组织者使用。其独特亮点在于提供了一套经过实战验证的完整课程体系，包括教程、研讨会材料及伦理讨论组内容，既适合个人按图索骥进行自我提升，也支持社区贡献与协作，帮助全球更多有志之士掌握用数据推动社会向善的能力。","# Welcome to the Hitchhiker's Guide to Data Science for Social Good.\n\n## What is the Data Science for Social Good Fellowship?\n\nThe [Data Science for Social Good Fellowship (DSSG)](http:\u002F\u002Fdssgfellowship.org) is a hands-on and project-based summer program that launched in 2013 at the University of Chicago and has now expanded to multiple locations globally and currently coordinated by the [Data Science for Social Good Foundation](http:\u002F\u002Fwww.datascienceforsocialgood.org) and [Carnegie Mellon University](http:\u002F\u002Fwww.datasciencepublicpolicy.org). It brings a group of fellows, typically graduate students (or senior undergraduate students in some cases), from across the world to work on machine learning, artificial intelligence, and data science projects that have a social impact in partnership with social good organizations. From a pool of typically around 1000 applicants, 20-40 fellows are selected from diverse computational and quantitative disciplines including computer science, statistics, math, engineering, psychology, sociology, economics, and public policy.\n\nThe fellows work in small, cross-disciplinary teams on social good projects spanning education, health, energy, transportation, criminal justice, social services, economic development and international development in collaboration with global government agencies and non-profits. This work is done under close and hands-on mentorship from full-time, dedicated, senior data science mentors as well as dedicated project managers, with industry and\u002For government experience. The result is highly trained fellows, improved data science capacity of the social good organization, and a high quality data science project that is ready for field trial and implementation at the end of the program.\n\nIn addition to hands-on project-based training, the summer program also consists of workshops, tutorials, and ethics discussion groups based on our data science for social good curriculum designed to train the fellows in doing practical data science and artificial intelligence for social impact.\n\n## Who is this guide for?\n\nThe primary audience for this guide is the set of fellows coming to DSSG but we want everything we create to be open and accessible to larger world. We hope this is useful to people beyond the summer fellows coming to DSSG.\n\n**If you are applying to the program or have been accepted as a fellow,** [check out the manual](dssg-manual\u002F) to see how you can prepare before arriving, what orientation and training will cover, and what to expect from the summer.\n\n**If you are interested in learning at home,** check out the [tutorials and teach-outs](curriculum\u002F) developed by our staff and fellows throughout the summer, and to suggest or contribute additional resources.\n\n*Another one of our goals is to encourage collaborations. Anyone interested in doing this type of work, or starting a DSSG program, to build on what we've learned by **using and contributing to** these resources.\n\n## What is in this guide?\n\nOur number one priority at [DSSG](http:\u002F\u002Fdssgfellowship.org) is to **train fellows to do responsible data science\u002FML\u002FAI for social good work**. This curriculum includes many things you'd find in a data science course or bootcamp, but with an emphasis on solving problems with social impact, integrating data science with the social sciences, understanding and discussing ethical implications of the work, as well as privacy, and confidentiality issues.\n\nWe have spent many (sort of) early mornings waxing existential over Dunkin' Donuts while trying to define what makes a \"data scientist for social good,\" that enigmatic breed combining one part data scientist, one part helper, one part educator, and one part bleeding heart idealist. We've come to a rough working definition in the form of the skills and knowledge one would need, which we categorize as follows:\n- **Programming,** because you'll need to tell your computer what to do, usually by writing code.\n- **Computer science,** because you'll need to understand how your data is - and should be - structured, as well as the algorithms you use to analyze it.\n- **Math and stats,** because [everything else in life is just applied math](https:\u002F\u002Fxkcd.com\u002F435\u002F), and numerical results are meaningless without some measure of uncertainty.\n- **Machine learning,** because you'll want to build predictive or descriptive models that can learn, evolve, and improve over time.\n- **Social science,** because you'll need to know how to design experiments to validate your models in the field, and to understand when correlation can plausibly suggest causation, and sometimes even do causal inference.\n- **Problem and Project Scoping,** because you'll need to be able to go from a vague and fuzzy project description to a problem you can solve, understand the goals of the project, the interventions you are informing, the data you have and need, and the analysis that needs to be done.\n- **Project management,** to make progress as a team, to work effectively with your project partner, and work with a team to make that useful solution actually happen.\n- **Privacy and security,** because data is people and needs to be kept secure and confidential.\n- **Ethics, fairness, bias, and transparency,** because your work has the potential to be misused or have a negative impact on people's lives, so you have to consider the biases in your data and analyses, the ethical and fairness implications, and how to make your work interpretable and transparent to the users and to the people impacted by it.\n- **Communications,** because you'll need to be able to tell the story of why what you're doing matters and the methods you're using to a broad audience.\n- **Social issues,** because you're doing this work to help people, and you don't live or work in a vacuum, so you need to understand the context and history surrounding the people, places and issues you want to impact.\n\nAll material is licensed under CC-BY 4.0\n![License: CC BY 4.0](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FLicense-CC%20BY%204.0-lightgrey.svg)\n\n# Table of Contents\nThe links below will help you find things quickly.\n\n## [DSSG Manual](sources\u002Fdssg-manual\u002FREADME.md)\n\n### [Summer Overview](sources\u002Fdssg-manual\u002Fsummer-overview\u002FREADME.md)\nThis sections covers general information on projects, working with partners, presentations, orientation information, and the following schedules:\n\n- [High level summer plan](sources\u002Fdssg-manual\u002Fsummer-overview\u002FHigh%20Level%20Plan%20for%20the%20Summer.pdf): details what the goals are for each week of the program\n- Sample Orientation schedules [2016](sources\u002Fdssg-manual\u002Fsummer-overview\u002FDSSG2016OrientationSchedule.pdf) and [2022](sources\u002Fdssg-manual\u002Fsummer-overview\u002FWeek%201%20Orientation%20Schedule%202022.pdf): sample detailed schedules for the first two weeks of the program\n\n### [Conduct, Culture, and Communications](sources\u002Fdssg-manual\u002Fconduct-culture-and-communications\u002FREADME.md)\nThis section details the DSSG anti-harassment policy, goals of the fellowship, what we hope fellows get out of the experience, the expectations of the fellows, and the DSSG environment. A slideshow version of this can also be found [here](dssg-manual\u002Fconduct-culture-and-communications\u002Fconduct-culture-and-communications.pdf).\n\n## [Curriculum](sources\u002Fcurriculum\u002FREADME.md)\n\nThis section details the various topics we will be covering throughout the summer. This includes:\n\n- [Getting Started](sources\u002Fcurriculum\u002F0_before_you_start\u002F)\n  - [Prerequisites](sources\u002Fcurriculum\u002F0_before_you_start\u002Fprerequisites\u002F)\n  - [Pipelines and Project Workflow](sources\u002Fcurriculum\u002F0_before_you_start\u002Fpipelines-and-project-workflow\u002F)\n  - [Software Setup](sources\u002Fcurriculum\u002Fsetup\u002Fsoftware-setup)\n- [Getting and Keeping Data](sources\u002Fcurriculum\u002F1_getting_and_keeping_data\u002F)\n  - [Web Scraping](sources\u002Fcurriculum\u002F1_getting_and_keeping_data\u002Fbasic-web-scraping\u002F)\n  - [Command Line Tools](sources\u002Fcurriculum\u002F1_getting_and_keeping_data\u002Fcommand-line-tools\u002F)\n  - [csv-to-db](sources\u002Fcurriculum\u002F1_getting_and_keeping_data\u002Fcsv-to-db\u002F)\n  - [Data Security](sources\u002Fcurriculum\u002F1_getting_and_keeping_data\u002Fdata-security-primer\u002F)\n  - [Databases](sources\u002Fcurriculum\u002F1_getting_and_keeping_data\u002Fdatabases\u002F)\n  - [Reproducible ETL](sources\u002Fcurriculum\u002F1_getting_and_keeping_data\u002Freproducible_ETL\u002F)\n- [Data Exploration and Analysis](sources\u002Fcurriculum\u002F2_data_exploration_and_analysis\u002F)\n  - [Advanced SQL](sources\u002Fcurriculum\u002F2_data_exploration_and_analysis\u002Fadvanced_sql\u002F)\n  - [Intro to Data Analysis in Python](sources\u002Fcurriculum\u002F2_data_exploration_and_analysis\u002Fdata-exploration-in-python\u002F)\n  - [GIS analysis](sources\u002Fcurriculum\u002F2_data_exploration_and_analysis\u002Fgis_analysis\u002F)\n  - Git and GitHub\n    - [Git and Github](sources\u002Fcurriculum\u002F2_data_exploration_and_analysis\u002Fgit-and-github\u002F)\n    - [Intro to Git and Python](sources\u002Fcurriculum\u002F2_data_exploration_and_analysis\u002Fintro-to-git-and-python\u002F)\n  - [Network Analysis](sources\u002Fcurriculum\u002F2_data_exploration_and_analysis\u002Fnetwork-analysis\u002F)\n  - [Record Linkage](sources\u002Fcurriculum\u002F2_data_exploration_and_analysis\u002Frecord-linkage\u002F)\n  - [Text Analysis](sources\u002Fcurriculum\u002F2_data_exploration_and_analysis\u002Ftext-analysis\u002F)\n- [Modeling and Machine Learning](sources\u002Fcurriculum\u002F3_modeling_and_machine_learning\u002F)\n  - [Causal Inference](sources\u002Fcurriculum\u002F3_modeling_and_machine_learning\u002Fcausal-inference\u002F)\n  - [Machine Learning](sources\u002Fcurriculum\u002F3_modeling_and_machine_learning\u002Fmachine-learning\u002F)\n    - [Machine Learning Overview](sources\u002Fcurriculum\u002F3_modeling_and_machine_learning\u002Fmachine-learning\u002Fmachine-learning-overview.pdf)\n    - [Ethics in Machine Learning for Public Policy](sources\u002Fcurriculum\u002F3_modeling_and_machine_learning\u002Fmachine-learning\u002Fethics-ML.pdf)\n  - [Operations Research](sources\u002Fcurriculum\u002F3_modeling_and_machine_learning\u002Foperations-research\u002F)\n  - [Quantitative Social Science](sources\u002Fcurriculum\u002F3_modeling_and_machine_learning\u002Fquantitative-social-science\u002F)\n- [Programming Best Practices](sources\u002Fcurriculum\u002F4_programming_best_practices\u002F)\n  - [Writing Legible, Good Code](sources\u002Fcurriculum\u002F4_programming_best_practices\u002Flegible-good-code\u002F)\n  - [Living in Command Land](sources\u002Fcurriculum\u002F4_programming_best_practices\u002Fliving-in-the-terminal\u002F)\n  - [Dotfiles](sources\u002Fcurriculum\u002F4_programming_best_practices\u002Fpimp-my-dotfiles\u002F)\n  - [Making Project Reproducible](sources\u002Fcurriculum\u002F4_programming_best_practices\u002Freproducible-software\u002F)\n  - [Testing](sources\u002Fcurriculum\u002F4_programming_best_practices\u002Ftest-test-test\u002F)\n- [Presentations and Communications](sources\u002Fcurriculum\u002Fcommunication\u002F)\n  - [Good Presentations](sources\u002Fcurriculum\u002Fcommunication\u002Fpresentation-on-presentations.pdf)\n  - [Usability and User Interface Design](sources\u002Fcurriculum\u002Fcommunications\u002Fuser-interface.md)\n- [Tutorial Templates](sources\u002Fcurriculum\u002Ftutorial-template\u002F) (for making your own tutorials!)\n\n## [Wiki](https:\u002F\u002Fgithub.com\u002Fdssg\u002Fwiki\u002Fwiki)\n\nIn [the wiki](https:\u002F\u002Fgithub.com\u002Fdssg\u002Fwiki\u002Fwiki), you will find a bunch of helpful information and instructions that people have found helpful along the way. It covers topics like:\n\n- Accessing S3 from the command line\n- Creating an alias to make Python3 your default (rather than python2)\n- Installing RStudio on your EC2\n- Killing your query\n- Creating a custom jupyter setup\n- Mounting box from ubuntu\n- Pretty Print psql and less output\n- Remotely editing text files in your favorite text editor\n- SQL Server to Postgres\n- Using rpy2\n- VNC Viewer\n\n# Contributing\n\nThis guide is compiled through `mkdocs` and served with github pages. When updating them, you can serve them locally to test your changes via (from the top level of this repo):\n```\nmkdocs serve -f \"$(pwd)\u002Fmkdocs.yml\"\n```\n\nOnce you're ready to publish them, you can do so with:\n```\nmkdocs gh-deploy -f \"$(pwd)\u002Fmkdocs.yml\"\n```\n\n(Note that a bug in the version of `mkdocs` we currently use requires specifying the full path to the configuration file, hence the `$(pwd)` in the command -- we should be able to remove this in the future if we update the dependency)\n","# 欢迎来到社会公益数据科学指南。\n\n## 什么是社会公益数据科学 fellowship？\n\n[社会公益数据科学 fellowship（DSSG）](http:\u002F\u002Fdssgfellowship.org) 是一项实践导向、以项目为基础的暑期项目，于2013年在芝加哥大学启动，现已扩展至全球多个地点，目前由 [社会公益数据科学基金会](http:\u002F\u002Fwww.datascienceforsocialgood.org) 和 [卡内基梅隆大学](http:\u002F\u002Fwww.datasciencepublicpolicy.org) 协调管理。该项目汇聚来自世界各地的学员——通常是研究生（有时也包括高年级本科生），与社会公益组织合作，开展具有社会影响力的机器学习、人工智能和数据科学项目。每年约有1000名申请者，最终选拔20至40名学员，他们来自计算机科学、统计学、数学、工程学、心理学、社会学、经济学和公共政策等多元的计算和定量学科背景。\n\n学员们以跨学科的小团队形式，与全球政府机构和非营利组织合作，围绕教育、健康、能源、交通、刑事司法、社会服务、经济发展及国际发展等领域开展社会公益项目。在整个项目期间，学员们在全职、经验丰富的资深数据科学导师以及具备行业或政府背景的专职项目经理的密切指导下工作。项目结束时，学员们将获得高度专业化的训练，社会公益组织的数据科学能力也将得到提升，并产出一个高质量、可直接用于实地试验和实施的数据科学项目。\n\n除了实践性的项目培训外，该暑期项目还包括基于我们社会公益数据科学课程体系的工作坊、教程和伦理讨论小组，旨在培养学员运用数据科学和人工智能解决社会问题的能力。\n\n## 本指南面向哪些人群？\n\n本指南的主要受众是即将参加 DSSG 的学员，但我们希望所有内容都能公开且易于获取，惠及更广泛的人群。我们期待这份指南不仅对参与 DSSG 暑期项目的学员有所帮助，也能为更多人提供参考。\n\n**如果您正在申请该项目或已被录取为学员，** 请查阅 [手册](dssg-manual\u002F) ，了解如何在抵达前做好准备、迎新培训的内容以及暑期期间的预期安排。\n\n**如果您有兴趣在家自学，** 可以浏览我们工作人员和学员在暑期期间开发的 [教程和教学资源](curriculum\u002F) ，并欢迎提出建议或贡献更多资源。\n\n*我们的另一个目标是促进协作。任何对这类工作感兴趣，或希望借鉴我们的经验创办 DSSG 项目的个人或组织，都可通过 **使用并贡献这些资源** 来实现这一目标。\n\n## 本指南包含哪些内容？\n\n在 [DSSG](http:\u002F\u002Fdssgfellowship.org) ，我们的首要任务是 **培养学员开展负责任的社会公益数据科学\u002F机器学习\u002F人工智能工作** 。本课程体系涵盖了数据科学课程或训练营中的常见内容，但特别强调以社会影响力为导向的问题解决、数据科学与社会科学的融合、对工作伦理影响的深入理解，以及隐私和保密性问题。\n\n为了定义“社会公益数据科学家”这一既神秘又复杂的角色——集数据科学家、助人者、教育者和理想主义者于一身——我们曾在许多个清晨一边喝着唐恩都乐咖啡一边进行哲学式的探讨。最终，我们形成了一份粗略的工作定义，将其归纳为以下技能和知识：\n- **编程，** 因为你需要通过编写代码来告诉计算机该做什么。\n- **计算机科学，** 因为你需要理解数据的结构及其最佳存储方式，以及用于分析数据的算法。\n- **数学和统计学，** 因为 [生活中的一切本质上都是应用数学](https:\u002F\u002Fxkcd.com\u002F435\u002F) ，而没有不确定性度量的数值结果毫无意义。\n- **机器学习，** 因为你需要构建能够不断学习、演化和改进的预测或描述性模型。\n- **社会科学，** 因为你需要掌握如何设计实验来验证你的模型在实际场景中的有效性，理解相关性何时可能暗示因果关系，甚至进行因果推断。\n- **问题与项目范围界定，** 因为你需要将模糊不清的项目描述转化为可解决的实际问题，明确项目目标、你所支持的干预措施、现有及所需的数据，以及需要完成的分析工作。\n- **项目管理，** 以便团队高效推进工作，与合作伙伴有效协作，并共同实现切实可行的解决方案。\n- **隐私与安全，** 因为数据即人，必须得到妥善保护和保密。\n- **伦理、公平性、偏见与透明度，** 因为你的工作有可能被误用或对人们的生活产生负面影响，因此你需要考虑数据和分析中的潜在偏见、伦理和公平性影响，以及如何使你的工作对用户和受其影响的人群具有可解释性和透明度。\n- **沟通能力，** 因为你需要向广大受众清晰地阐述你所做工作的意义及其采用的方法。\n- **社会议题，** 因为你从事这项工作是为了帮助他人，而你并非孤立地工作，所以必须了解你所要影响的人群、地点和议题所处的背景与历史。\n\n所有材料均采用 CC-BY 4.0 许可协议授权。\n![许可：CC BY 4.0](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FLicense-CC%20BY%204.0-lightgrey.svg)\n\n# 目录\n以下链接可以帮助您快速找到所需内容。\n\n## [DSSG 手册](sources\u002Fdssg-manual\u002FREADME.md)\n\n### [暑期概览](sources\u002Fdssg-manual\u002Fsummer-overview\u002FREADME.md)\n本部分涵盖项目概况、与合作伙伴的协作、成果展示、迎新信息，以及以下时间表：\n\n- [暑期总体计划](sources\u002Fdssg-manual\u002Fsummer-overview\u002FHigh%20Level%20Plan%20for%20the%20Summer.pdf)：详细说明了项目每周的目标。\n- 迎新日程示例 [2016 年](sources\u002Fdssg-manual\u002Fsummer-overview\u002FDSSG2016OrientationSchedule.pdf) 和 [2022 年](sources\u002Fdssg-manual\u002Fsummer-overview\u002FWeek%201%20Orientation%20Schedule%202022.pdf)：展示了项目前两周的详细日程安排。\n\n### [行为规范、文化与沟通](sources\u002Fdssg-manual\u002Fconduct-culture-and-communications\u002FREADME.md)\n本节详细介绍了 DSSG 的反骚扰政策、项目的目标、我们期望学员从这段经历中获得什么、对学员的期望，以及 DSSG 的工作环境。该内容的幻灯片版本也可在此处找到：[这里](dssg-manual\u002Fconduct-culture-and-communications\u002Fconduct-culture-and-communications.pdf)。\n\n## [课程大纲](sources\u002Fcurriculum\u002FREADME.md)\n\n本节详细列出了我们在整个暑期将要涵盖的各种主题。其中包括：\n\n- [入门](sources\u002Fcurriculum\u002F0_before_you_start\u002F)\n  - [先决条件](sources\u002Fcurriculum\u002F0_before_you_start\u002Fprerequisites\u002F)\n  - [数据流水线与项目流程](sources\u002Fcurriculum\u002F0_before_you_start\u002Fpipelines-and-project-workflow\u002F)\n  - [软件环境搭建](sources\u002Fcurriculum\u002Fsetup\u002Fsoftware-setup)\n- [数据获取与管理](sources\u002Fcurriculum\u002F1_getting_and_keeping_data\u002F)\n  - [网页爬取](sources\u002Fcurriculum\u002F1_getting_and_keeping_data\u002Fbasic-web-scraping\u002F)\n  - [命令行工具](sources\u002Fcurriculum\u002F1_getting_and_keeping_data\u002Fcommand-line-tools\u002F)\n  - [CSV转数据库](sources\u002Fcurriculum\u002F1_getting_and_keeping_data\u002Fcsv-to-db\u002F)\n  - [数据安全基础](sources\u002Fcurriculum\u002F1_getting_and_keeping_data\u002Fdata-security-primer\u002F)\n  - [数据库](sources\u002Fcurriculum\u002F1_getting_and_keeping_data\u002Fdatabases\u002F)\n  - [可复现的 ETL 流程](sources\u002Fcurriculum\u002F1_getting_and_keeping_data\u002Freproducible_ETL\u002F)\n- [数据探索与分析](sources\u002Fcurriculum\u002F2_data_exploration_and_analysis\u002F)\n  - [高级 SQL](sources\u002Fcurriculum\u002F2_data_exploration_and_analysis\u002Fadvanced_sql\u002F)\n  - [Python 数据分析入门](sources\u002Fcurriculum\u002F2_data_exploration_and_analysis\u002Fdata-exploration-in-python\u002F)\n  - [GIS 分析](sources\u002Fcurriculum\u002F2_data_exploration_and_analysis\u002Fgis_analysis\u002F)\n  - Git 和 GitHub\n    - [Git 和 GitHub](sources\u002Fcurriculum\u002F2_data_exploration_and_analysis\u002Fgit-and-github\u002F)\n    - [Git 与 Python 入门](sources\u002Fcurriculum\u002F2_data_exploration_and_analysis\u002Fintro-to-git-and-python\u002F)\n  - [网络分析](sources\u002Fcurriculum\u002F2_data_exploration_and_analysis\u002Fnetwork-analysis\u002F)\n  - [记录链接](sources\u002Fcurriculum\u002F2_data_exploration_and_analysis\u002Frecord-linkage\u002F)\n  - [文本分析](sources\u002Fcurriculum\u002F2_data_exploration_and_analysis\u002Ftext-analysis\u002F)\n- [建模与机器学习](sources\u002Fcurriculum\u002F3_modeling_and_machine_learning\u002F)\n  - [因果推断](sources\u002Fcurriculum\u002F3_modeling_and_machine_learning\u002Fcausal-inference\u002F)\n  - [机器学习](sources\u002Fcurriculum\u002F3_modeling_and_machine_learning\u002Fmachine-learning\u002F)\n    - [机器学习概览](sources\u002Fcurriculum\u002F3_modeling_and_machine_learning\u002Fmachine-learning\u002Fmachine-learning-overview.pdf)\n    - [公共政策中的机器学习伦理](sources\u002Fcurriculum\u002F3_modeling_and_machine_learning\u002Fmachine-learning\u002Fethics-ML.pdf)\n  - [运筹学](sources\u002Fcurriculum\u002F3_modeling_and_machine_learning\u002Foperations-research\u002F)\n  - [定量社会科学](sources\u002Fcurriculum\u002F3_modeling_and_machine_learning\u002Fquantitative-social-science\u002F)\n- [编程最佳实践](sources\u002Fcurriculum\u002F4_programming_best_practices\u002F)\n  - [编写易读、高质量的代码](sources\u002Fcurriculum\u002F4_programming_best_practices\u002Flegible-good-code\u002F)\n  - [在命令行世界中生活](sources\u002Fcurriculum\u002F4_programming_best_practices\u002Fliving-in-the-terminal\u002F)\n  - [Dotfiles 配置](sources\u002Fcurriculum\u002F4_programming_best_practices\u002Fpimp-my-dotfiles\u002F)\n  - [使项目具有可复现性](sources\u002Fcurriculum\u002F4_programming_best_practices\u002Freproducible-software\u002F)\n  - [测试](sources\u002Fcurriculum\u002F4_programming_best_practices\u002Ftest-test-test\u002F)\n- [演示与沟通](sources\u002Fcurriculum\u002Fcommunication\u002F)\n  - [优秀的演示技巧](sources\u002Fcurriculum\u002Fcommunication\u002Fpresentation-on-presentations.pdf)\n  - [可用性和用户界面设计](sources\u002Fcurriculum\u002Fcommunications\u002Fuser-interface.md)\n- [教程模板](sources\u002Fcurriculum\u002Ftutorial-template\u002F)（用于创建您自己的教程！）\n\n## [维基](https:\u002F\u002Fgithub.com\u002Fdssg\u002Fwiki\u002Fwiki)\n\n在[维基](https:\u002F\u002Fgithub.com\u002Fdssg\u002Fwiki\u002Fwiki)中，您可以找到许多实用的信息和指南，这些内容对大家在学习过程中非常有帮助。它涵盖了以下主题：\n\n- 通过命令行访问 S3\n- 创建别名以将 Python3 设置为默认版本（而非 Python2）\n- 在 EC2 上安装 RStudio\n- 终止您的查询\n- 创建自定义 Jupyter 环境\n- 从 Ubuntu 挂载 Box\n- 格式化 psql 输出并减少显示内容\n- 使用您喜欢的文本编辑器远程编辑文件\n- 将 SQL Server 迁移到 Postgres\n- 使用 rpy2\n- VNC 查看器\n\n# 贡献说明\n\n本指南使用 `mkdocs` 编译，并通过 GitHub Pages 提供服务。更新时，您可以在本地运行以下命令来测试更改：\n```\nmkdocs serve -f \"$(pwd)\u002Fmkdocs.yml\"\n```\n\n当您准备好发布时，可以使用以下命令进行部署：\n```\nmkdocs gh-deploy -f \"$(pwd)\u002Fmkdocs.yml\"\n```\n\n（请注意，我们当前使用的 `mkdocs` 版本中存在一个 bug，需要指定配置文件的完整路径，因此命令中使用了 `$(pwd)`。未来如果我们更新依赖项，应该可以移除这一要求。）","# Hitchhiker's Guide 快速上手指南\n\n本指南旨在帮助开发者快速了解并使用 **Hitchhiker's Guide to Data Science for Social Good** 资源库。该项目是“数据科学促进社会公益奖学金（DSSG）”的核心课程与手册集合，涵盖从编程基础、数据处理、机器学习到伦理道德的全方位内容。\n\n## 环境准备\n\n本项目主要是一个文档和教程资源库，本身不是一个需要编译的二进制软件，但运行其中的代码示例和构建本地文档站点需要以下环境：\n\n### 系统要求\n- **操作系统**: Linux, macOS 或 Windows (推荐 WSL2)\n- **Python**: 3.8 或更高版本\n- **Git**: 用于克隆仓库\n\n### 前置依赖\n为了浏览教程内容并运行相关代码示例，建议安装以下工具：\n- **包管理器**: `pip` 或 `conda`\n- **文档构建工具**: `mkdocs` 及其插件（用于本地预览文档）\n- **数据科学栈**: `pandas`, `numpy`, `scikit-learn`, `jupyter`, `sqlalchemy` 等（具体依赖视所选教程模块而定）\n- **数据库**: PostgreSQL (许多数据管道教程涉及数据库操作)\n\n## 安装步骤\n\n### 1. 克隆仓库\n首先，将项目代码克隆到本地：\n\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Fdssg\u002Fhitchhikers-guide.git\ncd hitchhikers-guide\n```\n\n*(注：如果国内访问 GitHub 较慢，建议使用 Gitee 镜像或配置 Git 代理加速)*\n\n### 2. 安装文档构建依赖\n若需在本地预览和修改文档（基于 `mkdocs`），请安装所需 Python 包：\n\n```bash\npip install mkdocs mkdocs-material\n```\n\n### 3. 安装数据科学依赖（可选）\n若要运行 `curriculum` 目录下的具体代码教程，建议创建一个虚拟环境并安装通用数据科学库：\n\n```bash\npython -m venv dssg-env\nsource dssg-env\u002Fbin\u002Factivate  # Windows 用户请使用: dssg-env\\Scripts\\activate\npip install pandas numpy scikit-learn jupyterlab sqlalchemy psycopg2-binary requests beautifulsoup4\n```\n\n## 基本使用\n\n### 方式一：本地预览文档网站\n你可以启动一个本地服务器来浏览结构化的课程手册和教程，体验与线上网站一致的效果。\n\n在项目根目录下运行：\n\n```bash\nmkdocs serve -f \"$(pwd)\u002Fmkdocs.yml\"\n```\n\n启动成功后，在浏览器中访问 `http:\u002F\u002F127.0.0.1:8000` 即可查看完整指南，包括：\n- **DSSG Manual**: 项目概览、行为规范、夏季计划表。\n- **Curriculum**: 分模块教程（数据获取、探索分析、建模、最佳实践等）。\n- **Wiki**: 实用技巧集锦（如 S3 访问、环境配置等）。\n\n### 方式二：直接学习教程内容\n你可以直接进入 `sources\u002Fcurriculum` 目录，按照模块顺序进行学习。例如，开始第一个关于“数据获取”的教程：\n\n```bash\ncd sources\u002Fcurriculum\u002F1_getting_and_keeping_data\u002Fbasic-web-scraping\u002F\n# 查看该目录下的 README.md 或 Jupyter Notebook 文件进行学习\n```\n\n### 方式三：贡献与更新\n如果你希望添加新的教程或修正现有内容，可以在本地修改 Markdown 文件，并通过以下命令部署到 GitHub Pages（需配置好 Git 权限）：\n\n```bash\nmkdocs gh-deploy -f \"$(pwd)\u002Fmkdocs.yml\"\n```\n\n---\n*提示：本指南强调“负责任的數據科学”，在学习技术的同时，请务必关注 `Ethics, fairness, bias, and transparency` 相关章节。*","某非营利组织与高校联合发起暑期项目，旨在帮助缺乏技术背景的研究生团队快速构建能解决教育公平或医疗资源分配问题的数据科学模型。\n\n### 没有 hitchhikers-guide 时\n- 团队成员虽具备编程基础，但面对真实的社会公益数据时，不知如何平衡模型精度与用户隐私保护，容易触碰伦理红线。\n- 缺乏跨学科协作框架，计算机背景的学生难以理解社会学或公共政策领域的核心诉求，导致最终交付的模型“技术上完美但无法落地”。\n- 从零摸索项目流程，浪费大量时间在重复的环境配置和基础教程上，压缩了实际解决社会问题的核心开发时间。\n- 对“负责任的 AI\"概念模糊，在数据处理阶段忽视偏见检测，可能无意中加剧了原本想要缓解的社会不公。\n\n### 使用 hitchhikers-guide 后\n- 团队直接参照指南中的伦理讨论组和隐私规范，在建模初期就建立了严格的数据脱敏与公平性评估机制，确保技术方案安全合规。\n- 利用指南内整合的跨学科课程，技术人员快速掌握了将社会科学研究方法融入数据分析的流程，使模型真正契合政府或非营利组织的业务痛点。\n- 依托成熟的暑期培训大纲和实战教程，新人能在第一周完成环境搭建并进入核心代码开发，大幅提升了项目迭代效率。\n- 通过指南定义的“公益数据科学家”能力图谱，团队明确了从编码到沟通的全方位技能要求，有效避免了因视角单一导致的算法偏见。\n\nhitchhikers-guide 不仅是一套技术教程，更是连接数据科学与社会价值的桥梁，让技术人才能在坚守伦理底线的前提下高效解决真实世界难题。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fdssg_hitchhikers-guide_1d31a5b9.png","dssg","Data Science for Social Good","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fdssg_ac4f9257.jpg","",null,"info@datascienceforsocialgood.org","datascifellows","http:\u002F\u002Fdssgfellowship.org","https:\u002F\u002Fgithub.com\u002Fdssg",[82,86,90,94,98,102,106,109],{"name":83,"color":84,"percentage":85},"Jupyter Notebook","#DA5B0B",83.3,{"name":87,"color":88,"percentage":89},"HTML","#e34c26",13.8,{"name":91,"color":92,"percentage":93},"Python","#3572A5",2.3,{"name":95,"color":96,"percentage":97},"TeX","#3D6117",0.4,{"name":99,"color":100,"percentage":101},"Makefile","#427819",0.2,{"name":103,"color":104,"percentage":105},"Shell","#89e051",0.1,{"name":107,"color":108,"percentage":105},"PLSQL","#dad8d8",{"name":110,"color":111,"percentage":112},"CSS","#663399",0,1045,284,"2026-04-10T16:23:01","未说明",{"notes":118,"python":119,"dependencies":120},"本项目主要为数据科学和社会公益的培训课程与文档指南，而非单一的 AI 模型推理工具。运行环境需支持命令行工具、数据库操作及 Jupyter Notebook。文档提及需在 EC2 上安装 RStudio，并建议使用 Python 3 作为默认版本。构建本地文档站点需安装 mkdocs。无特定的 GPU 或大内存硬性要求，具体取决于所运行的数据分析任务。","3.x (建议设为默认，区别于 python2)",[121,122,123,124,125,126],"mkdocs","Jupyter","Git","SQL (Postgres\u002FSQL Server)","R (可选，需 RStudio)","rpy2 (可选)",[16,128,14],"其他",[130,131,72,132,133],"tutorial-exercises","data-science","training","machine-learning","2026-03-27T02:49:30.150509","2026-04-16T02:02:39.515037",[137,142,147,152],{"id":138,"question_zh":139,"answer_zh":140,"source_url":141},34675,"项目管理部分的文档链接失效了，该如何处理？","由于源链接指向的 Google Doc 已不存在且无法找到替代链接，维护者已直接移除了整个项目管理部分的内容。","https:\u002F\u002Fgithub.com\u002Fdssg\u002Fhitchhikers-guide\u002Fissues\u002F109",{"id":143,"question_zh":144,"answer_zh":145,"source_url":146},34676,"是否有 Julia 语言的课程教程内容可供学习？","目前没有可用的书面内容。之前的 Julia 课程是以交互式教程会话的形式进行的，因此没有留下适合发布的静态教材或文档。","https:\u002F\u002Fgithub.com\u002Fdssg\u002Fhitchhikers-guide\u002Fissues\u002F106",{"id":148,"question_zh":149,"answer_zh":150,"source_url":151},34673,"如果在多台机器上使用项目，需要为每台机器生成不同的 SSH 密钥吗？","不需要。你只需要生成一个密钥，就可以在所有机器上重复使用同一个 SSH 密钥。","https:\u002F\u002Fgithub.com\u002Fdssg\u002Fhitchhikers-guide\u002Fissues\u002F1",{"id":153,"question_zh":154,"answer_zh":155,"source_url":156},34674,"为什么找不到“社会科学中的理论与理论化”课程内容的文件链接？","原始链接指向的 Google Slides 文件可能已被删除。这是因为芝加哥大学（UChicago）移除了相关的 Google Drive 访问权限，导致文件无法找回。目前该部分内容暂时缺失。","https:\u002F\u002Fgithub.com\u002Fdssg\u002Fhitchhikers-guide\u002Fissues\u002F104",[]]