[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-ruc-datalab--DeepAnalyze":3,"tool-ruc-datalab--DeepAnalyze":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 
代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",157379,2,"2026-04-15T23:32:42",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",108322,"2026-04-10T11:39:34",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 
协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[52,13,15,14],"插件",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":32,"last_commit_at":59,"category_tags":60,"status":17},4721,"markitdown","microsoft\u002Fmarkitdown","MarkItDown 是一款由微软 AutoGen 团队打造的轻量级 Python 工具，专为将各类文件高效转换为 Markdown 格式而设计。它支持 PDF、Word、Excel、PPT、图片（含 OCR）、音频（含语音转录）、HTML 乃至 YouTube 链接等多种格式的解析，能够精准提取文档中的标题、列表、表格和链接等关键结构信息。\n\n在人工智能应用日益普及的今天，大语言模型（LLM）虽擅长处理文本，却难以直接读取复杂的二进制办公文档。MarkItDown 恰好解决了这一痛点，它将非结构化或半结构化的文件转化为模型“原生理解”且 Token 效率极高的 Markdown 格式，成为连接本地文件与 AI 分析 pipeline 的理想桥梁。此外，它还提供了 MCP（模型上下文协议）服务器，可无缝集成到 Claude Desktop 等 LLM 应用中。\n\n这款工具特别适合开发者、数据科学家及 AI 研究人员使用，尤其是那些需要构建文档检索增强生成（RAG）系统、进行批量文本分析或希望让 AI 助手直接“阅读”本地文件的用户。虽然生成的内容也具备一定可读性，但其核心优势在于为机器",93400,"2026-04-06T19:52:38",[52,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":66,"readme_en":67,"readme_zh":68,"quickstart_zh":69,"use_case_zh":70,"hero_image_url":71,"owner_login":72,"owner_name":73,"owner_avatar_url":74,"owner_bio":75,"owner_company":75,"owner_location":75,"owner_email":75,"owner_twitter":75,"owner_website":76,"owner_url":77,"languages":78,"stars":108,"forks":109,"last_commit_at":110,"license":111,"difficulty_score":112,"env_os":113,"env_gpu":114,"env_ram":115,"env_deps":116,"category_tags":128,"github_topics":130,"view_count":32,"oss_zip_url":75,"oss_zip_packed_at":75,"status":17,"created_at":151,"updated_at":152,"faqs":153,"releases":154},7855,"ruc-datalab\u002FDeepAnalyze","DeepAnalyze","DeepAnalyze is the first agentic LLM for autonomous data science. 
🎈你的AI数据分析师，自动分析大量数据，一键生成专业分析报告！","DeepAnalyze 是全球首款专为自主数据科学设计的智能体大模型，堪称您的专属 AI 数据分析师。它无需人工干预，即可独立完成从数据清洗、探索分析、建模预测到可视化展示及报告生成的全流程任务。\n\n面对海量且格式各异的数据（如数据库、CSV、Excel、JSON 乃至非结构化文本），用户往往难以快速提取核心价值并产出专业结论。DeepAnalyze 正是为了解决这一痛点而生，它能像资深分析师一样对多源数据进行深度研究，并一键输出达到专业水准的分析报告，极大降低了数据科学的门槛与时间成本。\n\n这款工具非常适合希望自动化处理数据任务的研究人员、需要快速验证假设的数据科学家，以及想要部署私有化分析助手的开发者。其独特亮点在于完全开源：不仅公开了 8B 参数量的模型权重和核心代码，还分享了包含 50 万条指令的训练数据集。此外，最新更新的 WebUI 支持基于 Docker 的沙箱代码执行，确保了分析过程的安全性与可复现性，让用户能轻松构建属于自己的智能数据分析工作流。","﻿\u003Cp align=\"center\" width=\"100%\">\n\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fruc-datalab_DeepAnalyze_readme_dcb9f96de3fa.png\" alt=\"DeepAnalyze\" style=\"width: 60%; min-width: 300px; display: block; margin: auto;\">\n\u003C\u002Fp>\n\n# DeepAnalyze: Agentic Large Language Models for Autonomous Data Science\n[![arXiv](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FarXiv-2510.16872-b31b1b.svg?logo=arXiv)](https:\u002F\u002Farxiv.org\u002Fabs\u002F2510.16872)\n[![homepage](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F%F0%9F%8C%90%20Homepage%20-DeepAnalyze%20Cases-blue.svg)](https:\u002F\u002Fruc-deepanalyze.github.io\u002F)\n[![model](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F%F0%9F%A4%97%20Huggingface%20-DeepAnalyze--8B-orange.svg)](https:\u002F\u002Fhuggingface.co\u002FRUC-DataLab\u002FDeepAnalyze-8B)\n[![data](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F%F0%9F%93%9A%20Datasets%20-DataScience--Instruct--500K-darkgreen.svg)](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FRUC-DataLab\u002FDataScience-Instruct-500K)\n[![star](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fruc-datalab\u002FDeepAnalyze?style=social&label=Code+Stars)](https:\u002F\u002Fgithub.com\u002Fruc-datalab\u002FDeepAnalyze)\n![Badge](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fruc-datalab_DeepAnalyze_readme_dac1e298edbd.png)  
[![wechat](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FWeChat-%E5%8A%A0%E5%85%A5DeepAnalyze%E4%BA%A4%E6%B5%81%E8%AE%A8%E8%AE%BA%E7%BE%A4-black?logo=wechat&logoColor=07C160)](.\u002Fassets\u002Fwechat.jpg) \n\n[![twitter](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F@Brian%20Roemmele-gray?logo=x&logoColor=white&labelColor=black)](https:\u002F\u002Fx.com\u002FBrianRoemmele\u002Fstatus\u002F1981015483823571352) [![twitter](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F@Dr%20Singularity-gray?logo=x&logoColor=white&labelColor=black)](https:\u002F\u002Fx.com\u002FDr_Singularity\u002Fstatus\u002F1981010771338498241) [![twitter](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F@Gorden%20Sun-gray?logo=x&logoColor=white&labelColor=black)](https:\u002F\u002Fx.com\u002FGorden_Sun\u002Fstatus\u002F1980573407386423408) [![twitter](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F@AIGCLINK-gray?logo=x&logoColor=white&labelColor=black)](https:\u002F\u002Fx.com\u002Faigclink\u002Fstatus\u002F1980554517126246642) [![twitter](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F@Python%20Developer-gray?logo=x&logoColor=white&labelColor=black)](https:\u002F\u002Fx.com\u002FPython_Dv\u002Fstatus\u002F1980667557318377871) [![twitter](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F@meng%20shao-gray?logo=x&logoColor=white&labelColor=black)](https:\u002F\u002Fx.com\u002Fshao__meng\u002Fstatus\u002F1980623242114314531) \n\n\n> **Authors**: **[Shaolei Zhang](https:\u002F\u002Fzhangshaolei1998.github.io\u002F), [Ju Fan*](http:\u002F\u002Fiir.ruc.edu.cn\u002F~fanj\u002F), [Meihao Fan](https:\u002F\u002Fscholar.google.com\u002Fcitations?user=9RTm2qoAAAAJ), [Guoliang Li](https:\u002F\u002Fdbgroup.cs.tsinghua.edu.cn\u002Fligl\u002F), [Xiaoyong Du](http:\u002F\u002Finfo.ruc.edu.cn\u002Fjsky\u002Fszdw\u002Fajxjgcx\u002Fjsjkxyjsx1\u002Fjs2\u002F7374b0a3f58045fc9543703ccea2eb9c.htm)**\n>\n> Renmin University of China, Tsinghua University\n\n\n**DeepAnalyze** is the first agentic LLM for 
autonomous data science. It can autonomously complete a wide range of data-centric tasks without human intervention, supporting:\n- 🛠 **Entire data science pipeline**: Automatically perform any data science tasks such as data preparation, analysis, modeling, visualization, and report generation.\n- 🔍 **Open-ended data research**: Conduct deep research on diverse data sources, including structured data (Databases, CSV, Excel), semi-structured data (JSON, XML, YAML), and unstructured data (TXT, Markdown), and finally produce analyst-grade research reports.\n- 📊 **Fully open-source**: The [model](https:\u002F\u002Fhuggingface.co\u002FRUC-DataLab\u002FDeepAnalyze-8B), [code](https:\u002F\u002Fgithub.com\u002Fruc-datalab\u002FDeepAnalyze), [training data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FRUC-DataLab\u002FDataScience-Instruct-500K), and [demo](https:\u002F\u002Fhuggingface.co\u002FRUC-DataLab\u002FDeepAnalyze-8B) of DeepAnalyze are all open-sourced, allowing you to deploy or extend your own data analysis assistant.\n\n\u003Cp align=\"center\" width=\"100%\">\n\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fruc-datalab_DeepAnalyze_readme_9b2d09fd4154.jpg\" alt=\"deepanalyze\" style=\"width: 70%; min-width: 300px; display: block; margin: auto;\">\n\u003C\u002Fp>\n\n\n## 🔥 News\n\n- **[2026.03.16]**: Update DeepAnalyze **WebUI v2**, featuring a smoother UI, support for the **HeyWhale API**, and support for **Docker-based sandboxed code execution**. 
More details in [Readme](.\u002Fdemo\u002Fchat_v2\u002FREADME.md) .\n\n- **[2026.01.31]**: 🎉🎉🎉DeepAnalyze served as the official agent supporting the **[2026年(第19届)中国大学生计算机设计大赛大数据主题赛 (2026 (19th) China Collegiate Computer Design Contest – Big Data Track)](https:\u002F\u002Fjsjds.dhu.edu.cn\u002F2025\u002F0322\u002Fc20379a371447\u002Fpage.htm)**.\n\n- **[2025.12.28] ANNOUNCEMENT: DeepAnalyze API Keys Are Now Available 🎉🎉🎉**  You can now apply for your API key via this [Google Form](https:\u002F\u002Fforms.gle\u002FYxVkCzczqq8jeciw9) or this [Feishu Form](https:\u002F\u002Fheywhale.feishu.cn\u002Fshare\u002Fbase\u002FshrcnnBRgO0x2qhx40yq4m1HxUg). For full details and usage instructions, please refer to the [Guide](.\u002Fdocs\u002FDeepAnalyze_API_Key_Usage_Guide.md) or the [Feishu Wiki](https:\u002F\u002Fheywhale.feishu.cn\u002Fwiki\u002FTcVmw314liwCiKkxnttc2CnInfg).\n\n\n- **[2025.11.13]**: DeepAnalyze now supports OpenAI-style API endpointsis and is accessible through the Command Line Terminal UI. Thanks to the contributor [@LIUyizheSDU](https:\u002F\u002Fgithub.com\u002FLIUyizheSDU\u002F)\n\n- **[2025.11.08]**: DeepAnalyze is now accessible through the JupyterUI, building based on [jupyter-mcp-server](https:\u002F\u002Fgithub.com\u002Fdatalayer\u002Fjupyter-mcp-server). Thanks to the contributor [@ChengJiale150](https:\u002F\u002Fgithub.com\u002FChengJiale150).\n\n- **[2025.10.28]**: We welcome all contributions, including improving the DeepAnalyze and sharing use cases (see [`CONTRIBUTION.md`](CONTRIBUTION.md)). 
All merged PRs will be listed as contributors.\n\n- **[2025.10.27]**: DeepAnalyze has attracted widespread attention, gaining **1K+** GitHub stars and **200K+** Twitter views within a week.\n\n- **[2025.10.21]**: DeepAnalyze's [paper](https:\u002F\u002Farxiv.org\u002Fabs\u002F2510.16872), [code](https:\u002F\u002Fgithub.com\u002Fruc-datalab\u002FDeepAnalyze), [model](https:\u002F\u002Fhuggingface.co\u002FRUC-DataLab\u002FDeepAnalyze-8B), [training data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FRUC-DataLab\u002FDataScience-Instruct-500K) are released!\n\n## 🖥 Demo\n\n### WebUI\n\nhttps:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Fassets\u002F04184975-7ee7-4ae0-8761-7a7550c5c8fe\n\u003Cp align=\"center\" width=\"100%\">\nUpload the data, DeepAnalyze can perform data-oriented deep research 🔍 and any data-centric tasks 🛠\n\u003C\u002Fp>\n\n- Clone this repo and download [DeepAnalyze-8B](https:\u002F\u002Fhuggingface.co\u002FRUC-DataLab\u002FDeepAnalyze-8B).\n- Deploy DeepAnalyze-8B via vllm: `vllm serve DeepAnalyze-8B`\n- Run these scripts to launch the API and interface, and then interact through the browser (http:\u002F\u002Flocalhost:4000):\n    ```bash\n    cd demo\u002Fchat\u002Ffrontend\n    npm install\n    cd ..\n    bash start.sh\n    \n    # stop the api and interface\n    bash stop.sh\n    \n    # stop vllm if needed\n    ```\n- If you want to deploy under a specific IP, please replace localhost with your IP address in [.\u002Fdemo\u002Fchat\u002Fbackend.py](.\u002Fdemo\u002Fchat\u002Fbackend.py) and [.\u002Fdemo\u002Fchat\u002Ffrontend\u002Flib\u002Fconfig.ts](.\u002Fdemo\u002Fchat\u002Ffrontend\u002Flib\u002Fconfig.ts)\n\n### WebUI v2\n\nhttps:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Fassets\u002F2dd1d2aa-6fb9-4202-bc8d-cbe874844725\n\u003Cp align=\"center\" width=\"100%\">\nUpload the data, DeepAnalyze can perform data-oriented deep research 🔍 and any data-centric tasks 🛠\n\u003C\u002Fp> \n\n- A more streamlined UI\n- Added support 
for HeyWhale API keys\n- Added support for a Docker-based sandbox code execution environment.\n- The usage method is the same as WebUI.\n\n    ```bash\n    cd demo\u002Fchat_v2\u002Ffrontend  \n    npm install\n    cd ..\n    cp .env.example .env \n    bash start.sh\n    # stop the api and interface\n    bash stop.sh\n    \n    # stop vllm if needed\n    ```\n\n### JupyterUI\n\nhttps:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Fassets\u002Fa2335f45-be0e-4787-a4c1-e93192891c5f\n\u003Cp align=\"center\" width=\"100%\">\nFamiliar with Jupyter Notebook? Try DeepAnalyze through the JupyterUI!\n\u003C\u002Fp>\n\n- This Demo runs Jupyter Lab as frontend, creating a new notebook, converting `\u003CAnalyze|Understand|Answer>` to Markdown cells, converting `\u003CCode>` to Code cells and executing them as `\u003CExecute>`.\n- Go to [demo\u002Fjupyter](.\u002Fdemo\u002Fjupyter) to see more and try!\n- 👏Thanks a lot to the contributor [@ChengJiale150](https:\u002F\u002Fgithub.com\u002FChengJiale150).\n\n### CLI\n\nhttps:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Fassets\u002F018acae5-b979-4143-ae1e-5b74da453c1d\n\u003Cp align=\"center\" width=\"100%\">\nTry DeepAnalyze through the command-line interface\n\u003C\u002Fp>\n\n- Deploy DeepAnalyze-8B via vllm: `vllm serve DeepAnalyze-8B`\n\n- Start the API server and launch the CLI interface:\n    ```bash\n    cd API\n    python start_server.py  # In one terminal\n    \n    cd demo\u002Fcli\n    python api_cli.py       # In another terminal (English)\n    # or\n    python api_cli_ZH.py    # In another terminal (Chinese)\n    ```\n    \n- The CLI provides a Rich-based beautiful interface with file upload support and real-time streaming responses.\n\n- Supports both English and Chinese interfaces .\n\n    \n\n> [!TIP]\n>\n> Clone this repository to deploy DeepAnalyze locally as your data analyst, completing any data science tasks without any workflow or closed-source APIs.\n>\n> 🔥 The UI of the demo is an initial version. 
Welcome to further develop it, and we will include you as a contributor.\n\n## 🚀 Quick Start\n\n### 🔑 **Use the DeepAnalyze API**\n\n**API keys are now available!**\n\nTo request your key, please fill out one of the following application forms:\n*   **[Primary Form (Google)](https:\u002F\u002Fforms.gle\u002FYxVkCzczqq8jeciw9)**\n*   **[Alternative Form (Feishu)](https:\u002F\u002Fheywhale.feishu.cn\u002Fshare\u002Fbase\u002FshrcnnBRgO0x2qhx40yq4m1HxUg)**\n\n**📚 For comprehensive usage instructions, please refer to the API guide:**\n\n*   **[Documentation](.\u002Fdocs\u002FDeepAnalyze_API_Key_Usage_Guide.md)**\n*   **[Feishu Wiki](https:\u002F\u002Fheywhale.feishu.cn\u002Fwiki\u002FTcVmw314liwCiKkxnttc2CnInfg)**\n\n\n\n### Model Download\n\nDownload model in  [RUC-DataLab\u002FDeepAnalyze-8B · Hugging Face](https:\u002F\u002Fhuggingface.co\u002FRUC-DataLab\u002FDeepAnalyze-8B)  or  [DeepAnalyze-8B · 模型库](https:\u002F\u002Fwww.modelscope.cn\u002Fmodels\u002FRUC-DataLab\u002FDeepAnalyze-8B\u002Fsummary)\n\n#### 📊 Memory Configuration Recommended Parameters Table\n\n| GPU Memory | Model Type | Recommended max-model-len | Use FP8 KV Cache |\n|------------|------------|--------------------------|-----------------------|\n| **16GB** | 8-bit Quantized | 8192 | ✓ |\n| **16GB** | 4-bit Quantized | 49152 | ✓ |\n| **24GB** | Original Model | 16384 | ✓ |\n| **24GB** | 8-bit Quantized | 98304 | ✓ |\n| **24GB** | 4-bit Quantized | 131072 | ✓ |\n| **40GB** | Original Model | 131072 | ✓ |\n| **40GB** | 8-bit Quantized | 131072 |  |\n| **80GB** | Original Model | 131072 |  |\n\nTo obtain the quantized model, you can use `.\u002Fquantize.py` .\n\n#### 🚀 vLLM Launch Command Template\n\n##### General Command Template\n```bash\npython -m vllm.entrypoints.openai.api_server \\\n  --model \u003Cmodel_path> \\\n  --served-model-name DeepAnalyze-8B \\\n  --max-model-len \u003Cselect_from_table_above> \\\n  --gpu-memory-utilization 0.95 \\\n  --port 8000 \\\n  \u003Cadd_fp8_if_required> \\\n  
--trust-remote-code\n```\n\n##### Command Examples by Scenario\n\n**Scenario 1: 16GB GPU Memory Users (Recommended: 4-bit Quantized Version)**\n\n```bash\npython -m vllm.entrypoints.openai.api_server \\\n  --model \u002Fpath\u002Fto\u002Fdeepanalyze\u002F4bit \\\n  --served-model-name DeepAnalyze-8B \\\n  --max-model-len 49152 \\\n  --gpu-memory-utilization 0.95 \\\n  --port 8000 \\\n  --kv-cache-dtype fp8 \\\n  --trust-remote-code\n```\n\n**Scenario 2: 24GB GPU Memory Users (For Maximum Context Length)**\n\n```bash\npython -m vllm.entrypoints.openai.api_server \\\n  --model \u002Fpath\u002Fto\u002Fdeepanalyze\u002F4bit \\\n  --served-model-name DeepAnalyze-8B \\\n  --max-model-len 131072 \\\n  --gpu-memory-utilization 0.95 \\\n  --port 8000 \\\n  --kv-cache-dtype fp8 \\\n  --trust-remote-code\n```\n\n**Scenario 3: 80GB GPU Memory Users (Best Performance)**\n\n```bash\npython -m vllm.entrypoints.openai.api_server \\\n  --model \u002Fpath\u002Fto\u002Foriginal\u002Fmodel \\\n  --served-model-name DeepAnalyze-8B \\\n  --max-model-len 131072 \\\n  --gpu-memory-utilization 0.95 \\\n  --port 8000 \\\n  --trust-remote-code\n```\n\n#### Quick Selection Guide\n\n- **Limited Memory (\u003C24GB)**: Use 4-bit Quantized Version + FP8 KV Cache\n- **Balanced Configuration (24-40GB)**: Choose model type based on requirements\n- **Sufficient Memory (≥40GB)**: Use Original Model for best precision\n\nAfter launching, the API service can be accessed via `http:\u002F\u002Flocalhost:8000\u002Fv1\u002Fcompletions`.\n\n### Requirements\n\n- Install packages: `torch`, `transformers`, `vllm>=0.8.5`\n    ```bash\n    conda create -n deepanalyze python=3.12 -y\n    conda activate deepanalyze\n    pip install -r requirements.txt\n    \n    # For training\n    (cd .\u002Fdeepanalyze\u002Fms-swift\u002F && pip install -e .)\n    (cd .\u002Fdeepanalyze\u002FSkyRL\u002F && pip install -e .)\n    ```\n- [`requirements.txt`](requirements.txt) lists the minimal dependencies required for DeepAnalyze 
inference.\nFor training, please refer to [`.\u002Fdeepanalyze\u002Fms-swift\u002Frequirements.txt`](.\u002Fdeepanalyze\u002Fms-swift\u002Frequirements.txt) and [`.\u002Fdeepanalyze\u002FSkyRL\u002Fpyproject.toml`](.\u002Fdeepanalyze\u002FSkyRL\u002Fpyproject.toml)\n- We recommend separating the inference and training environments to avoid dependency conflicts.\n\n### Command Interaction\n\n- Deploy DeepAnalyze-8B via vllm: `vllm serve DeepAnalyze-8B`\n\n- Run these scripts for any data science tasks:\n  - You can specify **any data science tasks**, including specific data tasks and open-ended data research.\n  - You can specify **any number of data sources**, and DeepAnalyze will automatically explore them.\n  - You can specify **any type of data sources**, e.g., structured data (Databases, CSV, Excel), semi-structured data (JSON, XML, YAML), and unstructured data (TXT, Markdown)\n\n  ```python\n  from deepanalyze import DeepAnalyzeVLLM\n  \n  prompt = \"\"\"# Instruction\n  Generate a data science report.\n  \n  # Data\n  File 1: {\"name\": \"bool.xlsx\", \"size\": \"4.8KB\"}\n  File 2: {\"name\": \"person.csv\", \"size\": \"10.6KB\"}\n  File 3: {\"name\": \"disabled.xlsx\", \"size\": \"5.6KB\"}\n  File 4: {\"name\": \"enlist.csv\", \"size\": \"6.7KB\"}\n  File 5: {\"name\": \"filed_for_bankrupcy.csv\", \"size\": \"1.0KB\"}\n  File 6: {\"name\": \"longest_absense_from_school.xlsx\", \"size\": \"16.0KB\"}\n  File 7: {\"name\": \"male.xlsx\", \"size\": \"8.8KB\"}\n  File 8: {\"name\": \"no_payment_due.xlsx\", \"size\": \"15.6KB\"}\n  File 9: {\"name\": \"unemployed.xlsx\", \"size\": \"5.6KB\"}\n  File 10: {\"name\": \"enrolled.csv\", \"size\": \"20.4KB\"}\"\"\"\n  \n  workspace = \"\u002Fhome\u002Fu2023000922\u002Fzhangshaolei\u002Fdeepanalyze_public\u002FDeepAnalyze\u002Fexample\u002Fanalysis_on_student_loan\u002F\"\n  \n  deepanalyze = DeepAnalyzeVLLM(\n      \"\u002Ffs\u002Ffast\u002Fu2023000922\u002Fzhangshaolei\u002Fcheckpoints\u002Fdeepanalyze-8b\u002F\"\n  
)\n  answer = deepanalyze.generate(prompt, workspace=workspace)\n  print(answer[\"reasoning\"])\n  ```\n  You shoud get a deep research report, which can be rendered as a PDF.:\n  ```text\n  # Comprehensive Analysis of Student Enrollment Patterns and Institutional Transfers\n  \n  ## Introduction and Research Context\n  \n  The analysis of student enrollment patterns represents a critical area of educational research with significant implications for institutional planning, resource allocation, and student support services. This comprehensive study examines a comprehensive dataset encompassing 1,194 enrollment records across six educational institutions, merged with supplementary demographic, financial, and employment status data. The research employs advanced analytical techniques including network analysis, predictive modeling, and temporal pattern recognition to uncover both macro-level institutional trends and micro-level student mobility patterns. The dataset's longitudinal nature, spanning fifteen months of enrollment records, provides unique insights into the complex dynamics of student pathways through higher education systems.\n  \n  Our methodological approach combines quantitative analysis of enrollment durations, transfer probabilities, and financial indicators with qualitative ...\n  \n  The research contributes to the growing body of literature on student mobility by providing empirical evidence of institutional transfer networks and their relationship to student outcomes...\n  .....\n  ```\n  \u003Cp align=\"center\" width=\"100%\">\n    \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fruc-datalab_DeepAnalyze_readme_dc32ff03f2a2.png\" alt=\"deepanalyze\" style=\"width: 100%; min-width: 300px; display: block; margin: auto;\">\n  \u003C\u002Fp>\n\n  > For more examples and task completion details, please refer to [DeepAnalyze's homepage](https:\u002F\u002Fruc-deepanalyze.github.io\u002F).\n\n### API\n- You can build an OpenAI-Style 
API, using this script (note to change `MODEL_PATH = \"DeepAnalyze-8B\"` in [API\u002Fconfig.py](API\u002Fconfig.py) to your vllm model name):\n\n  ```\n  python API\u002Fstart_server.py\n  ```\n\n- API usage :\n\n  ```\n  FILE_RESPONSE=$(curl -s -X POST \"http:\u002F\u002Flocalhost:8200\u002Fv1\u002Ffiles\" \\\n      -F \"file=@data.csv\" \\\n      -F \"purpose=file-extract\")\n  \n  FILE_ID=$(echo $FILE_RESPONSE | jq -r '.id')\n  \n  curl -X POST http:\u002F\u002Flocalhost:8200\u002Fv1\u002Fchat\u002Fcompletions \\\n       -H \"Content-Type: application\u002Fjson\" \\\n       -d \"{\n          \\\"model\\\": \\\"DeepAnalyze-8B\\\",\n          \\\"messages\\\": [\n            {\n              \\\"role\\\": \\\"user\\\",\n              \\\"content\\\": \\\"Generate a data science report.\\\",\n              \\\"file_ids\\\": [\\\"$FILE_ID\\\"]\n            }\n          ]\n        }\"\n  # wait for a while\n  ```\n  \n- Refer to API\u002FREADME.md for details.\n\n## 🎈 Develop Your Own DeepAnalyze\n\n### 1. Download Model and Training Data\n- Download [DeepSeek-R1-0528-Qwen3-8B](https:\u002F\u002Fhuggingface.co\u002Fdeepseek-ai\u002FDeepSeek-R1-0528-Qwen3-8B). Or you can directly finetune based on [DeepAnalyze-8B](https:\u002F\u002Fhuggingface.co\u002FRUC-DataLab\u002FDeepAnalyze-8B).\n\n  - If you use DeepSeek-R1-0528-Qwen3-8B as the base model, you should add the special tokens, using:\n\n    ```shell\n    MODEL_PATH=path_to_DeepSeek-R1-0528-Qwen3-8B\n    SAVE_PATH=path_to_save_DeepSeek-R1-0528-Qwen3-8B-addvocab\n    \n    python deepanalyze\u002Fadd_vocab.py \\\n      --model_path \"$MODEL_PATH\" \\\n      --save_path \"$SAVE_PATH\" \\\n      --add_tags\n    ```\n\n- Download training data [DataScience-Instruct-500K](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FRUC-DataLab\u002FDataScience-Instruct-500K).\n  \n  - unzip `DataScience-Instruct-500K\u002FRL\u002Fdata.zip`\n\n\n### 2. 
Curriculum-based Agentic Training\n- Single-ability Fine-tuning: [.\u002Fscripts\u002Fsingle.sh](.\u002Fscripts\u002Fsingle.sh)\n- Multi-ability Agentic Training (cold start): [.\u002Fscripts\u002Fmulti_coldstart.sh](.\u002Fscripts\u002Fmulti_coldstart.sh)\n- Multi-ability Agentic Training (RL): [.\u002Fscripts\u002Fmulti_rl.sh](.\u002Fscripts\u002Fmulti_rl.sh)\n\n### 3. Evaluation\n- We have unified the evaluation of most existing data science benchmarks using vLLM (with more being continuously added...). You can directly follow the introduction in [.\u002Fplayground](.\u002Fplayground) to quickly evaluate DeepAnalyze or your own agent.\n\n\n## 👏 Contribution\n> We welcome all forms of contributions, and merged PRs will be listed as contributors. \n### Contribution on Code and Model\n\n- We welcome all forms of contributions on DeepAnalyze's code, model and UI, such as Docker packaging, DeepAnalyze model conversion and quantization, and submitting DeepAnalyze workflows based on closed-source LLMs. \n- You can submit a pull request directly.\n- Please refer to the [Developer Guides](https:\u002F\u002Fmatchbench.github.io\u002Fmd_file\u002FDeveloperGuides.html) for contribution guidelines.\n\n### Contribution on Case Study\n\n- We also especially encourage you to share your use cases and feedback when using DeepAnalyze; these are extremely valuable for helping us improve DeepAnalyze.\n- You can place your use cases in a new folder under [`.example\u002F`](.example\u002F). We recommend following the folder structure of [`.example\u002Fanalysis_on_student_loan\u002F`](.example\u002Fanalysis_on_student_loan\u002F), which includes three parts:\n    - `data\u002F`: stores the uploaded files\n    - `prompt.txt`: input instructions\n    - `README.md`: documentation. 
We suggest including the input, DeepAnalyze’s output, outputs from other closed-source LLMs (optional), and your evaluation\u002Fcomments of the case.\n- DeepAnalyze only has 8B parameters, so we also welcome examples where DeepAnalyze performs slightly worse than the closed-source LLMs — this will help us improve DeepAnalyze.\n\n## 🤝 Acknowledgement\n\n- **Training Frameworks:** [ms-swift](https:\u002F\u002Fgithub.com\u002Fmodelscope\u002Fms-swift), [SkyRL](https:\u002F\u002Fgithub.com\u002FNovaSky-AI\u002FSkyRL)\n\n- **Sources of Training Data:** [Reasoning-Table](https:\u002F\u002Fgithub.com\u002FMJinXiang\u002FReasoning-Table), [Spider](https:\u002F\u002Fyale-lily.github.io\u002Fspider), [BIRD](https:\u002F\u002Fbird-bench.github.io\u002F), [DABStep](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Fdabstep)\n\n - **API Key & Related Services: HeyWhale Community** .\n\n   **HeyWhale Community (www.heywhale.com) is a world-leading Chinese hands-on AI community. By providing massive data resources, practical cases, learning materials, and a wide range of AI training activities, it brings together nearly one million AI practitioners and enthusiasts to share insights, exchange ideas, collaborate, and rapidly advance their skills through practice.**\n\n\n## 🖋 Citation\n\nIf this repository is useful for you, please cite as:\n\n```\n@misc{deepanalyze,\n      title={DeepAnalyze: Agentic Large Language Models for Autonomous Data Science}, \n      author={Shaolei Zhang and Ju Fan and Meihao Fan and Guoliang Li and Xiaoyong Du},\n      year={2025},\n      eprint={2510.16872},\n      archivePrefix={arXiv},\n      primaryClass={cs.AI},\n      url={https:\u002F\u002Farxiv.org\u002Fabs\u002F2510.16872}, \n}\n```\n\nIf you have any questions, please feel free to submit an issue or contact `zhangshaolei98@ruc.edu.cn`.\n\n## 🌟 Misc\n\nWelcome to join the [DeepAnalyze WeChat group](.\u002Fassets\u002Fwechat.jpg), chat and share ideas with others!\n\n\u003Cp align=\"left\" 
width=\"100%\">\n\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fruc-datalab_DeepAnalyze_readme_cd3c0dae7cce.jpg\" alt=\"DeepAnalyze\" style=\"width: 35%; min-width: 300px; display: block; margin: auto;\">\n\u003C\u002Fp>\n\nIf you like DeepAnalyze, give it a GitHub Star ⭐. \n\n[![Star History Chart](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fruc-datalab_DeepAnalyze_readme_f69afff04683.png)](https:\u002F\u002Fwww.star-history.com\u002F#ruc-datalab\u002FDeepAnalyze&type=date&legend=top-left)\n","\u003Cp align=\"center\" width=\"100%\">\n\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fruc-datalab_DeepAnalyze_readme_dcb9f96de3fa.png\" alt=\"DeepAnalyze\" style=\"width: 60%; min-width: 300px; display: block; margin: auto;\">\n\u003C\u002Fp>\n\n# DeepAnalyze：用于自主数据科学的代理型大型语言模型\n[![arXiv](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FarXiv-2510.16872-b31b1b.svg?logo=arXiv)](https:\u002F\u002Farxiv.org\u002Fabs\u002F2510.16872)\n[![主页](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F%F0%9F%8C%90%20Homepage%20-DeepAnalyze%20Cases-blue.svg)](https:\u002F\u002Fruc-deepanalyze.github.io\u002F)\n[![模型](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F%F0%9F%A4%97%20Huggingface%20-DeepAnalyze--8B-orange.svg)](https:\u002F\u002Fhuggingface.co\u002FRUC-DataLab\u002FDeepAnalyze-8B)\n[![数据](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F%F0%9F%93%9A%20Datasets%20-DataScience--Instruct--500K-darkgreen.svg)](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FRUC-DataLab\u002FDataScience-Instruct-500K)\n[![星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fruc-datalab\u002FDeepAnalyze?style=social&label=Code+Stars)](https:\u002F\u002Fgithub.com\u002Fruc-datalab\u002FDeepAnalyze)\n![访问量](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fruc-datalab_DeepAnalyze_readme_dac1e298edbd.png)  
[![微信](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FWeChat-%E5%8A%A0%E5%85%A5DeepAnalyze%E4%BA%A4%E6%B5%81%E8%AE%A8%E8%AE%BA%E7%BE%A4-black?logo=wechat&logoColor=07C160)](.\u002Fassets\u002Fwechat.jpg) \n\n[![推特](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F@Brian%20Roemmele-gray?logo=x&logoColor=white&labelColor=black)](https:\u002F\u002Fx.com\u002FBrianRoemmele\u002Fstatus\u002F1981015483823571352) [![推特](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F@Dr%20Singularity-gray?logo=x&logoColor=white&labelColor=black)](https:\u002F\u002Fx.com\u002FDr_Singularity\u002Fstatus\u002F1981010771338498241) [![推特](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F@Gorden%20Sun-gray?logo=x&logoColor=white&labelColor=black)](https:\u002F\u002Fx.com\u002FGorden_Sun\u002Fstatus\u002F1980573407386423408) [![推特](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F@AIGCLINK-gray?logo=x&logoColor=white&labelColor=black)](https:\u002F\u002Fx.com\u002Faigclink\u002Fstatus\u002F1980554517126246642) [![推特](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F@Python%20Developer-gray?logo=x&logoColor=white&labelColor=black)](https:\u002F\u002Fx.com\u002FPython_Dv\u002Fstatus\u002F1980667557318377871) [![推特](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F@meng%20shao-gray?logo=x&logoColor=white&labelColor=black)](https:\u002F\u002Fx.com\u002Fshao__meng\u002Fstatus\u002F1980623242114314531) \n\n\n> **作者**: **[张绍雷](https:\u002F\u002Fzhangshaolei1998.github.io\u002F)、[范炬*](http:\u002F\u002Fiir.ruc.edu.cn\u002F~fanj\u002F)、[范美浩](https:\u002F\u002Fscholar.google.com\u002Fcitations?user=9RTm2qoAAAAJ)、[李国亮](https:\u002F\u002Fdbgroup.cs.tsinghua.edu.cn\u002Fligl\u002F)、[杜晓勇](http:\u002F\u002Finfo.ruc.edu.cn\u002Fjsky\u002Fszdw\u002Fajxjgcx\u002Fjsjkxyjsx1\u002Fjs2\u002F7374b0a3f58045fc9543703ccea2eb9c.htm)**\n>\n> 中国人民大学、清华大学\n\n\n**DeepAnalyze** 是首个用于自主数据科学的代理型大语言模型。它无需人工干预即可自主完成广泛的以数据为中心的任务，支持：\n- 🛠 **完整的数据科学流水线**：自动执行数据准备、分析、建模、可视化和报告生成等各类数据科学任务。\n- 🔍 
**开放式数据研究**：对多样化的数据源进行深入研究，包括结构化数据（数据库、CSV、Excel）、半结构化数据（JSON、XML、YAML）以及非结构化数据（TXT、Markdown），最终生成分析师级别的研究报告。\n- 📊 **完全开源**：DeepAnalyze 的 **模型**（https:\u002F\u002Fhuggingface.co\u002FRUC-DataLab\u002FDeepAnalyze-8B）、**代码**（https:\u002F\u002Fgithub.com\u002Fruc-datalab\u002FDeepAnalyze）、**训练数据**（https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FRUC-DataLab\u002FDataScience-Instruct-500K）和**演示**（https:\u002F\u002Fhuggingface.co\u002FRUC-DataLab\u002FDeepAnalyze-8B）均已开源，允许您部署或扩展自己的数据分析助手。\n\n\u003Cp align=\"center\" width=\"100%\">\n\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fruc-datalab_DeepAnalyze_readme_9b2d09fd4154.jpg\" alt=\"deepanalyze\" style=\"width: 70%; min-width: 300px; display: block; margin: auto;\">\n\u003C\u002Fp>\n\n\n## 🔥 新闻\n\n- **[2026年3月16日]**：更新 DeepAnalyze **WebUI v2**，界面更加流畅，新增对 **HeyWhale API** 的支持，并支持基于 **Docker 的沙箱式代码执行**。更多详情请参阅 [Readme](.\u002Fdemo\u002Fchat_v2\u002FREADME.md)。\n\n- **[2026年1月31日]**：🎉🎉🎉DeepAnalyze 作为官方代理，支持了 **[2026年（第19届）中国大学生计算机设计大赛大数据主题赛](https:\u002F\u002Fjsjds.dhu.edu.cn\u002F2025\u002F0322\u002Fc20379a371447\u002Fpage.htm)**。\n\n- **[2025年12月28日] 公告：DeepAnalyze API 密钥现已开放 🎉🎉🎉** 您现在可以通过此 [Google 表单](https:\u002F\u002Fforms.gle\u002FYxVkCzczqq8jeciw9) 或此 [飞书表单](https:\u002F\u002Fheywhale.feishu.cn\u002Fshare\u002Fbase\u002FshrcnnBRgO0x2qhx40yq4m1HxUg) 申请您的 API 密钥。有关完整详情及使用说明，请参阅 [指南](.\u002Fdocs\u002FDeepAnalyze_API_Key_Usage_Guide.md) 或 [飞书 Wiki](https:\u002F\u002Fheywhale.feishu.cn\u002Fwiki\u002FTcVmw314liwCiKkxnttc2CnInfg)。\n\n\n- **[2025年11月13日]**：DeepAnalyze 现在支持 OpenAI 风格的 API 端点，并可通过命令行终端界面访问。感谢贡献者 [@LIUyizheSDU](https:\u002F\u002Fgithub.com\u002FLIUyizheSDU\u002F)。\n\n- **[2025年11月8日]**：DeepAnalyze 现在可以通过 JupyterUI 访问，基于 [jupyter-mcp-server](https:\u002F\u002Fgithub.com\u002Fdatalayer\u002Fjupyter-mcp-server) 构建。感谢贡献者 [@ChengJiale150](https:\u002F\u002Fgithub.com\u002FChengJiale150\u002F)。\n\n- **[2025年10月28日]**：我们欢迎所有贡献，包括改进 DeepAnalyze 和分享用例（详见 
[`CONTRIBUTION.md`](CONTRIBUTION.md)）。所有合并的 PR 都将被列为贡献者。\n\n- **[2025年10月27日]**：DeepAnalyze 引起了广泛关注，一周内获得了 **1000+** GitHub 星标和 **20万+** Twitter 浏览量。\n\n- **[2025年10月21日]**：DeepAnalyze 的 **论文**（https:\u002F\u002Farxiv.org\u002Fabs\u002F2510.16872）、**代码**（https:\u002F\u002Fgithub.com\u002Fruc-datalab\u002FDeepAnalyze）、**模型**（https:\u002F\u002Fhuggingface.co\u002FRUC-DataLab\u002FDeepAnalyze-8B）、**训练数据**（https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FRUC-DataLab\u002FDataScience-Instruct-500K）正式发布！\n\n## 🖥 演示\n\n### WebUI\n\nhttps:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Fassets\u002F04184975-7ee7-4ae0-8761-7a7550c5c8fe\n\u003Cp align=\"center\" width=\"100%\">\n上传数据，DeepAnalyze 即可进行面向数据的深度研究 🔍 以及任何以数据为中心的任务 🛠\n\u003C\u002Fp>\n\n- 克隆此仓库并下载 [DeepAnalyze-8B](https:\u002F\u002Fhuggingface.co\u002FRUC-DataLab\u002FDeepAnalyze-8B)。\n- 使用 vllm 部署 DeepAnalyze-8B：`vllm serve DeepAnalyze-8B`\n- 运行以下脚本启动 API 和界面，然后通过浏览器进行交互（http:\u002F\u002Flocalhost:4000）：\n    ```bash\n    cd demo\u002Fchat\u002Ffrontend\n    npm install\n    cd ..\n    bash start.sh\n    \n    # 停止 API 和界面\n    bash stop.sh\n    \n    # 如有需要，停止 vllm\n    ```\n- 如果您希望在特定 IP 地址下部署，请在 [.\u002Fdemo\u002Fchat\u002Fbackend.py](.\u002Fdemo\u002Fchat\u002Fbackend.py) 和 [.\u002Fdemo\u002Fchat\u002Ffrontend\u002Flib\u002Fconfig.ts](.\u002Fdemo\u002Fchat\u002Ffrontend\u002Flib\u002Fconfig.ts) 中将 localhost 替换为您的 IP 地址。\n\n### WebUI v2\n\nhttps:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Fassets\u002F2dd1d2aa-6fb9-4202-bc8d-cbe874844725\n\u003Cp align=\"center\" width=\"100%\">\n上传数据，DeepAnalyze即可进行以数据为导向的深度研究🔍以及任何以数据为中心的任务🛠。\n\u003C\u002Fp> \n\n- 更加简洁流畅的用户界面\n- 新增对HeyWhale API密钥的支持\n- 新增基于Docker的沙箱代码执行环境支持。\n- 使用方法与WebUI相同。\n\n    ```bash\n    cd demo\u002Fchat_v2\u002Ffrontend  \n    npm install\n    cd ..\n    cp .env.example .env \n    bash start.sh\n    # 停止API和界面\n    bash stop.sh\n    \n    # 如有需要，停止vllm\n    ```\n\n### 
JupyterUI\n\nhttps:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Fassets\u002Fa2335f45-be0e-4787-a4c1-e93192891c5f\n\u003Cp align=\"center\" width=\"100%\">\n熟悉Jupyter Notebook吗？试试通过JupyterUI使用DeepAnalyze吧！\n\u003C\u002Fp>\n\n- 本Demo以Jupyter Lab作为前端，新建笔记本，将`\u003CAnalyze|Understand|Answer>`转换为Markdown单元格，将`\u003CCode>`转换为代码单元格并执行（执行结果对应`\u003CExecute>`）。\n- 前往[demo\u002Fjupyter](.\u002Fdemo\u002Fjupyter)查看更多内容并尝试！\n- 👏非常感谢贡献者[@ChengJiale150](https:\u002F\u002Fgithub.com\u002FChengJiale150)。\n\n### CLI\n\nhttps:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Fassets\u002F018acae5-b979-4143-ae1e-5b74da453c1d\n\u003Cp align=\"center\" width=\"100%\">\n通过命令行界面体验DeepAnalyze\n\u003C\u002Fp>\n\n- 使用vllm部署DeepAnalyze-8B：`vllm serve DeepAnalyze-8B`\n\n- 启动API服务器并运行CLI界面：\n    ```bash\n    cd API\n    python start_server.py  # 在一个终端中\n    \n    cd demo\u002Fcli\n    python api_cli.py       # 在另一个终端中（英文）\n    # 或\n    python api_cli_ZH.py    # 在另一个终端中（中文）\n    ```\n    \n- CLI提供基于Rich的美观界面，支持文件上传和实时流式响应。\n\n- 支持英文和中文界面。\n\n    \n\n> [!TIP]\n>\n> 克隆此仓库，在本地部署DeepAnalyze作为您的数据分析师，无需任何工作流或闭源API即可完成任何数据科学任务。\n>\n> 🔥 Demo的UI为初始版本，欢迎进一步开发，我们将把您列为贡献者。\n\n## 🚀 快速入门\n\n### 🔑 **使用DeepAnalyze API**\n\n**API密钥现已开放！**\n\n如需申请密钥，请填写以下任一申请表单：\n*   **[主申请表（Google）](https:\u002F\u002Fforms.gle\u002FYxVkCzczqq8jeciw9)**\n*   **[备用申请表（飞书）](https:\u002F\u002Fheywhale.feishu.cn\u002Fshare\u002Fbase\u002FshrcnnBRgO0x2qhx40yq4m1HxUg)**\n\n**📚 如需全面的使用说明，请参阅API指南：**\n\n*   **[文档](.\u002Fdocs\u002FDeepAnalyze_API_Key_Usage_Guide.md)**\n*   **[飞书Wiki](https:\u002F\u002Fheywhale.feishu.cn\u002Fwiki\u002FTcVmw314liwCiKkxnttc2CnInfg)**\n\n\n\n### 模型下载\n\n可在[RUC-DataLab\u002FDeepAnalyze-8B · Hugging Face](https:\u002F\u002Fhuggingface.co\u002FRUC-DataLab\u002FDeepAnalyze-8B)或[DeepAnalyze-8B · 模型库](https:\u002F\u002Fwww.modelscope.cn\u002Fmodels\u002FRUC-DataLab\u002FDeepAnalyze-8B\u002Fsummary)下载模型。\n\n#### 📊 显存配置推荐参数表\n\n| GPU显存 | 模型类型 | 推荐max-model-len | 是否使用FP8 KV缓存 
|\n|------------|------------|--------------------------|-----------------------|\n| **16GB** | 8位量化 | 8192 | ✓ |\n| **16GB** | 4位量化 | 49152 | ✓ |\n| **24GB** | 原始模型 | 16384 | ✓ |\n| **24GB** | 8位量化 | 98304 | ✓ |\n| **24GB** | 4位量化 | 131072 | ✓ |\n| **40GB** | 原始模型 | 131072 | ✓ |\n| **40GB** | 8位量化 | 131072 |  |\n| **80GB** | 原始模型 | 131072 |  |\n\n如需获取量化模型，可使用`.\u002Fquantize.py`。\n\n#### 🚀 vLLM启动命令模板\n\n##### 通用命令模板\n```bash\npython -m vllm.entrypoints.openai.api_server \\\n  --model \u003Cmodel_path> \\\n  --served-model-name DeepAnalyze-8B \\\n  --max-model-len \u003C从上表选择> \\\n  --gpu-memory-utilization 0.95 \\\n  --port 8000 \\\n  \u003C根据需要添加fp8> \\\n  --trust-remote-code\n```\n\n##### 场景示例命令\n\n**场景1：16GB显存用户（推荐4位量化版本）**\n\n```bash\npython -m vllm.entrypoints.openai.api_server \\\n  --model \u002Fpath\u002Fto\u002Fdeepanalyze\u002F4bit \\\n  --served-model-name DeepAnalyze-8B \\\n  --max-model-len 49152 \\\n  --gpu-memory-utilization 0.95 \\\n  --port 8000 \\\n  --kv-cache-dtype fp8 \\\n  --trust-remote-code\n```\n\n**场景2：24GB显存用户（追求最大上下文长度）**\n\n```bash\npython -m vllm.entrypoints.openai.api_server \\\n  --model \u002Fpath\u002Fto\u002Fdeepanalyze\u002F4bit \\\n  --served-model-name DeepAnalyze-8B \\\n  --max-model-len 131072 \\\n  --gpu-memory-utilization 0.95 \\\n  --port 8000 \\\n  --kv-cache-dtype fp8 \\\n  --trust-remote-code\n```\n\n**场景3：80GB显存用户（最佳性能）**\n\n```bash\npython -m vllm.entrypoints.openai.api_server \\\n  --model \u002Fpath\u002Fto\u002Foriginal\u002Fmodel \\\n  --served-model-name DeepAnalyze-8B \\\n  --max-model-len 131072 \\\n  --gpu-memory-utilization 0.95 \\\n  --port 8000 \\\n  --trust-remote-code\n```\n\n#### 快速选择指南\n\n- **显存有限（\u003C24GB）**：使用4位量化版本+FP8 KV缓存\n- **平衡配置（24-40GB）**：根据需求选择模型类型\n- **显存充足（≥40GB）**：使用原始模型以获得最佳精度\n\n启动后，可通过`http:\u002F\u002Flocalhost:8000\u002Fv1\u002Fcompletions`访问API服务。\n\n### 环境要求\n\n- 安装以下包：`torch`、`transformers`、`vllm>=0.8.5`\n    ```bash\n    conda create -n deepanalyze python=3.12 -y\n    conda 
activate deepanalyze\n    pip install -r requirements.txt\n    \n    # 用于训练\n    (cd .\u002Fdeepanalyze\u002Fms-swift\u002F && pip install -e .)\n    (cd .\u002Fdeepanalyze\u002FSkyRL\u002F && pip install -e .)\n    ```\n- [`requirements.txt`](requirements.txt)列出了DeepAnalyze推理所需的最低依赖项。训练时请参考[`.\u002Fdeepanalyze\u002Fms-swift\u002Frequirements.txt`](.\u002Fdeepanalyze\u002Fms-swift\u002Frequirements.txt)和[`.\u002Fdeepanalyze\u002FSkyRL\u002Fpyproject.toml`](.\u002Fdeepanalyze\u002FSkyRL\u002Fpyproject.toml)。\n- 建议将推理和训练环境分开，以避免依赖冲突。\n\n### 命令交互\n\n- 通过 vllm 部署 DeepAnalyze-8B：`vllm serve DeepAnalyze-8B`\n\n- 运行以下脚本以执行任何数据科学任务：\n  - 您可以指定**任意数据科学任务**，包括特定的数据任务和开放式数据研究。\n  - 您可以指定**任意数量的数据源**，DeepAnalyze 将自动探索这些数据源。\n  - 您可以指定**任意类型的数据源**，例如结构化数据（数据库、CSV、Excel）、半结构化数据（JSON、XML、YAML）以及非结构化数据（TXT、Markdown）。\n\n  ```python\n  from deepanalyze import DeepAnalyzeVLLM\n  \n  prompt = \"\"\"# 指令\n  生成一份数据科学报告。\n  \n  # 数据\n  文件1：{\"name\": \"bool.xlsx\", \"size\": \"4.8KB\"}\n  文件2：{\"name\": \"person.csv\", \"size\": \"10.6KB\"}\n  文件3：{\"name\": \"disabled.xlsx\", \"size\": \"5.6KB\"}\n  文件4：{\"name\": \"enlist.csv\", \"size\": \"6.7KB\"}\n  文件5：{\"name\": \"filed_for_bankrupcy.csv\", \"size\": \"1.0KB\"}\n  文件6：{\"name\": \"longest_absense_from_school.xlsx\", \"size\": \"16.0KB\"}\n  文件7：{\"name\": \"male.xlsx\", \"size\": \"8.8KB\"}\n  文件8：{\"name\": \"no_payment_due.xlsx\", \"size\": \"15.6KB\"}\n  文件9：{\"name\": \"unemployed.xlsx\", \"size\": \"5.6KB\"}\n  文件10：{\"name\": \"enrolled.csv\", \"size\": \"20.4KB\"}\"\"\"\n  \n  workspace = \"\u002Fhome\u002Fu2023000922\u002Fzhangshaolei\u002Fdeepanalyze_public\u002FDeepAnalyze\u002Fexample\u002Fanalysis_on_student_loan\u002F\"\n  \n  deepanalyze = DeepAnalyzeVLLM(\n      \"\u002Ffs\u002Ffast\u002Fu2023000922\u002Fzhangshaolei\u002Fcheckpoints\u002Fdeepanalyze-8b\u002F\"\n  )\n  answer = deepanalyze.generate(prompt, workspace=workspace)\n  print(answer[\"reasoning\"])\n  ```\n  您将获得一份深度研究报告，该报告可以渲染为 PDF 格式：\n  ```text\n  # 
学生入学模式与院校转学的综合分析\n  \n  ## 引言及研究背景\n  \n  对学生入学模式的分析是教育研究中的一个重要领域，对院校规划、资源分配以及学生支持服务具有重大意义。本研究全面考察了一个包含六所教育机构共1,194条入学记录的数据集，并将其与补充的人口统计、财务及就业状况数据相结合。研究采用了网络分析、预测建模和时间序列模式识别等先进分析技术，以揭示宏观层面的院校趋势以及微观层面的学生流动模式。该数据集具有纵向特性，涵盖了十五个月的入学记录，从而为理解学生在高等教育体系中的复杂路径提供了独特的视角。\n  \n  我们的方法结合了对入学时长、转学概率和财务指标的定量分析，以及定性...\n  \n  本研究通过提供关于院校转学网络及其与学生学业成果之间关系的实证证据，丰富了有关学生流动性的现有文献...\n  .....\n  ```\n  \u003Cp align=\"center\" width=\"100%\">\n    \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fruc-datalab_DeepAnalyze_readme_dc32ff03f2a2.png\" alt=\"deepanalyze\" style=\"width: 100%; min-width: 300px; display: block; margin: auto;\">\n  \u003C\u002Fp>\n\n  > 更多示例和任务完成详情，请参阅 [DeepAnalyze 官网](https:\u002F\u002Fruc-deepanalyze.github.io\u002F)。\n\n### API\n- 您可以构建一个 OpenAI 风格的 API，使用此脚本（请注意将 [API\u002Fconfig.py](API\u002Fconfig.py) 中的 `MODEL_PATH = \"DeepAnalyze-8B\"` 更改为您的 vllm 模型名称）：\n\n  ```\n  python API\u002Fstart_server.py\n  ```\n\n- API 使用方法：\n\n  ```\n  FILE_RESPONSE=$(curl -s -X POST \"http:\u002F\u002Flocalhost:8200\u002Fv1\u002Ffiles\" \\\n      -F \"file=@data.csv\" \\\n      -F \"purpose=file-extract\")\n  \n  FILE_ID=$(echo $FILE_RESPONSE | jq -r '.id')\n  \n  curl -X POST http:\u002F\u002Flocalhost:8200\u002Fv1\u002Fchat\u002Fcompletions \\\n       -H \"Content-Type: application\u002Fjson\" \\\n       -d \"{\n          \\\"model\\\": \\\"DeepAnalyze-8B\\\",\n          \\\"messages\\\": [\n            {\n              \\\"role\\\": \\\"user\\\",\n              \\\"content\\\": \\\"生成一份数据科学报告。\\\",\n              \\\"file_ids\\\": [\\\"$FILE_ID\\\"]\n            }\n          ]\n        }\"\n  # 等待片刻\n  ```\n  \n- 详细信息请参阅 API\u002FREADME.md。\n\n## 🎈 开发您自己的 DeepAnalyze\n\n### 1. 
下载模型和训练数据\n- 下载 [DeepSeek-R1-0528-Qwen3-8B](https:\u002F\u002Fhuggingface.co\u002Fdeepseek-ai\u002FDeepSeek-R1-0528-Qwen3-8B)。或者您可以直接基于 [DeepAnalyze-8B](https:\u002F\u002Fhuggingface.co\u002FRUC-DataLab\u002FDeepAnalyze-8B) 进行微调。\n  \n  - 如果您使用 DeepSeek-R1-0528-Qwen3-8B 作为基础模型，应添加特殊标记，使用如下命令：\n\n    ```shell\n    MODEL_PATH=path_to_DeepSeek-R1-0528-Qwen3-8B\n    SAVE_PATH=path_to_save_DeepSeek-R1-0528-Qwen3-8B-addvocab\n    \n    python deepanalyze\u002Fadd_vocab.py \\\n      --model_path \"$MODEL_PATH\" \\\n      --save_path \"$SAVE_PATH\" \\\n      --add_tags\n    ```\n\n- 下载训练数据 [DataScience-Instruct-500K](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FRUC-DataLab\u002FDataScience-Instruct-500K)。\n  \n  - 解压 `DataScience-Instruct-500K\u002FRL\u002Fdata.zip`\n\n\n### 2. 基于课程的智能体训练\n- 单一能力微调：[.\u002Fscripts\u002Fsingle.sh](.\u002Fscripts\u002Fsingle.sh)\n- 多能力智能体训练（冷启动）：[.\u002Fscripts\u002Fmulti_coldstart.sh](.\u002Fscripts\u002Fmulti_coldstart.sh)\n- 多能力智能体训练（强化学习）：[.\u002Fscripts\u002Fmulti_rl.sh](.\u002Fscripts\u002Fmulti_rl.sh)\n\n### 3. 
评估\n- 我们已使用 vLLM 统一评估了大多数现有的数据科学基准测试（并且还在持续增加中……）。您可以直接按照 [.\u002Fplayground](.\u002Fplayground) 中的介绍，快速评估 DeepAnalyze 或您自己的智能体。\n\n\n## 👏 贡献\n> 我们欢迎任何形式的贡献，合并的 PR 将被列为贡献者。\n### 代码和模型方面的贡献\n\n- 我们欢迎对 DeepAnalyze 的代码、模型和 UI 的各种贡献，例如 Docker 打包、DeepAnalyze 模型的转换和量化，以及提交基于闭源 LLM 的 DeepAnalyze 工作流。\n- 您可以直接提交拉取请求。\n- 请参阅 [开发者指南](https:\u002F\u002Fmatchbench.github.io\u002Fmd_file\u002FDeveloperGuides.html) 获取贡献指南。\n\n### 案例研究方面的贡献\n\n- 我们也特别鼓励您分享使用 DeepAnalyze 的案例和反馈；这些对于帮助我们改进 DeepAnalyze 极其宝贵。\n- 您可以将您的案例放置在 [.\u002Fexample\u002F](.\u002Fexample\u002F) 下的新文件夹中。我们建议遵循 [.\u002Fexample\u002Fanalysis_on_student_loan\u002F](.\u002Fexample\u002Fanalysis_on_student_loan\u002F) 的文件结构，其中包括三个部分：\n    - `data\u002F`：存储上传的文件\n    - `prompt.txt`：输入指令\n    - `README.md`：文档说明。我们建议包含输入内容、DeepAnalyze 的输出、其他闭源 LLM 的输出（可选），以及您对该案例的评估和评论。\n- DeepAnalyze 只有 8B 参数，因此我们也欢迎那些 DeepAnalyze 表现略逊于闭源 LLM 的案例——这将有助于我们进一步改进 DeepAnalyze。\n\n## 🤝 致谢\n\n- **训练框架：** [ms-swift](https:\u002F\u002Fgithub.com\u002Fmodelscope\u002Fms-swift)、[SkyRL](https:\u002F\u002Fgithub.com\u002FNovaSky-AI\u002FSkyRL)\n\n- **训练数据来源：** [Reasoning-Table](https:\u002F\u002Fgithub.com\u002FMJinXiang\u002FReasoning-Table)、[Spider](https:\u002F\u002Fyale-lily.github.io\u002Fspider)、[BIRD](https:\u002F\u002Fbird-bench.github.io\u002F)、[DABStep](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Fdabstep)\n\n- **API密钥及相关服务：HeyWhale社区**。\n\n  **HeyWhale社区（www.heywhale.com）是全球领先的中文实践型AI社区。通过提供海量数据资源、实战案例、学习资料以及丰富的AI培训活动，汇聚了近百万名AI从业者和爱好者，共同分享见解、交流思想、协作创新，并在实践中快速提升技能。**\n\n\n## 🖋 引用\n\n如果本仓库对您有所帮助，请按以下方式引用：\n\n```\n@misc{deepanalyze,\n      title={DeepAnalyze: Agentic Large Language Models for Autonomous Data Science}, \n      author={Shaolei Zhang and Ju Fan and Meihao Fan and Guoliang Li and Xiaoyong Du},\n      year={2025},\n      eprint={2510.16872},\n      archivePrefix={arXiv},\n      primaryClass={cs.AI},\n      url={https:\u002F\u002Farxiv.org\u002Fabs\u002F2510.16872}, \n}\n```\n\n如有任何问题，欢迎提交Issue或联系`zhangshaolei98@ruc.edu.cn`。\n\n## 🌟 其他\n\n欢迎加入[DeepAnalyze微信交流群](.\u002Fassets\u002Fwechat.jpg)，与大家畅聊、分享观点！\n\n\u003Cp align=\"left\" 
width=\"100%\">\n\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fruc-datalab_DeepAnalyze_readme_cd3c0dae7cce.jpg\" alt=\"DeepAnalyze\" style=\"width: 35%; min-width: 300px; display: block; margin: auto;\">\n\u003C\u002Fp>\n\n如果您喜欢DeepAnalyze，请为该项目在GitHub上点个赞⭐。\n\n[![Star历史图表](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fruc-datalab_DeepAnalyze_readme_f69afff04683.png)](https:\u002F\u002Fwww.star-history.com\u002F#ruc-datalab\u002FDeepAnalyze&type=date&legend=top-left)","# DeepAnalyze 快速上手指南\n\nDeepAnalyze 是首个用于自主数据科学的代理型大语言模型（Agentic LLM），能够无需人工干预地自动完成数据准备、分析、建模、可视化及报告生成等全流程任务。\n\n## 1. 环境准备\n\n### 系统要求\n- **操作系统**: Linux (推荐 Ubuntu 20.04+) 或 macOS\n- **GPU**: NVIDIA GPU (支持 CUDA)，显存建议 **16GB** 以上（最低可运行量化版本）\n- **Python**: 3.12（推荐，与官方 conda 环境一致）\n- **Node.js**: v18+ (用于部署 WebUI)\n\n### 前置依赖\n确保已安装以下基础工具：\n```bash\n# 检查 Python 版本\npython --version\n\n# 检查 Git\ngit --version\n\n# 检查 Node.js (WebUI 必需)\nnode --version\nnpm --version\n\n# 安装 vLLM (推理引擎)\npip install vllm\n```\n\n> **提示**: 国内用户建议使用清华源或阿里源加速 pip 安装：\n> `pip install vllm -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple`\n\n---\n\n## 2. 
安装步骤\n\n### 第一步：克隆项目\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Fruc-datalab\u002FDeepAnalyze.git\ncd DeepAnalyze\n```\n\n### 第二步：下载模型\n您可以从 Hugging Face 或 ModelScope（魔搭社区，国内推荐）下载模型。\n\n**方案 A：使用 Hugging Face (需网络条件)**\n```bash\n# 需安装 huggingface-cli\npip install huggingface_hub\nhuggingface-cli download RUC-DataLab\u002FDeepAnalyze-8B --local-dir .\u002Fmodels\u002FDeepAnalyze-8B\n```\n\n**方案 B：使用 ModelScope (国内加速推荐)**\n```bash\n# 安装 modelscope\npip install modelscope\n# 下载模型\npython -c \"from modelscope import snapshot_download; snapshot_download('RUC-DataLab\u002FDeepAnalyze-8B', local_dir='.\u002Fmodels\u002FDeepAnalyze-8B')\"\n```\n\n### 第三步：启动推理服务 (vLLM)\n根据您的显存大小选择合适的启动命令。以下以 **24GB 显存** 为例（使用原始模型 + FP8 KV 缓存）：\n\n```bash\npython -m vllm.entrypoints.openai.api_server \\\n  --model .\u002Fmodels\u002FDeepAnalyze-8B \\\n  --served-model-name DeepAnalyze-8B \\\n  --max-model-len 16384 \\\n  --gpu-memory-utilization 0.95 \\\n  --port 8000 \\\n  --kv-cache-dtype fp8 \\\n  --trust-remote-code\n```\n\n> **显存适配参考**:\n> - **16GB**: 请使用 4-bit 量化模型，设置 `--max-model-len 49152` 并添加 `--kv-cache-dtype fp8`。\n> - **24GB（追求更长上下文）**: 将 `--model` 指向 4-bit 量化模型目录（可使用 `.\u002Fquantize.py` 生成），可设置 `--max-model-len 131072` 并保留 `--kv-cache-dtype fp8`。\n> - **80GB**: 可使用原始模型，移除 `--kv-cache-dtype fp8` 参数以获得最佳精度。\n\n---\n\n## 3. 基本使用\n\n启动推理服务后，您可以通过以下三种方式与 DeepAnalyze 交互：\n\n### 方式一：命令行界面 (CLI) - 最快捷\n适合快速测试和终端用户。\n\n1. 启动后端 API（在新终端窗口）：\n   ```bash\n   cd API\n   python start_server.py\n   ```\n\n2. 启动 CLI 客户端（在另一个终端窗口）：\n   ```bash\n   cd demo\u002Fcli\n   # 中文界面\n   python api_cli_ZH.py\n   ```\n   *启动后即可上传文件并进行自然对话，系统将自动执行代码分析数据。*\n\n### 方式二：Web 界面 (WebUI) - 功能最全\n适合需要可视化操作和报告生成的场景。\n\n1. 安装前端依赖：\n   ```bash\n   cd demo\u002Fchat\u002Ffrontend\n   npm install\n   cd ..\n   ```\n\n2. 启动服务：\n   ```bash\n   bash start.sh\n   ```\n\n3. 
访问浏览器：\n   打开 `http:\u002F\u002Flocalhost:4000`，上传 CSV\u002FExcel\u002FJSON 等数据文件，输入指令即可开始自主分析。\n\n### 方式三：调用 API\n如果您希望将 DeepAnalyze 集成到自己的应用中，可直接调用 OpenAI 兼容接口：\n\n```python\nfrom openai import OpenAI\n\nclient = OpenAI(\n    base_url=\"http:\u002F\u002Flocalhost:8000\u002Fv1\",\n    api_key=\"EMPTY\" # 本地部署无需真实 key\n)\n\nresponse = client.chat.completions.create(\n    model=\"DeepAnalyze-8B\",\n    messages=[\n        {\"role\": \"user\", \"content\": \"分析当前目录下的 data.csv 文件，找出销售额最高的产品并绘制趋势图。\"}\n    ]\n)\n\nprint(response.choices[0].message.content)\n```\n\n---\n\n**进阶提示**: \n- 若需申请官方 **DeepAnalyze API Key** 免部署使用，请访问 [Google 表单](https:\u002F\u002Fforms.gle\u002FYxVkCzczqq8jeciw9) 或 [飞书表单](https:\u002F\u002Fheywhale.feishu.cn\u002Fshare\u002Fbase\u002FshrcnnBRgO0x2qhx40yq4m1HxUg) 申请。\n- 详细文档请参阅项目根目录下的 `docs\u002FDeepAnalyze_API_Key_Usage_Guide.md`。","某电商公司的数据运营团队需要在每周一上午，基于上周分散在数据库、CSV 日志和 JSON 配置文件中的千万级用户行为数据，产出包含趋势洞察与归因分析的专业周报。\n\n### 没有 DeepAnalyze 时\n- **数据清洗耗时极长**：分析师需手动编写大量 Python 脚本清洗多源异构数据，仅格式统一就要花费半天时间。\n- **分析深度依赖个人经验**：面对开放性问题（如“为何某类商品转化率骤降”），往往只能做表面统计，难以自动下钻挖掘根本原因。\n- **报告生成重复劳动**：图表绘制完成后，需人工将结论复制粘贴到 PPT 或文档中，排版和文字润色占用大量精力。\n- **响应业务需求滞后**：从接到需求到交付报告通常需要 1-2 天，无法支持管理层对突发数据异常的即时决策。\n\n### 使用 DeepAnalyze 后\n- **全自动流水线作业**：DeepAnalyze 自主连接数据库并解析非结构化日志，一键完成从数据预处理到建模的全流程，无需人工干预。\n- **智能深度归因研究**：针对异常指标，DeepAnalyze 能像资深分析师一样自动设计验证实验，跨表关联分析并定位具体影响因素。\n- **一键生成专家级报告**：工具直接输出包含交互式可视化图表、专业文字解读及改进建议的完整分析报告，格式精美且逻辑严密。\n- **分钟级即时响应**：原本需要两天的工作现在仅需几分钟即可完成，让团队能随时应对临时的数据探查需求。\n\nDeepAnalyze 
将数据科学家从繁琐的重复劳动中解放出来，真正实现了从“人找数据”到“数据找人”的自主分析变革。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fruc-datalab_DeepAnalyze_2685d7e0.png","ruc-datalab","RUC-DataLab","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fruc-datalab_3eb5d021.png",null,"http:\u002F\u002Fiir.ruc.edu.cn\u002F~fanj\u002F","https:\u002F\u002Fgithub.com\u002Fruc-datalab",[79,83,87,91,95,98,102,105],{"name":80,"color":81,"percentage":82},"Python","#3572A5",95,{"name":84,"color":85,"percentage":86},"Shell","#89e051",3.8,{"name":88,"color":89,"percentage":90},"Jupyter Notebook","#DA5B0B",1.1,{"name":92,"color":93,"percentage":94},"Jinja","#a52a22",0.1,{"name":96,"color":97,"percentage":94},"Dockerfile","#384d54",{"name":99,"color":100,"percentage":101},"Makefile","#427819",0,{"name":103,"color":104,"percentage":101},"Batchfile","#C1F12E",{"name":106,"color":107,"percentage":101},"Scheme","#1e4aec",4007,656,"2026-04-15T08:58:51","MIT",4,"Linux, macOS, Windows","需要 NVIDIA GPU，推荐显存 16GB+（最低支持量化后运行），需安装 vLLM 依赖的 CUDA 环境","未说明（取决于模型量化程度及上下文长度，建议系统内存充足以配合 GPU）",{"notes":117,"python":118,"dependencies":119},"核心推理引擎依赖 vLLM，需通过 'vllm serve' 启动服务。根据显存大小（16GB\u002F24GB\u002F40GB\u002F80GB）需选择不同量化版本（4-bit\u002F8-bit\u002F原始模型）并配置 max-model-len 和 FP8 KV Cache。前端演示依赖 Node.js (npm install)。支持 Docker 沙箱代码执行环境。模型文件需从 Hugging Face 或 ModelScope 下载。","3.8+",[120,121,122,123,124,125,126,127],"vllm","torch","transformers","numpy","pandas","fastapi","uvicorn","nodejs\u002Fnpm (前端依赖)",[15,35,16,129,13,14],"其他",[131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150],"agent","agentic","agentic-ai","chatbot","data","data-analysis","data-engineering","data-science","data-visualization","llm","ai","ai-scientist","database","qwen","science","python","open-source","python-programming","deep-research","jupyter","2026-03-27T02:49:30.150509","2026-04-16T08:14:12.958502",[],[]]