[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-stepfun-ai--gelab-zero":3,"tool-stepfun-ai--gelab-zero":64},[4,17,27,35,48,56],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":16},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,3,"2026-04-05T11:01:52",[13,14,15],"开发框架","图像","Agent","ready",{"id":18,"name":19,"github_repo":20,"description_zh":21,"stars":22,"difficulty_score":23,"last_commit_at":24,"category_tags":25,"status":16},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",140436,2,"2026-04-05T23:32:43",[13,15,26],"语言模型",{"id":28,"name":29,"github_repo":30,"description_zh":31,"stars":32,"difficulty_score":23,"last_commit_at":33,"category_tags":34,"status":16},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",107662,"2026-04-03T11:11:01",[13,14,15],{"id":36,"name":37,"github_repo":38,"description_zh":39,"stars":40,"difficulty_score":23,"last_commit_at":41,"category_tags":42,"status":16},2268,"ML-For-Beginners","microsoft\u002FML-For-Beginners","ML-For-Beginners 是由微软推出的一套系统化机器学习入门课程，旨在帮助零基础用户轻松掌握经典机器学习知识。这套课程将学习路径规划为 12 周，包含 26 节精炼课程和 52 道配套测验，内容涵盖从基础概念到实际应用的完整流程，有效解决了初学者面对庞大知识体系时无从下手、缺乏结构化指导的痛点。\n\n无论是希望转型的开发者、需要补充算法背景的研究人员，还是对人工智能充满好奇的普通爱好者，都能从中受益。课程不仅提供了清晰的理论讲解，还强调动手实践，让用户在循序渐进中建立扎实的技能基础。其独特的亮点在于强大的多语言支持，通过自动化机制提供了包括简体中文在内的 50 多种语言版本，极大地降低了全球不同背景用户的学习门槛。此外，项目采用开源协作模式，社区活跃且内容持续更新，确保学习者能获取前沿且准确的技术资讯。如果你正寻找一条清晰、友好且专业的机器学习入门之路，ML-For-Beginners 将是理想的起点。",84991,"2026-04-05T10:45:23",[14,43,44,45,15,46,26,13,47],"数据工具","视频","插件","其他","音频",{"id":49,"name":50,"github_repo":51,"description_zh":52,"stars":53,"difficulty_score":10,"last_commit_at":54,"category_tags":55,"status":16},3128,"ragflow","infiniflow\u002Fragflow","RAGFlow 是一款领先的开源检索增强生成（RAG）引擎，旨在为大语言模型构建更精准、可靠的上下文层。它巧妙地将前沿的 RAG 技术与智能体（Agent）能力相结合，不仅支持从各类文档中高效提取知识，还能让模型基于这些知识进行逻辑推理和任务执行。\n\n在大模型应用中，幻觉问题和知识滞后是常见痛点。RAGFlow 通过深度解析复杂文档结构（如表格、图表及混合排版），显著提升了信息检索的准确度，从而有效减少模型“胡编乱造”的现象，确保回答既有据可依又具备时效性。其内置的智能体机制更进一步，使系统不仅能回答问题，还能自主规划步骤解决复杂问题。\n\n这款工具特别适合开发者、企业技术团队以及 AI 
研究人员使用。无论是希望快速搭建私有知识库问答系统，还是致力于探索大模型在垂直领域落地的创新者，都能从中受益。RAGFlow 提供了可视化的工作流编排界面和灵活的 API 接口，既降低了非算法背景用户的上手门槛，也满足了专业开发者对系统深度定制的需求。作为基于 Apache 2.0 协议开源的项目，它正成为连接通用大模型与行业专有知识之间的重要桥梁。",77062,"2026-04-04T04:44:48",[15,14,13,26,46],{"id":57,"name":58,"github_repo":59,"description_zh":60,"stars":61,"difficulty_score":10,"last_commit_at":62,"category_tags":63,"status":16},2181,"OpenHands","OpenHands\u002FOpenHands","OpenHands 是一个专注于 AI 驱动开发的开源平台，旨在让智能体（Agent）像人类开发者一样理解、编写和调试代码。它解决了传统编程中重复性劳动多、环境配置复杂以及人机协作效率低等痛点，通过自动化流程显著提升开发速度。\n\n无论是希望提升编码效率的软件工程师、探索智能体技术的研究人员，还是需要快速原型验证的技术团队，都能从中受益。OpenHands 提供了灵活多样的使用方式：既可以通过命令行（CLI）或本地图形界面在个人电脑上轻松上手，体验类似 Devin 的流畅交互；也能利用其强大的 Python SDK 自定义智能体逻辑，甚至在云端大规模部署上千个智能体并行工作。\n\n其核心技术亮点在于模块化的软件智能体 SDK，这不仅构成了平台的引擎，还支持高度可组合的开发模式。此外，OpenHands 在 SWE-bench 基准测试中取得了 77.6% 的优异成绩，证明了其解决真实世界软件工程问题的能力。平台还具备完善的企业级功能，支持与 Slack、Jira 等工具集成，并提供细粒度的权限管理，适合从个人开发者到大型企业的各类用户场景。",70626,"2026-04-05T22:51:36",[26,15,13,45],{"id":65,"github_repo":66,"name":67,"description_en":68,"description_zh":69,"ai_summary_zh":69,"readme_en":70,"readme_zh":71,"quickstart_zh":72,"use_case_zh":73,"hero_image_url":74,"owner_login":75,"owner_name":75,"owner_avatar_url":76,"owner_bio":77,"owner_company":78,"owner_location":78,"owner_email":79,"owner_twitter":78,"owner_website":78,"owner_url":80,"languages":81,"stars":86,"forks":87,"last_commit_at":88,"license":89,"difficulty_score":10,"env_os":90,"env_gpu":91,"env_ram":91,"env_deps":92,"category_tags":99,"github_topics":100,"view_count":10,"oss_zip_url":78,"oss_zip_packed_at":78,"status":16,"created_at":105,"updated_at":106,"faqs":107,"releases":138},1141,"stepfun-ai\u002Fgelab-zero","gelab-zero","STEP-GUI: The top GUI agent solution in the galaxy.  Developed by the StepFun-GELab team and powered by StepFun’s cutting-edge research capabilities.","GELab-Zero 是一款面向移动应用界面自动化的开源工具，专注于通过AI技术实现跨平台的GUI操作。它解决了移动生态碎片化带来的开发复杂性问题，无需依赖云服务即可完成本地化部署，保障用户数据隐私。该工具提供即插即用的完整基础设施，支持多设备协同与任务分发，特别适合需要进行自动化测试、智能交互开发的开发者，以及探索移动端AI应用的研究人员。其技术亮点包括开源架构、轻量级模型部署能力、与Hugging Face等平台的兼容性，以及配套的基准测试数据集，为用户提供从实验到落地的全链条支持。","![GELab-Zero Main Image](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_5b0e9abd3a92.png)\n\n> 👋 Hi, everyone! We are proud to present the first fully open-source GUI Agent with both model and infrastructure. 
Our solution features plug-and-play engineering with no cloud dependencies, giving you complete privacy control.\n\n\u003Cp align=\"center\">\n  \u003C!-- \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fstepfun-ai\u002Fgelab-zero\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F💻%20GitHub-Repository-black\" alt=\"GitHub\" \u002F>\u003C\u002Fa> -->\n  \u003Ca href=\"https:\u002F\u002Farxiv.org\u002Fabs\u002F2512.15431\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FarXiv-Step--GUI Technical Report-B31B1B.svg?logo=arxiv&logoColor=white\" alt=\"arXiv\" \u002F>\u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fopengelab.github.io\u002F\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F🌐%20Website-Project%20Page-blue\" alt=\"Website\" \u002F>\u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fhuggingface.co\u002Fstepfun-ai\u002FGELab-Zero-4B-preview\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F🤗%20Hugging%20Face-GELab--Zero--4B--preview-orange\" alt=\"Hugging Face Model\" \u002F>\u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fstepfun-ai\u002FAndroidDaily\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F📚%20Hugging%20Face-AndroidDaily-yellow\" alt=\"Hugging Face Dataset\" \u002F>\u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fmodelscope.cn\u002Fmodels\u002Fstepfun-ai\u002FGELab-Zero-4B-preview\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F🤖%20Model%20Scope-GELab--Zero--4B--preview-blue\" alt=\"Model Scope\" \u002F>\u003C\u002Fa>\n\u003C\u002Fp>\n\n\u003Cp align=\"center\">\n  \u003Ca href=\".\u002FREADME.md\">English\u003C\u002Fa> |\n  \u003Ca href=\".\u002FREADME_CN.md\">简体中文\u003C\u002Fa>\n\u003C\u002Fp>\n\n## 📰 News\n\n* 🎁 **[2025-12-18]** We release **Step-GUI Technical Report** on [**arXiv**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2512.15431)!\n* 🎁 **[2025-12-18]** We release a more powerful **API** for GUI automation tasks. [Apply for API access here](https:\u002F\u002Fwvixbzgc0u7.feishu.cn\u002Fshare\u002Fbase\u002Fform\u002FshrcnNStxEmuE7aY6jTW07CZHMf)!\n* 🎁 **[2025-12-12]** We release **MCP-Server** support for multi-device management and task distribution. 
See [Installation & Quick Start](#-installation-quick-start) and [MCP-Server Setup](#optional-mcp-server-setup) for setup instructions.\n* 🎁 **[2025-12-1]** We thank the following projects and authors for providing quantization tools & tutorials: [GGUF_v1](https:\u002F\u002Fhuggingface.co\u002Fbartowski\u002Fstepfun-ai_GELab-Zero-4B-preview-GGUF), [GGUF_v2](https:\u002F\u002Fhuggingface.co\u002Fnoctrex\u002FGELab-Zero-4B-preview-GGUF), [EXL3](https:\u002F\u002Fhuggingface.co\u002FArtusDev\u002Fstepfun-ai_GELab-Zero-4B-preview-EXL3), [Tutorials_CN](http:\u002F\u002Fxhslink.com\u002Fo\u002F1WrmgHGWFYh), [Tutorials_EN](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=4BMiDyQOpos)\n* 🎁 **[2025-11-31]** We release a lightweight **4B** model GELab-Zero-4B-preview on [**Hugging Face**](https:\u002F\u002Fhuggingface.co\u002Fstepfun-ai\u002FGELab-Zero-4B-preview) and [**Model Scope**](https:\u002F\u002Fmodelscope.cn\u002Fmodels\u002Fstepfun-ai\u002FGELab-Zero-4B-preview).\n* 🎁 **[2025-11-31]** We release the tasks from the [**AndroidDaily**](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fstepfun-ai\u002FAndroidDaily) benchmark.\n* 🎁 **[2025-11-30]** We release the current **GELab-Zero** engineering infrastructure.\n* 🎁 **[2025-10]** Our [**research**](https:\u002F\u002Fgithub.com\u002Fsummoneryhl\u002Fgelab-engine) paper on GELab-Engine is accepted by **NeurIPS 2025**.\n\n\n\n## 📑 Table of Contents\n\n- [📖 Background](#-background)\n- [🎥 Application Demonstrations](#-application-demonstrations)\n- [🏆 Open Benchmark](#-open-benchmark)\n- [🚀 Installation & Quick Start](#-installation-quick-start)\n- [📝 Citation](#-citation)\n\n\n## 📧 Contact\n\nYou can contact us and communicate with us by joining our WeChat group:\n\n| WeChat Group |\n|:-------------------------:|\n| \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_b57feb679b37.jpeg\" width=\"200\"> |\n\n\n\n\n## 📖 Background\n\nAs AI experiences increasingly penetrate consumer-grade devices, Mobile Agent research is at a critical juncture: transitioning from **\"feasibility verification\"** to **\"large-scale application.\"** While GUI-based solutions offer universal compatibility, the fragmentation of mobile ecosystems imposes heavy engineering burdens that hinder innovation. GELab-Zero is designed to dismantle these barriers.\n\n* ⚡️ **Out-of-the-Box Full-Stack Infrastructure** \nResolves the fragmentation of the mobile ecosystem with a unified, one-click inference pipeline. It automatically handles multi-device ADB connections, dependencies, and permissions, allowing developers to focus on strategic innovation rather than engineering infrastructure.\n\n* 🖥️ **Consumer-Grade Local Deployment** \nFeatures a built-in 4B GUI Agent model **fully optimized for Mac (M-series) and NVIDIA RTX 4060**. It supports complete local execution, ensuring data privacy and low latency on standard consumer hardware.\n\n* 📱 **Flexible Task Distribution & Orchestration** \nSupports distributing tasks across multiple devices with interaction trajectory recording. 
It offers three versatile modes—ReAct loops, multi-agent collaboration, and scheduled tasks—to handle complex, real-world business scenarios.\n\n* 🚀 **Accelerate from Prototype to Production** \nEmpowers developers to rapidly validate interaction strategies while allowing enterprises to directly reuse the underlying infrastructure for zero-cost MCP integration, bridging the critical gap between \"feasibility verification\" and \"large-scale application.\"\n\n## 🎥 Application Demonstrations\n\n### Recommendation - Sci-Fi Movies\n\nTask: Help me find any good recent sci-fi movies\n\n\u003C!-- gif -->\n\u003Cdiv style=\"display: flex; align-items: center; justify-content: center; width: 80%; margin: 0 auto;\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_8d165e1c9a8c.gif\" alt=\"Sci-Fi Movies Recommendation Demo\" style=\"flex: 1; height: 400px; object-fit: contain; margin-right: 1px;\"\u002F>\n\u003C\u002Fdiv>\n\n**[📹 Click to view demo video](.\u002Fimages\u002Fvideo_2.mp4)**\n\n\n### Recommendation - Travel Destination\n\nTask: Help me find a place where I can take my kids on the weekend\n\n\u003Cdiv style=\"display: flex; align-items: center; justify-content: center; width: 80%; margin: 0 auto;\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_83be65930508.gif\" alt=\"Travel Destination Recommendation Demo\" style=\"flex: 1; height: 400px; object-fit: contain; margin-right: 1px;\"\u002F>\n\u003C\u002Fdiv>\n\n**[📹 Click to view demo video](.\u002Fimages\u002Fvideo_4.mp4)**\n\n\n### Practical Task - Claim Subsidy\n\nTask: Claim meal vouchers on the enterprise welfare platform\n\n\u003Cdiv style=\"display: flex; align-items: center; justify-content: center; width: 80%; margin: 0 auto;\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_a08f9d30facd.gif\" alt=\"Claim Meal Vouchers Demo\" style=\"flex: 1; height: 400px; object-fit: contain; margin-right: 1px;\"\u002F>\n\u003C\u002Fdiv>\n\n**[📹 Click to view demo video](.\u002Fimages\u002Fvideo_3.mp4)**\n\n### Practical Task - Metro Line Query\n\nTask: Check if Metro Line 1 is operating normally, then navigate to the nearest entrance of Line 1 metro station\n\n\u003Cdiv style=\"display: flex; align-items: center; justify-content: center; width: 80%; margin: 0 auto;\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_717050ef62cf.gif\" alt=\"Metro Line Query Demo\" style=\"flex: 1; height: 400px; object-fit: contain; margin-right: 1px;\"\u002F>\n\u003C\u002Fdiv>\n\n**[📹 Click to view demo video](.\u002Fimages\u002Fvideo_5.mp4)**\n\n### Complex Task - Multi-Item Shopping\n\nTask: Go to the nearest Hema Fresh Store on Ele.me and purchase: Red strawberries 300g, Peruvian Bianca blueberries 125g (18mm diameter), seasonal fresh yellow potatoes 500g, sweet baby pumpkin 750g, Hema large grain shrimp sliders, 2 bottles of Hema pure black soy milk 300ml, Little Prince macadamia nut cocoa crisp 120g, Hema spinach noodles, Hema five-spice beef, 5 bags of Haohuan snail Liuzhou river snail rice noodles (extra spicy extra smelly) 400g, m&m's milk chocolate beans 100g\n\n\u003Cdiv style=\"display: flex; align-items: center; justify-content: center; width: 80%; margin: 0 auto;\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_a9f5c2fa4318.gif\" alt=\"Multi-Item Shopping Demo\" style=\"flex: 1; 
height: 400px; object-fit: contain; margin-right: 1px;\"\u002F>\n\u003C\u002Fdiv>\n\n**[📹 Click to view demo video](.\u002Fimages\u002Fvideo_1.mp4)**\n\n\n### Complex Task - Information Retrieval\n\nTask: Search for 'how to learn financial management' on Zhihu and view the first answer with over 10k likes\n\n\u003Cdiv style=\"display: flex; align-items: center; justify-content: center; width: 80%; margin: 0 auto;\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_39a512acbedf.gif\" alt=\"Information Retrieval Demo\" style=\"flex: 1; height: 400px; object-fit: contain; margin-right: 1px;\"\u002F>\n\u003C\u002Fdiv>\n\n**[📹 Click to view demo video](.\u002Fimages\u002Fvideo_6.mp4)**\n\n### Complex Task - Conditional Search\n\nTask: Find a pair of white canvas shoes in size 37 on Taobao, priced under 100 yuan, then add the first item that meets the criteria to favorites\n\n\u003Cdiv style=\"display: flex; align-items: center; justify-content: center; width: 80%; margin: 0 auto;\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_2bfce8302d7f.gif\" alt=\"Conditional Search Demo\" style=\"flex: 1; height: 400px; object-fit: contain; margin-right: 1px;\"\u002F>\n\u003C\u002Fdiv>\n\n**[📹 Click to view demo video](.\u002Fimages\u002Fvideo_7.mp4)**\n\n### Complex Task - Online Quiz\n\nTask: Go to Baicizhan and help me complete the vocabulary learning task\n\n\u003Cdiv style=\"display: flex; align-items: center; justify-content: center; width: 80%; margin: 0 auto;\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_b11652fdfa0f.gif\" alt=\"Online Quiz Demo\" style=\"flex: 1; height: 400px; object-fit: contain; margin-right: 1px;\"\u002F>\n\u003C\u002Fdiv>\n\n**[📹 Click to view demo video](.\u002Fimages\u002Fvideo_8.mp4)**\n\n\n## 🏆 Open Benchmark\n\nWe conducted comprehensive evaluations of the GELab-Zero-4B-preview model across multiple open-source benchmarks, covering dimensions including GUI understanding, localization, and interaction. The comparison with other open-source models is shown below:\n\n![Open Benchmark Comparison Results](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_a463f35fa1d9.jpeg)\n\nGELab-Zero-4B-preview performs strongly across these benchmarks, with particularly outstanding results in real mobile scenarios (Android World), demonstrating its capability in practical applications.\n\n## 🚀 Installation & Quick Start\n\n\u003C!-- EN -->\n\nEnd-to-end inference requires just a few simple steps:\n\n1. Set up the LLM inference environment (ollama or vllm)\n2. Set up the Android device execution environment (adb configuration) and enable developer mode\n3. Set up the Agent runtime environment (gelab-zero one-click deployment script)\n4. Set up the trajectory visualization environment (optional)\n   The third-party infrastructure dependencies above are all very mature, so there is no need to worry.\n\nWe assume you have a Python 3.12+ environment installed and are comfortable with basic command-line operations.
If you have not set up Python yet, please refer to Step 0 below.\n\n### Step 0: Python Environment Setup\n\n\u003C!-- EN -->\n\nIf you do not have a Python 3.12+ environment yet, you can install one as follows.\nFor commercial-license friendliness and cross-platform support, we recommend using miniforge to install and manage Python environments. Official website: https:\u002F\u002Fgithub.com\u002Fconda-forge\u002Fminiforge\n\n\u003C!-- EN -->\n\n- **Windows Users**: **MUST use PowerShell**\n\n1. Download and manually install Miniforge. Refer to the **Install** section at: https:\u002F\u002Fgithub.com\u002Fconda-forge\u002Fminiforge. During installation, be sure to check the option that adds Conda to the **PATH** environment variable so that Conda can be activated properly.\n\n2. After installation, activate Conda. Open PowerShell and enter the following commands:\n```bash\n# Activate Conda in PowerShell\nconda init powershell\n\n# Allow Conda scripts to run on PowerShell startup\nSet-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser\n```\n\nSuccessful activation is indicated by \"(base)\" appearing at the start of the terminal prompt.\n\n3. We recommend VS Code for running and debugging code. Download and install it from the official website: https:\u002F\u002Fcode.visualstudio.com\u002F\n\n\u003C!-- EN -->\n\n- **Mac and Linux Users**:\n  \n1. Download and install miniforge from the command line:\n\n```bash\ncurl -L -O \"https:\u002F\u002Fgithub.com\u002Fconda-forge\u002Fminiforge\u002Freleases\u002Flatest\u002Fdownload\u002FMiniforge3-$(uname)-$(uname -m).sh\"\nbash Miniforge3-$(uname)-$(uname -m).sh\n```\n\nAfter installation, create and activate a new Python environment:\n\n```bash\nconda create -n gelab-zero python=3.12 -y\nconda activate gelab-zero\n```\n\n### Step 1: LLM Inference Environment Setup\n\nWe have verified two mainstream methods for local LLM inference: ollama and vllm. We recommend ollama for personal users, while enterprise users and those with stronger technical backgrounds can choose vllm for a more stable inference service.\n\n#### Step 1.1: Ollama Setup (Recommended for Personal Users)\n\n\u003C!-- https:\u002F\u002Follama.com\u002F -->\n\n\u003C!-- EN -->\nFor individual users running local inference, we strongly recommend Ollama, as it is simple to install and easy to use.\n\n- **Windows and Mac users**: You can download and install the graphical version directly from the official website: https:\u002F\u002Follama.com\u002F.\n\n- **Linux users**: Refer to the official documentation for installation: https:\u002F\u002Follama.com\u002Fdownload\u002Flinux.
The one-click installation command for Linux users is as follows:\n```bash\n# Download and run the official Ollama install script for Linux\ncurl -fsSL https:\u002F\u002Follama.com\u002Finstall.sh | sh\n```\n\n#### Step 1.2: GELab-Zero-4B-preview Model Setup\n\nAfter installing Ollama, download and deploy the gelab-zero-4b-preview model with the following commands:\n\n```bash\n# If the Hugging Face CLI is not installed yet, run this command first\npip install huggingface_hub\n\n# If the download is slow in China, you can try the mirror \"https:\u002F\u002Fhf-mirror.com\"\n\n# WINDOWS users can use the following command:\n# $env:HF_ENDPOINT = \"https:\u002F\u002Fhf-mirror.com\"\n\n# LINUX and MAC users can use the following command:\n# export HF_ENDPOINT=\"https:\u002F\u002Fhf-mirror.com\"\n\n# Download the gelab-zero-4b-preview model weights from huggingface\nhf download --no-force-download stepfun-ai\u002FGELab-Zero-4B-preview --local-dir gelab-zero-4b-preview\n\n# Import the model into ollama\ncd gelab-zero-4b-preview\nollama create gelab-zero-4b-preview -f Modelfile\n# If Windows users encounter an error, they need to specify the full path to ollama.exe, for example:\n# C:\\Users\\admin\\AppData\\Local\\Programs\\Ollama\\ollama.exe create gelab-zero-4b-preview -f Modelfile\n\n# On a low-spec machine, you may consider quantizing the model to improve inference speed. Note that quantization may cause some loss of model quality.\n# For detailed documentation, see: https:\u002F\u002Fdocs.ollama.com\u002Fimport#quantizing-a-model\n\n# Quantize the model with int8 precision (small precision loss, model size becomes 4.4G):\nollama create -q q8_0 gelab-zero-4b-preview\n\n# Quantize the model with int4 precision (large precision loss, model size becomes 2.2G):\nollama create -q Q4_K_M gelab-zero-4b-preview\n\n# Revert to the original precision:\nollama create -q f16 gelab-zero-4b-preview\n```\n\n- **Windows users**: You can open the Ollama app, select the model gelab-zero-4b-preview, and send a message to test whether the model replies correctly.\n\n- **Mac and Linux users**: You can test whether the model is installed successfully using the following command:\n\n```bash\ncurl -X POST http:\u002F\u002Flocalhost:11434\u002Fv1\u002Fchat\u002Fcompletions \\\n -H \"Content-Type: application\u002Fjson\" \\\n -d '{\n       \"model\": \"gelab-zero-4b-preview\",\n       \"messages\": [{\"role\": \"user\", \"content\": \"Hello, GELab-Zero!\"}]\n     }'\n```\n\nThe expected output should include the model's reply, indicating that the model has been installed and is running. For example:\n\n```json\n{\"id\":\"chatcmpl-174\",\"object\":\"chat.completion\",\"created\":1764405566,\"model\":\"gelab-zero-4b-preview\",\"system_fingerprint\":\"fp_ollama\",\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"Hello! I'm here to help with any questions or information you might need. How can I assist you today?\"},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":16,\"completion_tokens\":24,\"total_tokens\":40}}
```\n\nOnce the above steps succeed, your ollama environment and the gelab-zero-4b-preview model are installed correctly, and you can proceed to configuring the mobile execution environment.\n\n
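If you prefer to run this same check from Python (for example, to script the setup later), here is a minimal sketch using only the standard library; it assumes Ollama is serving on its default port 11434 and that the model was created under the name used above:\n\n```python\nimport json\nimport urllib.request\n\n# Same request as the curl test above, assuming Ollama's default port 11434.\nurl = 'http:\u002F\u002Flocalhost:11434\u002Fv1\u002Fchat\u002Fcompletions'\npayload = {\n    'model': 'gelab-zero-4b-preview',\n    'messages': [{'role': 'user', 'content': 'Hello, GELab-Zero!'}],\n}\nreq = urllib.request.Request(\n    url,\n    data=json.dumps(payload).encode('utf-8'),\n    headers={'Content-Type': 'application\u002Fjson'},\n)\nwith urllib.request.urlopen(req, timeout=120) as resp:\n    reply = json.load(resp)\n\n# A non-empty reply means the model is imported and serving correctly.\nprint(reply['choices'][0]['message']['content'])\n```\n\n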
In the \"Edit Environment Variables\" dialog box, click \"New,\" and then enter the extracted path of the ADB tool package.\n6. Click \"OK\" to save the changes and close all dialog boxes.\n```\n\n- **MAC and Linux Users**:\n\n1. You can install the ADB tool using Homebrew (Mac) or package managers (Linux). If you don't have Homebrew installed, you should install it first with the command:\n\n```bash\nruby -e $(curl -fsSL https:\u002F\u002Fraw.githubusercontent.com\u002FHomebrew\u002Finstall\u002Fmaster\u002Finstall)\n```\n\n2. Then use the following command to install the ADB tool:\n\n```bash\nbrew cask install android-platform-tools\n```\n\n#### Step 2.3: Connect Android Device to Computer\n\n\u003C!-- EN -->\n\nAfter connecting your phone to the computer using a USB cable, open a terminal or command prompt and\n\n```bash\nadb devices\n```\n\n\u003C!-- EN -->\n\nIf the connection is successful, you will see an output similar to the following, showing the list of connected devices:\n\n```bash\nList of devices attached\nAN2CVB4C28000731        device\n```\n\nIf you do not see any devices, please check if the USB cable and the USB debugging settings on your phone are correctly enabled. When connecting the phone for the first time, an authorization prompt may pop up on the phone; simply select \"Allow.\" As shown in the image below:\n\n\n\u003Cdiv style=\"display: flex; align-items: center; justify-content: center; width: 80%; margin: 0 auto;\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_79e587db45a9.png\" alt=\"Authorization Prompt on Xiaomi\" style=\"flex: 1; height: 230px; object-fit: contain; margin-right: 1px;\"\u002F>\n\u003C\u002Fdiv>\n\nIf the installation is unsuccessful, you can refer to third-party documentation: https:\u002F\u002Fgithub.com\u002Fquickappcn\u002Fissues\u002Fissues\u002F120 for further troubleshooting.\n\n### Step 3: GELab-Zero Agent Runtime Environment Setup\n\nAfter completing the above steps, you can deploy the GELab-Zero runtime environment with the following command:\n\n```bash\n# Clone the repository\ngit clone https:\u002F\u002Fgithub.com\u002Fstepfun-ai\u002Fgelab-zero\ncd gelab-zero\n\n# Install dependencies\npip install -r requirements.txt\n\n# To inference a single task\npython examples\u002Frun_single_task.py\n```\n\n### (Optional) Step 4: Trajectory Visualization Environment Setup\n\nThe trajectory will be defult saved in the `running_log\u002Fserver_log\u002Fos-copilot-local-eval-logs\u002F` directory. 
### (Optional) Step 4: Trajectory Visualization Environment Setup\n\nThe trajectory is saved by default in the `running_log\u002Fserver_log\u002Fos-copilot-local-eval-logs\u002F` directory. You can visualize the trajectory using streamlit:\n\n```bash\n# If you want other devices in the local area network (LAN) to access it, use --server.address 0.0.0.0\nstreamlit run --server.address 0.0.0.0 visualization\u002Fmain_page.py --server.port 33503\n\n# If you only want to access it on the local machine, use the following command:\nstreamlit run --server.address 127.0.0.1 visualization\u002Fmain_page.py --server.port 33503\n```\n\nThen open your browser and go to `http:\u002F\u002Flocalhost:33503` to access the visualization interface.\n\nEach task execution generates a unique session ID, which can be used to query and visualize the corresponding trajectory in the visualization interface.\n\nActions with coordinates, such as clicks and swipes, are marked on the screenshot for a better understanding of the agent's behavior.\n\n---\n\n### (Optional) Deploy with llama.cpp\n\n> Make sure you have already downloaded the GELab-Zero-4B-preview model locally.\n\n#### Step 1: Convert the model to GGUF format with llama.cpp\n\nClone the official llama.cpp repository:\n\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Fggerganov\u002Fllama.cpp.git\ncd llama.cpp\npip install -r requirements.txt\n# If there are dependency conflicts, create a Conda virtual environment.\n```\n\nConvert the model to GGUF format. Command-line arguments:\n\n1. The first path points to your locally downloaded GELab-Zero-4B-preview from Hugging Face.\n2. `--outtype` specifies the quantization precision.\n3. `--outfile` is the output filename; you can customize the path.\n\n```bash\n# No quantization, keep full model quality\npython convert_hf_to_gguf.py \u002FPATH\u002FTO\u002Fgelab-zero-4b-preview --outtype f16 --verbose --outfile gelab-zero-4b-preview_f16.gguf\n\n# Quantized (faster but lossy; known issue: \u003CTHINK> may become \u003CTHIN>)\npython convert_hf_to_gguf.py \u002FPATH\u002FTO\u002Fgelab-zero-4b-preview --outtype q8_0 --verbose --outfile gelab-zero-4b-preview_q8_0.gguf\n```\n\nThe INT8-quantized GGUF file is ~4.28 GB for reference.\n\nGELab-Zero-4B-preview is a vision model, so you also need to export an `mmproj` file:\n\n```bash\n# INT8 quantization for mmproj\npython convert_hf_to_gguf.py \u002FPATH\u002FTO\u002Fgelab-zero-4b-preview --outtype q8_0 --verbose --outfile gelab-zero-4b-preview_q8_0_mmproj.gguf --mmproj\n```\n\nThe INT8-quantized mmproj GGUF file is ~454 MB for reference.\n\n#### Step 2: Serve locally with Jan\n\nYou can use any llama.cpp-compatible client to spin up a local API service; here we use [Jan](https:\u002F\u002Fgithub.com\u002Fjanhq\u002Fjan) as an example:\n\nDownload the [Jan](https:\u002F\u002Fgithub.com\u002Fjanhq\u002Fjan\u002Freleases) client and install it.\n\nGo to Settings → Model Provider → choose llama.cpp, then import the models:\n\n\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_32db15dfcd66.png\" width=\"50%\" alt=\"test model\">\n\nSelect the two GGUF files you just converted:\n\n\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_5632fd3c62cc.png\" width=\"50%\" alt=\"test model\">\n\nBack in the model UI, click `Start`.\n\nCreate a chat to verify the model runs correctly:\n\n\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_07d75cff5518.png\" width=\"50%\" alt=\"test model\">\n\nOnce tokens are streaming normally, start the local API server.\n\nGo to Settings → Local API Server, create an API key under server
configuration, then launch the service:\n\n\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_48cd394523a2.png\" width=\"50%\" alt=\"test model\">\n\n#### Step 3: Adjust GELab-Zero Agent model config\n\nllama.cpp’s service differs slightly from Ollama, so you must tweak the model config in GELab-Zero Agent. Two places:\n\n1. In `model_config.yaml`, update the port and API key (use the key you just created):\n\n```yaml\nlocal:\n    api_base: \"http:\u002F\u002Flocalhost:1337\u002Fv1\"\n    api_key: \"YOUR_KEY\"\n```\n\n2. In `examples\u002Frun_single_task.py`, remove any parameter suffix from the model name (line 21):\n\n```python\nlocal_model_config = {\n    \"task_type\": \"parser_0922_summary\",\n    \"model_config\": {\n        \"model_name\": \"gelab-zero\",\n        \"model_provider\": \"local\",\n        \"args\": {\n            \"temperature\": 0.1,\n            \"top_p\": 0.95,\n            \"frequency_penalty\": 0.0,\n            \"max_tokens\": 4096,\n        },\n```\n\n---\n\n### (Optional) MCP-Server Setup\n\n\u003C!-- ### Step1 启动 mcp server 以支持多设备管理和任务分发 -->\n#### Step 1: Start MCP server to support multi-device management and task distribution\n\n```bash\n# enable mcp server\npython mcp_server\u002Fdetailed_gelab_mcp_server.py\n```\n\n#### Step 2: Import MCP tools in Chatbox\n\u003C!-- https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_f006a174dd9d.png -->\n\u003Cdiv style=\"display: flex; align-items: center; justify-content: center; width: 80%; margin: 0 auto;\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_f006a174dd9d.png\" alt=\"MCP-Demo\" style=\"flex: 1; height: 400px; object-fit: contain; margin-right: 1px;\"\u002F>\n\u003C\u002Fdiv>\n\n\n\n## 📝 Citation\n\nIf you find GELab-Zero useful for your research, please consider citing our work :)\n\n```bibtex\n@misc{yan2025stepguitechnicalreport,\n      title={Step-GUI Technical Report}, \n      author={Haolong Yan and Jia Wang and Xin Huang and Yeqing Shen and Ziyang Meng and Zhimin Fan and Kaijun Tan and Jin Gao and Lieyu Shi and Mi Yang and Shiliang Yang and Zhirui Wang and Brian Li and Kang An and Chenyang Li and Lei Lei and Mengmeng Duan and Danxun Liang and Guodong Liu and Hang Cheng and Hao Wu and Jie Dong and Junhao Huang and Mei Chen and Renjie Yu and Shunshan Li and Xu Zhou and Yiting Dai and Yineng Deng and Yingdan Liang and Zelin Chen and Wen Sun and Chengxu Yan and Chunqin Xu and Dong Li and Fengqiong Xiao and Guanghao Fan and Guopeng Li and Guozhen Peng and Hongbing Li and Hang Li and Hongming Chen and Jingjing Xie and Jianyong Li and Jingyang Zhang and Jiaju Ren and Jiayu Yuan and Jianpeng Yin and Kai Cao and Liang Zhao and Liguo Tan and Liying Shi and Mengqiang Ren and Min Xu and Manjiao Liu and Mao Luo and Mingxin Wan and Na Wang and Nan Wu and Ning Wang and Peiyao Ma and Qingzhou Zhang and Qiao Wang and Qinlin Zeng and Qiong Gao and Qiongyao Li and Shangwu Zhong and Shuli Gao and Shaofan Liu and Shisi Gao and Shuang Luo and Xingbin Liu and Xiaojia Liu and Xiaojie Hou and Xin Liu and Xuanti Feng and Xuedan Cai and Xuan Wen and Xianwei Zhu and Xin Liang and Xin Liu and Xin Zhou and Yingxiu Zhao and Yukang Shi and Yunfang Xu and Yuqing Zeng and Yixun Zhang and Zejia Weng and Zhonghao Yan and Zhiguo Huang and Zhuoyu Wang and Zheng Ge and Jing Li and Yibo Zhu and Binxing Jiao and Xiangyu Zhang and Daxin Jiang},\n      year={2025},\n      eprint={2512.15431},\n      
archivePrefix={arXiv},\n      primaryClass={cs.CV},\n      url={https:\u002F\u002Farxiv.org\u002Fabs\u002F2512.15431}, \n}\n\n@software{gelab_zero_2025,\n  title={GELab-Zero: An Advanced Mobile Agent Inference System},\n  author={GELab Team},\n  year={2025},\n  url={https:\u002F\u002Fgithub.com\u002Fstepfun-ai\u002Fgelab-zero}\n}\n\n@misc{gelab_engine,\n      title={GUI Exploration Lab: Enhancing Screen Navigation in Agents via Multi-Turn Reinforcement Learning}, \n      author={Haolong Yan and Yeqing Shen and Xin Huang and Jia Wang and Kaijun Tan and Zhixuan Liang and Hongxin Li and Zheng Ge and Osamu Yoshie and Si Li and Xiangyu Zhang and Daxin Jiang},\n      year={2025},\n      eprint={2512.02423},\n      archivePrefix={arXiv},\n      primaryClass={cs.CV},\n      url={https:\u002F\u002Farxiv.org\u002Fabs\u002F2512.02423}, \n}\n\n```\n\n## ⭐ Star History\n\n\u003Cdiv align=\"center\">\n  \u003Ca href=\"https:\u002F\u002Fstar-history.com\u002F#stepfun-ai\u002Fgelab-zero&Date\">\n    \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_7b5331398081.png\" alt=\"Star History Chart\" width=\"600\">\n  \u003C\u002Fa>\n\u003C\u002Fdiv>\n","![GELab-Zero 主图](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_5b0e9abd3a92.png)\n\n> 👋 大家好！我们自豪地推出首个完全开源的 GUI 代理，它同时包含了模型和基础设施。我们的解决方案采用即插即用的设计，无需任何云端依赖，让您完全掌控数据隐私。\n\n\u003Cp align=\"center\">\n  \u003C!-- \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fstepfun-ai\u002Fgelab-zero\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F💻%20GitHub-Repository-black\" alt=\"GitHub\" \u002F>\u003C\u002Fa> -->\n  \u003Ca href=\"https:\u002F\u002Farxiv.org\u002Fabs\u002F2512.15431\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FarXiv-Step--GUI Technical Report-B31B1B.svg?logo=arxiv&logoColor=white\" alt=\"arXiv\" \u002F>\u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fopengelab.github.io\u002F\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F🌐%20Website-Project%20Page-blue\" alt=\"网站\" \u002F>\u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fhuggingface.co\u002Fstepfun-ai\u002FGELab-Zero-4B-preview\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F🤗%20Hugging%20Face-GELab--Zero--4B--preview-orange\" alt=\"Hugging Face 模型\" \u002F>\u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fstepfun-ai\u002FAndroidDaily\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F📚%20Hugging%20Face-AndroidDaily-yellow\" alt=\"Hugging Face 数据集\" \u002F>\u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fmodelscope.cn\u002Fmodels\u002Fstepfun-ai\u002FGELab-Zero-4B-preview\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F🤖%20Model%20Scope-GELab--Zero--4B--preview-blue\" alt=\"Model Scope\" \u002F>\u003C\u002Fa>\n\u003C\u002Fp>\n\n\u003Cp align=\"center\">\n  \u003Ca href=\".\u002FREADME.md\">English\u003C\u002Fa> |\n  \u003Ca href=\".\u002FREADME_CN.md\">简体中文\u003C\u002Fa>\n\u003C\u002Fp>\n\n## 📰 新闻\n\n* 🎁 **[2025-12-18]** 我们在 [**arXiv**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2512.15431) 上发布了 **Step-GUI 技术报告**！\n* 🎁 **[2025-12-18]** 我们发布了一个功能更强大的 **API**，用于 GUI 自动化任务。[在此申请 API 访问权限](https:\u002F\u002Fwvixbzgc0u7.feishu.cn\u002Fshare\u002Fbase\u002Fform\u002FshrcnNStxEmuE7aY6jTW07CZHMf)！\n* 🎁 **[2025-12-12]** 我们推出了支持多设备管理和任务分配的 **MCP-Server**。请参阅 [安装与快速入门](#-installation-quick-start) 和 [MCP-Server 设置](#optional-mcp-server-setup) 获取设置说明。\n* 🎁 **[2025-12-1]** 
我们感谢以下项目和作者提供的量化工具及教程：[GGUF_v1](https:\u002F\u002Fhuggingface.co\u002Fbartowski\u002Fstepfun-ai_GELab-Zero-4B-preview-GGUF)、[GGUF_v2](https:\u002F\u002Fhuggingface.co\u002Fnoctrex\u002FGELab-Zero-4B-preview-GGUF)、[EXL3](https:\u002F\u002Fhuggingface.co\u002FArtusDev\u002Fstepfun-ai_GELab-Zero-4B-preview-EXL3)、[教程_CN](http:\u002F\u002Fxhslink.com\u002Fo\u002F1WrmgHGWFYh)、[教程_EN](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=4BMiDyQOpos)\n* 🎁 **[2025-11-31]** 我们在 [**Hugging Face**](https:\u002F\u002Fhuggingface.co\u002Fstepfun-ai\u002FGELab-Zero-4B-preview) 和 [**Model Scope**](https:\u002F\u002Fmodelscope.cn\u002Fmodels\u002Fstepfun-ai\u002FGELab-Zero-4B-preview) 上发布了轻量级的 **4B** 模型 GELab-Zero-4B-preview。\n* 🎁 **[2025-11-31]** 我们发布了来自 [**AndroidDaily**](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fstepfun-ai\u002FAndroidDaily) 基准测试的任务。\n* 🎁 **[2025-11-30]** 我们发布了当前的 **GELab-Zero** 工程基础设施。\n* 🎁 **[2025-10]** 我们的关于 GELab-Engine 的 [**研究**](https:\u002F\u002Fgithub.com\u002Fsummoneryhl\u002Fgelab-engine) 论文已被 **NeurIPS 2025** 接受。\n\n\n\n## 📑 目录\n\n- [📖 背景](#-background)\n- [🎥 应用演示](#-application-demonstrations)\n- [🏆 开放式基准测试](#-open-benchmark)\n- [🚀 安装与快速入门](#-installation-quick-start)\n- [📝 引用](#-citation)\n\n\n## 📧 联系方式\n\n您可以通过加入我们的微信群与我们联系和交流：\n\n| 微信群 |\n|:-------------------------:|\n| \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_b57feb679b37.jpeg\" width=\"200\"> |\n\n\n\n\n## 📖 背景\n\n随着人工智能在消费级设备中的应用日益普及，移动 Agent 研究正处于一个关键转折点：从 **“可行性验证”** 向 **“大规模应用”** 过渡。尽管基于 GUI 的解决方案具有普遍兼容性，但移动生态系统的碎片化却带来了沉重的工程负担，阻碍了创新。GELab-Zero 的设计正是为了打破这些壁垒。\n\n* ⚡️ **开箱即用的全栈基础设施**  \n通过统一的一键推理流程，解决了移动生态系统的碎片化问题。它能够自动处理多设备的 ADB 连接、依赖项和权限，使开发者可以专注于战略创新，而非工程基础设施。\n\n* 🖥️ **消费级本地部署**  \n内置一个针对 Mac (M系列) 和 NVIDIA RTX 4060 完全优化的 4B GUI 代理模型。支持完全本地运行，确保在标准消费级硬件上实现数据隐私和低延迟。\n\n* 📱 **灵活的任务分配与编排**  \n支持跨多个设备分配任务，并记录交互轨迹。提供 ReAct 循环、多智能体协作和定时任务三种灵活模式，以应对复杂的现实业务场景。\n\n* 🚀 **加速从原型到生产**  \n赋能开发者快速验证交互策略，同时允许企业直接复用底层基础设施进行零成本的 MCP 集成，从而弥合“可行性验证”与“大规模应用”之间的关键鸿沟。\n\n## 🎥 应用演示\n\n### 推荐 - 科幻电影\n\n任务：帮我找一些最近上映的好看的科幻电影\n\n\u003C!-- gif -->\n\u003Cdiv style=\"display: flex; align-items: center; justify-content: center; width: 80%; margin: 0 auto;\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_8d165e1c9a8c.gif\" alt=\"科幻电影推荐演示\" style=\"flex: 1; height: 400px; object-fit: contain; margin-right: 1px;\"\u002F>\n\u003C\u002Fdiv>\n\n**[📹 点击查看演示视频](.\u002Fimages\u002Fvideo_2.mp4)**\n\n\n### 推荐 - 旅行目的地\n\n任务：帮我找个周末可以带孩子去的地方\n\n\u003Cdiv style=\"display: flex; align-items: center; justify-content: center; width: 80%; margin: 0 auto;\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_83be65930508.gif\" alt=\"旅行目的地推荐演示\" style=\"flex: 1; height: 400px; object-fit: contain; margin-right: 1px;\"\u002F>\n\u003C\u002Fdiv>\n\n**[📹 点击查看演示视频](.\u002Fimages\u002Fvideo_4.mp4)**\n\n\n### 实用任务 - 领取补贴\n\n任务：在企业福利平台上领取餐券\n\n\u003Cdiv style=\"display: flex; align-items: center; justify-content: center; width: 80%; margin: 0 auto;\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_a08f9d30facd.gif\" alt=\"领取餐券演示\" style=\"flex: 1; height: 400px; object-fit: contain; margin-right: 1px;\"\u002F>\n\u003C\u002Fdiv>\n\n**[📹 点击查看演示视频](.\u002Fimages\u002Fvideo_3.mp4)**\n\n### 实用任务 - 地铁线路查询\n\n任务：查询地铁1号线是否正常运营，然后导航到最近的1号线地铁站入口\n\n\u003Cdiv style=\"display: flex; align-items: center; justify-content: center; width: 80%; margin: 0 auto;\">\n  
\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_717050ef62cf.gif\" alt=\"地铁线路查询演示\" style=\"flex: 1; height: 400px; object-fit: contain; margin-right: 1px;\"\u002F>\n\u003C\u002Fdiv>\n\n**[📹 点击查看演示视频](.\u002Fimages\u002Fvideo_5.mp4)**\n\n### 复杂任务 - 多商品购物\n\n任务：前往饿了么上最近的盒马鲜生门店，购买以下商品：红色草莓300克、秘鲁比安卡蓝莓125克（直径18毫米）、当季新鲜黄土豆500克、甜味小南瓜750克、盒马大粒虾滑、盒马纯黑豆浆2瓶（每瓶300毫升）、小王子夏威夷果可可脆饼120克、盒马菠菜面、盒马五香牛肉、好欢螺螺蛳粉5袋（重辣特臭口味，每袋400克）、m&m's牛奶巧克力豆100克。\n\n\u003Cdiv style=\"display: flex; align-items: center; justify-content: center; width: 80%; margin: 0 auto;\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_a9f5c2fa4318.gif\" alt=\"多商品购物演示\" style=\"flex: 1; height: 400px; object-fit: contain; margin-right: 1px;\"\u002F>\n\u003C\u002Fdiv>\n\n**[📹 点击查看演示视频](.\u002Fimages\u002Fvideo_1.mp4)**\n\n\n### 复杂任务 - 信息检索\n\n任务：在知乎上搜索“如何学习财务管理”，并查看第一个点赞数超过1万的回答。\n\n\u003Cdiv style=\"display: flex; align-items: center; justify-content: center; width: 80%; margin: 0 auto;\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_39a512acbedf.gif\" alt=\"信息检索演示\" style=\"flex: 1; height: 400px; object-fit: contain; margin-right: 1px;\"\u002F>\n\u003C\u002Fdiv>\n\n**[📹 点击查看演示视频](.\u002Fimages\u002Fvideo_6.mp4)**\n\n### 复杂任务 - 条件搜索\n\n任务：在淘宝上找到一双白色帆布鞋，尺码为37码，价格低于100元，然后将符合条件的第一件商品加入收藏夹。\n\n\u003Cdiv style=\"display: flex; align-items: center; justify-content: center; width: 80%; margin: 0 auto;\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_2bfce8302d7f.gif\" alt=\"条件搜索演示\" style=\"flex: 1; height: 400px; object-fit: contain; margin-right: 1px;\"\u002F>\n\u003C\u002Fdiv>\n\n**[📹 点击查看演示视频](.\u002Fimages\u002Fvideo_7.mp4)**\n\n### 复杂任务 - 在线答题\n\n任务：前往百词斩，帮我完成词汇学习任务。\n\n\u003Cdiv style=\"display: flex; align-items: center; justify-content: center; width: 80%; margin: 0 auto;\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_b11652fdfa0f.gif\" alt=\"在线答题演示\" style=\"flex: 1; height: 400px; object-fit: contain; margin-right: 1px;\"\u002F>\n\u003C\u002Fdiv>\n\n**[📹 点击查看演示视频](.\u002Fimages\u002Fvideo_8.mp4)**\n\n\n## 🏆 开放基准测试\n\n我们在多个开源基准测试中对GELab-Zero-4B-preview模型进行了全面评估，涵盖了GUI理解、本地化和交互等多个维度。与其他开源模型的对比结果如下：\n\n![开放基准测试对比结果](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_a463f35fa1d9.jpeg)\n\n基准测试结果显示，GELab-Zero-4B-preview在多个开源基准测试中表现出色，尤其在真实移动场景（Android世界）中表现尤为突出，证明了其在实际应用中的强大能力。\n\n## 🚀 安装与快速入门\n\n\u003C!-- EN -->\n\n端到端推理只需几个简单步骤：\n\n1. 搭建LLM推理环境（ollama或vllm）\n2. 搭建安卓设备执行环境（adb配置）并开启开发者模式\n3. 搭建Agent运行环境（gelab-zero一键部署脚本）\n4. 搭建轨迹可视化环境（可选）\n   上述第三方基础设施依赖非常成熟，无需担心。\n\n我们假设您已安装Python 3.12及以上版本，并具备一定的命令行操作基础。如果您尚未安装Python环境，请参考步骤0进行安装。\n\n### 步骤0：Python环境搭建\n\n\u003C!-- EN -->\n\n如果您尚未安装Python 3.12及以上版本，可以参考以下步骤进行安装：\n为了商业友好性和跨平台支持，我们推荐使用miniforge来安装和管理Python环境。官方网站：https:\u002F\u002Fgithub.com\u002Fconda-forge\u002Fminiforge\n\n\u003C!-- EN -->\n\n- **Windows用户**：**必须使用powershell**\n\n1. 直接下载并手动安装Miniforge。请参考：https:\u002F\u002Fgithub.com\u002Fconda-forge\u002Fminiforge中的“Install”部分。安装时，请务必勾选将Conda添加到**PATH**环境变量的选项，以确保Conda能够正确激活。\n\n2. 安装完成后，激活Conda。打开PowerShell并输入以下命令：\n```bash\n# 在PowerShell中激活Conda\nconda init powershell\n\n# 允许Conda脚本在PowerShell启动时运行\nSet-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser\n```\n\n成功激活的标志是终端最新一行开头显示“(base)”。\n\n3. 
建议使用VS Code进行代码执行和调试。请从官方网站下载并安装：https:\u002F\u002Fcode.visualstudio.com\u002F\n\n\u003C!-- EN -->\n\n- **Mac和Linux用户**：\n  \n1. 使用命令行下载并安装miniforge：\n\n```bash\ncurl -L -O \"https:\u002F\u002Fgithub.com\u002Fconda-forge\u002Fminiforge\u002Freleases\u002Flatest\u002Fdownload\u002FMiniforge3-$(uname)-$(uname -m).sh\"\nbash Miniforge3-$(uname)-$(uname -m).sh\n```\n\n安装完成后，创建并激活一个新的Python环境：\n\n```bash\nconda create -n gelab-zero python=3.12 -y\nconda activate gelab-zero\n```\n\n### 步骤1：LLM推理环境搭建\n\n我们验证了两种主流的LLM本地推理部署方法：ollama和vllm。建议个人用户使用ollama方法，而企业用户及有一定技术背景的用户可以选择vllm方法，以获得更稳定的推理服务。\n\n#### 步骤1.1：Ollama搭建（推荐个人用户）\n\n\u003C!-- https:\u002F\u002Follama.com\u002F -->\n\n\u003C!-- EN -->\n对于进行本地推理的个人用户，我们强烈建议使用Ollama进行本地部署，因为它具有安装简单、使用方便的优点。\n\n- **Windows和Mac用户**：可以直接从官方网站下载并安装图形界面版本：https:\u002F\u002Follama.com\u002F。\n\n- **Linux用户**：请参考官方文档进行安装：https:\u002F\u002Follama.com\u002Fdownload\u002Flinux。Linux用户的一键安装命令如下：\n```bash\n# 下载并运行 Ollama 官方 Linux 安装脚本\ncurl -fsSL https:\u002F\u002Follama.com\u002Finstall.sh | sh\n```\n\n#### 步骤1.2：GELab-Zero-4B-preview模型搭建\n\n完成Ollama的安装后，您需要使用以下命令下载并部署gelab-zero-4b-preview模型：\n\n```bash\n# 如果尚未安装huggingface cli，先执行此命令\npip install huggingface_hub\n\n# 如果在中国下载速度较慢，可以尝试使用镜像加速“https:\u002F\u002Fhf-mirror.com”\n\n# WINDOWS用户可以使用以下命令：\n# $env:HF_ENDPOINT = \"https:\u002F\u002Fhf-mirror.com\"\n\n# LINUX和MAC用户可以使用以下命令：\n# export HF_ENDPOINT=\"https:\u002F\u002Fhf-mirror.com\"\n\n# 从huggingface下载gelab-zero-4b-preview模型权重\nhf download --no-force-download stepfun-ai\u002FGELab-Zero-4B-preview --local-dir gelab-zero-4b-preview\n\n# 将模型导入 ollama\ncd gelab-zero-4b-preview\nollama create gelab-zero-4b-preview -f Modelfile\n# 如果 Windows 用户遇到错误，需要指定安装路径，例如：\n# C:\\Users\\admin\\AppData\\Local\\Programs\\Ollama\\ollama.exe create gelab-zero-4b-preview -f Modelfile\n\n# 如果您的电脑配置较低，可以考虑对模型进行量化以提高推理速度。请注意，量化可能会导致模型性能的一定程度下降。\n# 详细文档请参阅：https:\u002F\u002Fdocs.ollama.com\u002Fimport#quantizing-a-model\n\n# 使用 int8 精度对模型进行量化（精度损失较小，模型大小变为 4.4G）：\nollama create -q q8_0 gelab-zero-4b-preview\n\n# 使用 int4 精度对模型进行量化（精度损失较大，模型大小变为 2.2G）：\nollama create -q Q4_K_M gelab-zero-4b-preview\n\n# 恢复到原始精度：\nollama create -q f16 gelab-zero-4b-preview\n```\n\n- **Windows 用户**：您可以打开 Ollama 应用程序，选择 gelab-zero-4b-preview 模型，并发送一条消息来测试该模型是否能够正确回复。\n\n- **Mac 和 Linux 用户**：您可以通过以下命令测试模型是否已成功安装：\n\n```bash\ncurl -X POST http:\u002F\u002Flocalhost:11434\u002Fv1\u002Fchat\u002Fcompletions \\\n -H \"Content-Type: application\u002Fjson\" \\\n -d '{\n       \"model\": \"gelab-zero-4b-preview\",\n       \"messages\": [{\"role\": \"user\", \"content\": \"Hello, GELab-Zero!\"}]\n     }'\n```\n\n预期输出应包含模型的回复内容，表明模型已成功安装并运行。例如：\n\n```json\n{\"id\":\"chatcmpl-174\",\"object\":\"chat.completion\",\"created\":1764405566,\"model\":\"gelab-zero-4b-preview\",\"system_fingerprint\":\"fp_ollama\",\"choices\":[{\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"Hello! I'm here to help with any questions or information you might need. How can I assist you today?\"},\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":16,\"completion_tokens\":24,\"total_tokens\":40}}\n```\n\n完成上述步骤后，说明您的 ollama 环境和 gelab-zero-4b-preview 模型已成功安装，您可以继续下一步的移动执行环境配置。\n\n
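如果你希望用 Python 脚本完成同样的检查（例如为后续的自动化做准备），下面是一个仅依赖标准库的最小示意，假设 Ollama 运行在默认端口 11434、模型名称与上文创建的一致：\n\n```python\nimport json\nimport urllib.request\n\n# 与上文 curl 测试等价的请求，假设 Ollama 监听默认端口 11434。\nurl = 'http:\u002F\u002Flocalhost:11434\u002Fv1\u002Fchat\u002Fcompletions'\npayload = {\n    'model': 'gelab-zero-4b-preview',\n    'messages': [{'role': 'user', 'content': 'Hello, GELab-Zero!'}],\n}\nreq = urllib.request.Request(\n    url,\n    data=json.dumps(payload).encode('utf-8'),\n    headers={'Content-Type': 'application\u002Fjson'},\n)\nwith urllib.request.urlopen(req, timeout=120) as resp:\n    reply = json.load(resp)\n\n# 能打印出非空回复，即说明模型已成功导入并正常服务。\nprint(reply['choices'][0]['message']['content'])\n```\n\n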
### 第二步：Android 设备执行环境设置\n\n为了让 GELab-Zero 能够控制手机执行任务，您需要完成以下步骤来配置移动执行环境：\n\n1. 在手机上启用开发者模式和 USB 调试。\n2. 安装 ADB 工具，并确保计算机可以通过 ADB 连接到手机。（如果您已经安装了 adb 工具，可以跳过此步骤）\n3. 使用 USB 数据线将手机连接到计算机，并通过 `adb devices` 命令确认连接是否成功。\n\n#### 步骤 2.1：启用开发者模式和 USB 调试\n\n\u003C!-- EN -->\n\n通常，您可以通过以下步骤在 Android 手机上启用开发者模式和 USB 调试：\n\n1. 打开手机上的“设置”应用。\n2. 找到“关于手机”或“系统”选项，连续点击“版本号”10 次以上，直到出现“您现在是开发者”的提示。\n3. 返回主“设置”菜单，找到“开发者选项”。【重要，必须启用】\n4. 在“开发者选项”中，找到并启用“USB 调试”功能。按照屏幕上的指示启用 USB 调试。【重要，必须启用】\n\n不同品牌的手机可能会有一些细微差异，请根据您的具体情况调整。一般来说，搜索“\u003C手机品牌> 如何启用开发者模式”即可找到相关教程。\n完成设置后，界面应如下图所示：\n\n\u003Cdiv style=\"display: flex; align-items: center; justify-content: center; width: 80%; margin: 0 auto;\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_ecf67ace5441.png\" alt=\"Developer mode screenshot of XiaoMi\" style=\"flex: 1; height: 230px; object-fit: contain; margin-right: 1px;\"\u002F>\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_fb8cd1bdac9c.png\" alt=\"Developer mode screenshot of Honor\" style=\"flex: 1; height: 230px; object-fit: contain; margin-left: 1px;\"\u002F>\n\u003C\u002Fdiv>\n\n#### 步骤 2.2：安装 ADB 工具\n\n\u003C!-- EN -->\n\nADB（Android Debug Bridge）是用于 Android 设备与计算机之间通信的桥梁工具。您可以通过以下步骤安装 ADB 工具：\n\n- **Windows 用户**：\n  1. 下载 ADB 工具包：https:\u002F\u002Fdl.google.com\u002Fandroid\u002Frepository\u002Fplatform-tools-latest-windows.zip，并将其解压到合适的位置。\n  2. 将解压后的文件夹路径添加到系统环境变量中，以便在命令行中直接使用 adb 命令。详细步骤请参阅：https:\u002F\u002Flearn.microsoft.com\u002Fen-us\u002Fprevious-versions\u002Foffice\u002Fdeveloper\u002Fsharepoint-2010\u002Fee537574(v=office.14)。具体步骤包括：\n\n```\n1. 在“开始”菜单中右键单击“计算机”，选择“属性”。\n2. 单击“高级系统设置”。\n3. 在“系统属性”对话框中，单击“环境变量”按钮。\n4. 在“系统变量”部分，找到并选择“Path”变量，然后单击“编辑”按钮。\n5. 在“编辑环境变量”对话框中，单击“新建”，然后输入 ADB 工具包的解压路径。\n6. 单击“确定”保存更改，并关闭所有对话框。\n```\n\n- **Mac 和 Linux 用户**：\n\n1. 您可以通过 Homebrew（Mac）或包管理器（Linux）安装 ADB 工具。如果尚未安装 Homebrew，需先使用以下命令进行安装：\n\n```bash\n\u002Fbin\u002Fbash -c \"$(curl -fsSL https:\u002F\u002Fraw.githubusercontent.com\u002FHomebrew\u002Finstall\u002FHEAD\u002Finstall.sh)\"\n```\n\n2. 然后使用以下命令安装 ADB 工具：\n\n```bash\nbrew install --cask android-platform-tools\n```\n\n#### 步骤 2.3：将 Android 设备连接到计算机\n\n\u003C!-- EN -->\n\n使用 USB 数据线将手机连接到计算机后，打开终端或命令提示符，并输入以下命令：\n\n```bash\nadb devices\n```\n\n\u003C!-- EN -->\n\n如果连接成功，您将看到类似以下的输出，显示已连接的设备列表：\n\n```bash\nList of devices attached\nAN2CVB4C28000731        device\n```\n\n如果没有看到任何设备，请检查 USB 数据线以及手机上的 USB 调试设置是否已正确启用。首次连接时，手机可能会弹出授权提示，只需选择“允许”即可。如图所示：\n\n\n\u003Cdiv style=\"display: flex; align-items: center; justify-content: center; width: 80%; margin: 0 auto;\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_79e587db45a9.png\" alt=\"Authorization Prompt on Xiaomi\" style=\"flex: 1; height: 230px; object-fit: contain; margin-right: 1px;\"\u002F>\n\u003C\u002Fdiv>\n\n如果安装失败，您可以参考第三方文档：https:\u002F\u002Fgithub.com\u002Fquickappcn\u002Fissues\u002Fissues\u002F120 进行进一步排查。\n\n### 第三步：GELab-Zero 代理运行环境设置\n\n完成上述步骤后，您可以通过以下命令部署 GELab-Zero 的运行环境：\n\n```bash\n# 克隆仓库\ngit clone https:\u002F\u002Fgithub.com\u002Fstepfun-ai\u002Fgelab-zero\ncd gelab-zero\n\n# 安装依赖\npip install -r requirements.txt\n\n# 进行单任务推理\npython examples\u002Frun_single_task.py\n```\n\n
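在继续之前，可以先确认步骤 1 和步骤 2 均已成功。下面是一个示意性的预检脚本（并非仓库自带）：假设 `adb` 已加入 PATH、Ollama 运行在默认端口，检查通过后再启动单任务示例：\n\n```python\nimport subprocess\nimport sys\nimport urllib.request\n\n# 检查一：adb 至少列出一台状态为 'device' 的手机（步骤 2）。\nout = subprocess.run(['adb', 'devices'], capture_output=True, text=True, check=True).stdout\ndevices = [line.split()[0] for line in out.splitlines()[1:] if line.strip().endswith('device')]\nif not devices:\n    sys.exit('未发现 ADB 设备，请重新检查步骤 2 中的 USB 调试设置。')\n\n# 检查二：本地模型服务在 Ollama 默认端口上可访问（步骤 1）。\ntry:\n    urllib.request.urlopen('http:\u002F\u002Flocalhost:11434\u002Fapi\u002Ftags', timeout=5)\nexcept OSError:\n    sys.exit('无法连接模型服务，请重新检查步骤 1。')\n\nprint(f'就绪：{len(devices)} 台设备已连接，模型服务正常。')\nsubprocess.run([sys.executable, 'examples\u002Frun_single_task.py'], check=True)\n```\n\n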
### （可选）步骤4：轨迹可视化环境搭建\n\n轨迹默认会保存在 `running_log\u002Fserver_log\u002Fos-copilot-local-eval-logs\u002F` 目录下。你可以使用 streamlit 来可视化轨迹：\n\n```bash\n# 如果希望局域网内的其他设备也能访问，使用 --server.address 0.0.0.0\nstreamlit run --server.address 0.0.0.0 visualization\u002Fmain_page.py --server.port 33503\n\n# 如果只希望在本机访问，使用以下命令：\nstreamlit run --server.address 127.0.0.1 visualization\u002Fmain_page.py --server.port 33503\n```\n\n然后打开浏览器，访问 `http:\u002F\u002Flocalhost:33503` 即可进入可视化界面。\n\n每次任务执行都会生成一个唯一的会话ID，你可以在可视化界面中通过该ID查询并展示对应的轨迹。\n\n对于点击、滑动等带有位置信息的操作，会在截图上标注出来，以便更好地理解智能体的行为。\n\n---\n\n### （可选）使用 llama.cpp 部署\n\n> 请确保你已经将 GELab-Zero-4B-preview 模型下载到本地。\n\n#### 步骤1：使用 llama.cpp 将模型转换为 GGUF 格式\n\n克隆官方的 llama.cpp 仓库：\n\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Fggerganov\u002Fllama.cpp.git\ncd llama.cpp\npip install -r requirements.txt\n# 如果存在依赖冲突，请创建一个 Conda 虚拟环境。\n```\n\n将模型转换为 GGUF 格式。命令行参数说明如下：\n\n1. 第一个路径指向你从 Hugging Face 下载的本地 GELab-Zero-4B-preview 模型。\n2. `--outtype` 指定量化精度。\n3. `--outfile` 是输出文件名，你可以自定义路径。\n\n```bash\n# 不进行量化，保持模型完整质量\npython convert_hf_to_gguf.py \u002FPATH\u002FTO\u002Fgelab-zero-4b-preview --outtype f16 --verbose --outfile gelab-zero-4b-preview_f16.gguf\n\n# 量化版本（速度更快但会有一定损失；已知问题：`\u003CTHINK>` 可能会变成 `\u003CTHIN>`）\npython convert_hf_to_gguf.py \u002FPATH\u002FTO\u002Fgelab-zero-4b-preview --outtype q8_0 --verbose --outfile gelab-zero-4b-preview_q8_0.gguf\n```\n\n供参考，INT8 量化的 GGUF 文件大小约为 4.28 GB。\n\n由于 GELab-Zero-4B-preview 是视觉模型，还需要导出一个 `mmproj` 文件：\n\n```bash\n# 对 mmproj 进行 INT8 量化\npython convert_hf_to_gguf.py \u002FPATH\u002FTO\u002Fgelab-zero-4b-preview --outtype q8_0 --verbose --outfile gelab-zero-4b-preview_q8_0_mmproj.gguf --mmproj\n```\n\n供参考，INT8 量化的 mmproj GGUF 文件大小约为 454 MB。\n\n#### 步骤2：使用 Jan 在本地提供服务\n\n你可以使用任何兼容 llama.cpp 的客户端来启动本地 API 服务；这里以 [Jan](https:\u002F\u002Fgithub.com\u002Fjanhq\u002Fjan) 为例：\n\n下载 [Jan](https:\u002F\u002Fgithub.com\u002Fjanhq\u002Fjan\u002Freleases) 客户端并安装。\n\n进入设置 → 模型提供商 → 选择 llama.cpp，然后导入模型：\n\n\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_32db15dfcd66.png\" width=\"50%\" alt=\"test model\">\n\n选择刚刚转换好的两个 GGUF 文件：\n\n\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_5632fd3c62cc.png\" width=\"50%\" alt=\"test model\">\n\n回到模型界面，点击“开始”。\n\n创建一个聊天对话以验证模型是否正常运行：\n\n\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_07d75cff5518.png\" width=\"50%\" alt=\"test model\">\n\n当 token 流正常时，启动本地 API 服务器。\n\n进入设置 → 本地 API 服务器，在服务器配置中创建一个 API 密钥，然后启动服务：\n\n\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_48cd394523a2.png\" width=\"50%\" alt=\"test model\">\n\n#### 步骤3：调整 GELab-Zero Agent 的模型配置\n\nllama.cpp 的服务与 Ollama 略有不同，因此你需要在 GELab-Zero Agent 中修改模型配置。具体有两个地方：\n\n1. 在 `model_config.yaml` 中，更新端口和 API 密钥（使用你刚刚创建的密钥）：\n\n```yaml\nlocal:\n    api_base: \"http:\u002F\u002Flocalhost:1337\u002Fv1\"\n    api_key: \"YOUR_KEY\"\n```\n\n
\n\n#### Step 3: Adjust the GELab-Zero agent's model configuration\n\nThe llama.cpp service differs slightly from Ollama, so you need to change the model configuration of the GELab-Zero agent in two places:\n\n1. In `model_config.yaml`, update the port and the API key (use the key you just created):\n\n```yaml\nlocal:\n    api_base: \"http:\u002F\u002Flocalhost:1337\u002Fv1\"\n    api_key: \"YOUR_KEY\"\n```\n\n2. In `examples\u002Frun_single_task.py`, remove any suffix from the model name (line 21):\n\n```python\nlocal_model_config = {\n    \"task_type\": \"parser_0922_summary\",\n    \"model_config\": {\n        \"model_name\": \"gelab-zero\",\n        \"model_provider\": \"local\",\n        \"args\": {\n            \"temperature\": 0.1,\n            \"top_p\": 0.95,\n            \"frequency_penalty\": 0.0,\n            \"max_tokens\": 4096,\n        },\n    },\n}\n```
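\n\nThe difference is mostly one of naming: Ollama identifies models as `name:tag` (for example `gelab-zero:latest`), while the llama.cpp\u002FJan server expects the bare model name. If you switch between the two backends, a tiny hypothetical helper (not part of the repo) can strip the tag defensively:\n\n```python\ndef bare_model_name(name: str) -> str:\n    \"\"\"Strip an Ollama-style ':tag' suffix: 'gelab-zero:latest' -> 'gelab-zero'.\"\"\"\n    return name.split(\":\", 1)[0]\n\nassert bare_model_name(\"gelab-zero:latest\") == \"gelab-zero\"\nassert bare_model_name(\"gelab-zero\") == \"gelab-zero\"\n```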
\n\n---\n\n### (Optional) MCP-Server setup\n\n#### Step 1: Start the MCP server to support multi-device management and task dispatching\n\n```bash\n# Start the MCP server\npython mcp_server\u002Fdetailed_gelab_mcp_server.py\n```\n\n#### Step 2: Import the MCP tools in Chatbox\n\n\u003Cdiv style=\"display: flex; align-items: center; justify-content: center; width: 80%; margin: 0 auto;\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_f006a174dd9d.png\" alt=\"MCP-Demo\" style=\"flex: 1; height: 400px; object-fit: contain; margin-right: 1px;\"\u002F>\n\u003C\u002Fdiv>\n\n## 📝 Citation\n\nIf you use GELab-Zero in your research, please consider citing our work :)\n\n```bibtex\n@misc{yan2025stepguitechnicalreport,\n      title={Step-GUI Technical Report}, \n      author={Haolong Yan and Jia Wang and Xin Huang and Yeqing Shen and Ziyang Meng and Zhimin Fan and Kaijun Tan and Jin Gao and Lieyu Shi and Mi Yang and Shiliang Yang and Zhirui Wang and Brian Li and Kang An and Chenyang Li and Lei Lei and Mengmeng Duan and Danxun Liang and Guodong Liu and Hang Cheng and Hao Wu and Jie Dong and Junhao Huang and Mei Chen and Renjie Yu and Shunshan Li and Xu Zhou and Yiting Dai and Yineng Deng and Yingdan Liang and Zelin Chen and Wen Sun and Chengxu Yan and Chunqin Xu and Dong Li and Fengqiong Xiao and Guanghao Fan and Guopeng Li and Guozhen Peng and Hongbing Li and Hang Li and Hongming Chen and Jingjing Xie and Jianyong Li and Jingyang Zhang and Jiaju Ren and Jiayu Yuan and Jianpeng Yin and Kai Cao and Liang Zhao and Liguo Tan and Liying Shi and Mengqiang Ren and Min Xu and Manjiao Liu and Mao Luo and Mingxin Wan and Na Wang and Nan Wu and Ning Wang and Peiyao Ma and Qingzhou Zhang and Qiao Wang and Qinlin Zeng and Qiong Gao and Qiongyao Li and Shangwu Zhong and Shuli Gao and Shaofan Liu and Shisi Gao and Shuang Luo and Xingbin Liu and Xiaojia Liu and Xiaojie Hou and Xin Liu and Xuanti Feng and Xuedan Cai and Xuan Wen and Xianwei Zhu and Xin Liang and Xin Liu and Xin Zhou and Yingxiu Zhao and Yukang Shi and Yunfang Xu and Yuqing Zeng and Yixun Zhang and Zejia Weng and Zhonghao Yan and Zhiguo Huang and Zhuoyu Wang and Zheng Ge and Jing Li and Yibo Zhu and Binxing Jiao and Xiangyu Zhang and Daxin Jiang},\n      year={2025},\n      eprint={2512.15431},\n      archivePrefix={arXiv},\n      primaryClass={cs.CV},\n      url={https:\u002F\u002Farxiv.org\u002Fabs\u002F2512.15431}, \n}\n\n@software{gelab_zero_2025,\n  title={GELab-Zero: An Advanced Mobile Agent Reasoning System},\n  author={GELab Team},\n  year={2025},\n  url={https:\u002F\u002Fgithub.com\u002Fstepfun-ai\u002Fgelab-zero}\n}\n\n@misc{gelab_engine,\n      title={GUI Exploration Lab: Enhancing Agents' Screen Navigation via Multi-turn Reinforcement Learning}, \n      author={Haolong Yan and Yeqing Shen and Xin Huang and Jia Wang and Kaijun Tan and Zhixuan Liang and Hongxin Li and Zheng Ge and Osamu Yoshie and Si Li and Xiangyu Zhang and Daxin Jiang},\n      year={2025},\n      eprint={2512.02423},\n      archivePrefix={arXiv},\n      primaryClass={cs.CV},\n      url={https:\u002F\u002Farxiv.org\u002Fabs\u002F2512.02423}, \n}\n```\n\n## ⭐ Star History\n\n\u003Cdiv align=\"center\">\n  \u003Ca href=\"https:\u002F\u002Fstar-history.com\u002F#stepfun-ai\u002Fgelab-zero&Date\">\n    \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_readme_7b5331398081.png\" alt=\"Star History Chart\" width=\"600\">\n  \u003C\u002Fa>\n\u003C\u002Fdiv>","# GELab-Zero Quickstart Guide\n\n## Environment Preparation\n\n### System requirements\n- Python 3.12+\n- An ADB-capable Android device (developer mode enabled)\n- macOS (Apple M-series) or an NVIDIA RTX 4060 or better GPU (recommended)\n\n### Prerequisites\n1. **Python environment** (Miniforge recommended)\n   - Windows: [Miniforge installer](https:\u002F\u002Fgithub.com\u002Fconda-forge\u002Fminiforge)\n   - macOS\u002FLinux: install from the command line\n     ```bash\n     curl -L -O \"https:\u002F\u002Fgithub.com\u002Fconda-forge\u002Fminiforge\u002Freleases\u002Flatest\u002Fdownload\u002FMiniforge3-$(uname)-$(uname -m).sh\"\n     bash Miniforge3-$(uname)-$(uname -m).sh\n     ```\n\n2. **ADB environment**\n   - Install the Android SDK platform tools (the [Huawei Cloud mirror](https:\u002F\u002Fmirrors.huaweicloud.com\u002Fandroid\u002Frepository) is recommended for users in mainland China)\n   - Enable developer mode on the phone and connect the device\n\n3. **Dependencies** (faster via a mirror in China)\n   ```bash\n   pip install -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple -r requirements.txt\n   ```\n\n---\n\n## Installation\n\n1. **Create a Python environment**\n   ```bash\n   conda create -n gelab-zero python=3.12 -y\n   conda activate gelab-zero\n   ```\n\n2. **Clone the repository**\n   ```bash\n   git clone https:\u002F\u002Fgithub.com\u002Fstepfun-ai\u002Fgelab-zero.git\n   cd gelab-zero\n   ```\n\n3. **Deploy the agent in one step**\n   ```bash\n   bash install.sh\n   ```\n\n4. **Install an LLM inference framework** (optional)\n   ```bash\n   # Install Ollama (users in China can use a mirror)\n   curl -fsSL https:\u002F\u002Follama.com\u002Finstall.sh | sh\n\n   # Or install vLLM\n   pip install vllm\n   ```\n\n---\n\n## Basic Usage\n\n### Start the GUI agent\n```bash\ngelab-zero gui\n```\n> Automatically attaches to the connected Android device on startup\n\n### Run a sample task\n```bash\ngelab-zero run task.json\n```\n> Example `task.json` (a batch-generation sketch follows below):\n```json\n{\n  \"task\": \"Find the nearest Hema Fresh store and purchase the specified items\",\n  \"device\": \"auto\"\n}\n```
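\n\nThe two-key schema shown above also makes batch runs easy to script. A minimal sketch, assuming only the documented `task`\u002F`device` fields (file names and layout here are hypothetical):\n\n```python\nimport json\nfrom pathlib import Path\n\n# Tasks to run, one spec file per task, reusing the schema of task.json above.\ntasks = [\n    \"Find the nearest Hema Fresh store and purchase the specified items\",\n    \"Open the settings app and enable dark mode\",\n]\n\nout_dir = Path(\"tasks\")\nout_dir.mkdir(exist_ok=True)\nfor i, task in enumerate(tasks):\n    spec = {\"task\": task, \"device\": \"auto\"}\n    path = out_dir \u002F f\"task_{i}.json\"\n    path.write_text(json.dumps(spec, ensure_ascii=False, indent=2), encoding=\"utf-8\")\n    # Each file can then be run with: gelab-zero run tasks\u002Ftask_0.json, etc.\n```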
\n\n### View trajectory logs\n```bash\ngelab-zero visualize --task-id 12345\n```\n\n> 📌 Note: on first use, configure the ADB path and model parameters via `gelab-zero config`","An e-commerce platform's development team needs to verify payment-flow compatibility across 100+ Android device models; traditional testing approaches face the following challenges:\n\n### Without gelab-zero\n- Renting devices on third-party cloud testing platforms costs over 2,000 RMB per test run\n- Payment UI elements need hand-written adaptation code, averaging 30 minutes per device\n- Test data travels over the public internet, risking user privacy leaks\n- Parallel multi-device testing requires a dedicated server room, with hardware investment exceeding 500,000 RMB\n- Updating test scripts means redeploying the entire test environment, stretching iteration cycles to 3 days\n\n### With gelab-zero\n- After local deployment, per-run testing cost drops below 50 RMB, and device resources are reusable\n- The pretrained model recognizes payment controls automatically, compressing per-device testing time to 5 minutes\n- All data processing stays local, certified under China's MLPS Level 3\n- MCP-Server enables concurrent testing across a thousand devices, cutting hardware costs by 80%\n- Hot-updatable test scripts shorten iteration cycles to 2 hours\n\nThe core value: a full-stack open-source solution that raises mobile GUI test-automation efficiency 40x while keeping data secure and costs controllable.","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fstepfun-ai_gelab-zero_5b0e9abd.png","stepfun-ai","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fstepfun-ai_130ba55d.png","",null,"opensource@stepfun.com","https:\u002F\u002Fgithub.com\u002Fstepfun-ai",[82],{"name":83,"color":84,"percentage":85},"Python","#3572A5",100,2111,180,"2026-04-05T07:05:22","MIT","Linux, macOS, Windows","Not specified",{"notes":93,"python":94,"dependencies":95},"conda is recommended for environment management; the first run downloads about 5 GB of model files","3.12+",[96,97,98],"torch","transformers","accelerate",[15],[101,102,103,104],"agent","gui-agents","pua","phone-use-agent","2026-03-27T02:49:30.150509","2026-04-06T09:46:07.361275",[108,113,118,123,128,133],{"id":109,"question_zh":110,"answer_zh":111,"source_url":112},5155,"How can the ScreenSpot-pro results of GELab-Zero-4B-preview be reproduced?","Deploy the model with vLLM, set the temperature to 0 or 0.1, and use no system prompt. See the comments on Issue #8 for the exact parameter configuration; some users report also needing to check the transformers test parameters and the vLLM deployment settings.","https:\u002F\u002Fgithub.com\u002Fstepfun-ai\u002Fgelab-zero\u002Fissues\u002F8",{"id":114,"question_zh":115,"answer_zh":116,"source_url":117},5156,"How do I fix a 400 error from the MCP interface in Chatbox?","Toggle the relevant switch in Chatbox off and on again; MCP will print an OK log line. Once that log appears, re-enter the instruction and it will work normally. This workaround was verified in Issue #26.","https:\u002F\u002Fgithub.com\u002Fstepfun-ai\u002Fgelab-zero\u002Fissues\u002F26",{"id":119,"question_zh":120,"answer_zh":121,"source_url":122},5157,"Does GPT-4o's AndroidDaily evaluation result depend on external modules?","No external modules were integrated during evaluation; all models were tested with the same unified prompt. GPT-4o's 19.6% score rests solely on its own understanding and reasoning; prompt details will be disclosed in the forthcoming technical report.","https:\u002F\u002Fgithub.com\u002Fstepfun-ai\u002Fgelab-zero\u002Fissues\u002F3",{"id":124,"question_zh":125,"answer_zh":126,"source_url":127},5158,"What should I do about the broken AndroidDaily dataset link?","The dataset link has been updated to https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fstepfun-ai\u002FAndroidDaily; the maintainers have fixed the dead link. Please use the new link directly.","https:\u002F\u002Fgithub.com\u002Fstepfun-ai\u002Fgelab-zero\u002Fissues\u002F2",{"id":129,"question_zh":130,"answer_zh":131,"source_url":132},5159,"Is the 8B model planned to be open-sourced?","At present the 8B model is available only through the API, and the open-source release is not the same version as the API model. The team says stronger models will be released in the future but has not stated an open-source plan; watch for follow-up updates.","https:\u002F\u002Fgithub.com\u002Fstepfun-ai\u002Fgelab-zero\u002Fissues\u002F48",{"id":134,"question_zh":135,"answer_zh":136,"source_url":137},5160,"How can errors when running locally be resolved?","A quantized deployment is recommended; on a CPU-only machine it achieves roughly 30 seconds per step. If you hit a specific error, provide the failing case so the team can optimize; avoid driving local operation with the large model directly.","https:\u002F\u002Fgithub.com\u002Fstepfun-ai\u002Fgelab-zero\u002Fissues\u002F23",[]]