[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-feima09--GMTalker":3,"tool-feima09--GMTalker":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 
代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",150037,2,"2026-04-10T23:33:47",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",108322,"2026-04-10T11:39:34",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 
协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[52,13,15,14],"插件",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":32,"last_commit_at":59,"category_tags":60,"status":17},4721,"markitdown","microsoft\u002Fmarkitdown","MarkItDown 是一款由微软 AutoGen 团队打造的轻量级 Python 工具，专为将各类文件高效转换为 Markdown 格式而设计。它支持 PDF、Word、Excel、PPT、图片（含 OCR）、音频（含语音转录）、HTML 乃至 YouTube 链接等多种格式的解析，能够精准提取文档中的标题、列表、表格和链接等关键结构信息。\n\n在人工智能应用日益普及的今天，大语言模型（LLM）虽擅长处理文本，却难以直接读取复杂的二进制办公文档。MarkItDown 恰好解决了这一痛点，它将非结构化或半结构化的文件转化为模型“原生理解”且 Token 效率极高的 Markdown 格式，成为连接本地文件与 AI 分析 pipeline 的理想桥梁。此外，它还提供了 MCP（模型上下文协议）服务器，可无缝集成到 Claude Desktop 等 LLM 应用中。\n\n这款工具特别适合开发者、数据科学家及 AI 研究人员使用，尤其是那些需要构建文档检索增强生成（RAG）系统、进行批量文本分析或希望让 AI 助手直接“阅读”本地文件的用户。虽然生成的内容也具备一定可读性，但其核心优势在于为机器",93400,"2026-04-06T19:52:38",[52,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":66,"readme_en":67,"readme_zh":68,"quickstart_zh":69,"use_case_zh":70,"hero_image_url":71,"owner_login":72,"owner_name":73,"owner_avatar_url":74,"owner_bio":73,"owner_company":73,"owner_location":73,"owner_email":73,"owner_twitter":73,"owner_website":73,"owner_url":75,"languages":76,"stars":101,"forks":102,"last_commit_at":103,"license":104,"difficulty_score":10,"env_os":105,"env_gpu":106,"env_ram":107,"env_deps":108,"category_tags":116,"github_topics":118,"view_count":32,"oss_zip_url":73,"oss_zip_packed_at":73,"status":17,"created_at":124,"updated_at":125,"faqs":126,"releases":157},4973,"feima09\u002FGMTalker","GMTalker","GMTalker 由光明实验室媒体智能团队打造的3d数字人。系统集成了语音识别、语音合成、自然语言理解、嘴型动画驱动。支持windows、Linux、安卓快速部署。","GMTalker 是由光明实验室媒体智能团队打造的开源 3D 数字人交互系统。它基于虚幻引擎（Unreal Engine）渲染，集成了语音识别、语音合成、自然语言理解及嘴型动画驱动等核心模块，能够让用户通过语音与逼真的 3D 卡通数字人进行实时、流畅的对话。\n\n该项目主要解决了传统数字人开发门槛高、部署复杂以及难以在低配置设备上实现离线实时交互的痛点。GMTalker 实现了全链路离线流式对话，响应速度达到毫秒级，并支持用户随时打断对话，显著提升了交互的自然度与灵活性。\n\nGMTalker 非常适合开发者、研究人员以及希望将数字人技术应用于展示、教育或商业场景的团队使用。其独特的技术亮点在于极高的兼容性与轻量化：仅需 2GB 显存即可运行完整项目，支持 Windows、Linux 和安卓跨平台一键部署；同时兼容华为昇腾 NPU 与纯 CPU 
模式，无需额外依赖即可快速搭建后端。此外，它还支持大语言模型接入、自定义角色形象及微表情控制，为构建个性化的智能数字助手提供了坚实的技术基础。","# GMTalker\n\u003C!-- \u003Cp align=\"center\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffeima09_GMTalker_readme_241844ca42d1.png\" alt=\"项目 Logo\" style=\"width:60%;\"\u002F>\n\u003C\u002Fp> -->\n\n\u003Cp align=\"center\">\n  \u003Ca >English\u003C\u002Fa> | \u003Ca href=\"README_CN.md\">中文\u003C\u002Fa>\n\u003C\u002Fp>\n\n\u003Cp align=\"center\">\n  \u003Ca href=\"#news\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FNEWS-Log-red?style=flat-square\" \u002F>\n  \u003C\u002Fa>\n  \u003Ca href=\"#features\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FFeatures-Features-blue?style=flat-square\" \u002F>\n  \u003C\u002Fa>\n  \u003Ca href=\"#install\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FInstall-Install-success?style=flat-square\" \u002F>\n  \u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fhuggingface.co\u002Fcalyi\u002FGMTalker\" target=\"_blank\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FHugging-Download-yellow?style=flat-square\" \u002F>\n  \u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fdrive.google.com\u002Ffile\u002Fd\u002F1prydilmo-ftSUjC4L10qylfhr_eYpKYS\u002Fview?usp=sharing\" target=\"_blank\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FUE5_Project-Download-orange?style=flat-square\" \u002F>\n  \u003C\u002Fa>\n\u003C\u002Fp>\n\n---\n**GMTalker**​​, an interactive digital human rendered by Unreal Engine, is developed by the Media Intelligence Team at Bright Laboratory. The system integrates speech recognition, speech synthesis, natural language understanding, and lip-sync animation driving. It supports rapid deployment on Windows and requires only 2GB of VRAM to run the entire project.It can be deployed on Windows, Linux, and Android. 
This project demonstrates ​​the demo effects of **3D cartoon digital human avatars**​​, suitable for presentations, expansions, and commercial integration.\n\n\u003C!-- System Architecture Diagram -->\n\u003Cp align=\"center\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffeima09_GMTalker_readme_bef77900b115.png\" alt=\"System Architecture Diagram\" style=\"width:60%;\"\u002F>\n  \u003Cbr\u002F>\n  \u003Cem>System Architecture Diagram\u003C\u002Fem>\n\u003C\u002Fp>\n\n\u003Ca name=\"features\">\u003C\u002Fa>\n## 🧱 Features\n- Cross-Platform: Deploy on Windows, Linux, and Android with a single codebase.\n- Offline & Real-Time: Fully offline streaming dialogue with millisecond response.\n- Smart Interaction: Wake-up, interrupt, and voice cloning support.\n- Model Flexibility: Compatible with LLMs, custom Agents, and local knowledge bases.\n- Avatar Customization: Custom characters with lip-sync and micro-expressions.\n- Quick Setup: Easy backend configuration, no extra dependencies needed.\n- Hardware Support: Runs on Huawei Ascend NPU or CPU-only mode.\n\n\u003Ctable align=\"center\">\n  \u003Ctr>\n    \u003Ctd style=\"text-align:center\">\n      \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffeima09_GMTalker_readme_4acae96c5a69.gif\" alt=\"demo1\" width=\"50%\" style=\"display: block; margin: 0 auto;\" \u002F>\n    \u003C\u002Ftd>\n    \u003Ctd style=\"text-align:center\">\n      \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffeima09_GMTalker_readme_da837b699e5e.gif\" alt=\"demo2\" width=\"58%\" style=\"display: block; margin: 0 auto;\" \u002F>\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n\u003C\u002Ftable>\n\n\u003C!-- \u003Ctable>\n  \u003Ctr>\n    \u003Cth align=\"center\">Feature Introduction\u003C\u002Fth>\n    \u003Cth align=\"center\">Demonstration Video\u003C\u002Fth>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd>\u003Cstrong>Interrupt\u003C\u002Fstrong>\u003Cbr>Allows users to interrupt 
conversations in real time via voice, enhancing interaction flexibility\u003C\u002Ftd>\n    \u003Ctd>\n      \u003Cvideo src=\"https:\u002F\u002Fprivate-user-images.githubusercontent.com\u002F63825035\u002F477330917-45670b4b-a2ee-4345-8365-2a43233e2c8b.mp4?jwt=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NTUwNTAwMzAsIm5iZiI6MTc1NTA0OTczMCwicGF0aCI6Ii82MzgyNTAzNS80NzczMzA5MTctNDU2NzBiNGItYTJlZS00MzQ1LTgzNjUtMmE0MzIzM2UyYzhiLm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTA4MTMlMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjUwODEzVDAxNDg1MFomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPWFhMGZlYWEyOWUyM2RhNDY3YzA1ZjFkZDNlYTNhNTM0NzJiMTMxMWE4NTY5MWRjYmNiZTI1NzlhNGEyMzE1ZGYmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.U0ugXLKWtNxhBhBOKYxHMdLD0crRIDZEgz1O9uEsCUM\" controls width=\"70%\">\u003C\u002Fvideo>\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n\u003C\u002Ftable> -->\n\n\u003Ca name=\"news\">\u003C\u002Fa>\n## 🔥 NEWS\n\n- 🗓️ **2025.10.15**: Backend now supports Docker deployment, see [Docker Documentation](.\u002Fdocs\u002Fdocker.md) for details.\n- 🗓️ **2025.10.10**:It now offers comprehensive support for both GPU and NPU 910B servers. The FunASR speech recognition is based on the ONNX Runtime, while the TTS speech synthesis leverages torch_npu.\n- 🗓️ **2025.9.12**: The project now offers comprehensive support for Android, Linux, Web, and Windows platforms. 
No GPU is required on the client side.\n- 🗓️ **2025.9.1**: Upgraded the model with a lightweight lip-sync driver and packaged the complete UE project into a standalone executable (.exe), allowing it to run smoothly on ordinary laptops.\n- 🗓️ **2025.8.25**: Updated the [Import UE avatar](.\u002Fdocs\u002Fue\u002Fimport_tutorial.md) | [Character Overview](.\u002Fdocs\u002Fue\u002Fcharacter_overview.md) | [Animation Overview](.\u002Fdocs\u002Fue\u002Fanimation_overview.md) documents.\n- 🗓️ **2025.8.19**: Released UE5 project files, including the **GuangDUNDUN** character.\n  (jointly developed by Guangming Lab and the Shenzhen Guangming District Government).\n- 🗓️ **2025.8.12**: Added WebUI usage guide for quick project deployment.\n\n\n## 💬 Join Our Community\n\n\u003Cp align=\"center\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffeima09_GMTalker_readme_b5389f671f98.png\" alt=\"GMTalker technical exchange group\" width=\"200\"\u002F>\n  \u003Cbr\u002F>\n  \u003Cstrong>Scan QR code to join GMTalker technical exchange group\u003C\u002Fstrong>\n\u003C\u002Fp>\n\n\u003Ca name=\"install\">\u003C\u002Fa>  \n## 📦 Quick Start  \n\n#### After configuring the backend, launch the application by downloading the installation package. With FunASR and MeloTTS, it can be started with one click—no additional environment setup or dependencies required.\n\n### ​​Hardware Requirements​  \n- **Operating System**: Windows 10+ \u002F Linux\n- **Memory**: 8GB+ RAM  \n- **GPU Support**: Minimum 2GB VRAM (NVIDIA GPU with CUDA support recommended)\n\n1. **Cloning project**\n```bash\ngit clone  https:\u002F\u002Fgithub.com\u002Ffeima09\u002FGMTalker.git\n```\n\n2. 
**One click start**\n\n**Windows:**\n```bash\nwebui.bat\n```\n\n**Linux:**\n```bash\nchmod +x webui.sh\n.\u002Fwebui.sh\n```\n\n**Docker Deployment (Recommended):**\n```bash\n# Using docker-compose\ndocker-compose up -d\n\n# Or using docker cli\ndocker run -d \\\n  --name gmtalker \\\n  -p 5002:5002 -p 7860:7860 \\\n  -v $(pwd)\u002Fconfigs:\u002Fapp\u002Fconfigs \\\n  huiji2333\u002Fgmtalker:latest\n```\n👉 [Docker Deployment Guide](docs\u002Fdocker.md)\n\n3. **Accessing Services**\n- Main service:` http:\u002F\u002F127.0.0.1:5002 `\n- Web configuration interface:` http:\u002F\u002F127.0.0.1:7860 `\n\n👉 Click here to view the WebUI User [Guide](docs\u002Fwebui.md)\n\n4. **Download UE Executable​**\n- Download and launch GLM3.exe [Windows version Google Drive](https:\u002F\u002Fdrive.google.com\u002Ffile\u002Fd\u002F1EO_E33blpLcKop6l1Ta5-PQTCtZVYxLu\u002Fview?usp=sharing) [Windows version Baidu Netdisk](https:\u002F\u002Fpan.baidu.com\u002Fs\u002F1WbiLS5wyGss_JvUet5mx_g?pwd=w2sb) [Linux version Google Drive](https:\u002F\u002Fdrive.google.com\u002Ffile\u002Fd\u002F1ZpKmLAm2yiKJT_4tPzX4VGv7_RNoWNx5\u002Fview?usp=sharing) [Linux version Baidu Netdisk](https:\u002F\u002Fpan.baidu.com\u002Fs\u002F1adBv9ZYMC5pBhPckaVHJJg?pwd=kit5)\n\n5. 
**​​Deploy Essential Local AI Services​**\n- Download the FunASR speech recognition lazy package [here](https:\u002F\u002Fgithub.com\u002F1m1ng\u002FFunASR\u002Freleases\u002Fdownload\u002FComplete-Package\u002FFunASR.7z), then run run_server_2pass.bat to start it with one click.\n- Download the MeloTTS speech synthesis lazy package [here](https:\u002F\u002Fgithub.com\u002F1m1ng\u002FMeloTTS\u002Freleases\u002Fdownload\u002FComplete-Package\u002FMeloTTS.7z), then run start.bat to start it with one click.\n\n👉 If you need to develop from source code, please click here to view the complete installation [guide](docs\u002Finstall.md). Please also refer to the overall backend [architecture](docs\u002Frelate.md).\n\n\n## 📊 Comparison with Other Open-Source Solutions\n\n| Project Name     | 3D Avatar | UE5 Rendering | Voice Input | Voice Interruption | Lip Sync | Body Movements | Local Deployment (Win) | Star ⭐ |\n|------------------|:---------:|:-------------:|:-----------:|:-------------------:|:--------:|:--------------:|:-----------------------:|:-------:|\n| LiveTalking      | ❌        | ❌            | ❌          | ❌                  | ✅       | ❌             | ❌                      | 6.1k    |\n| OpenAvatarChat   | ✅        | ❌            | ✅          | ❌                  | ✅       | ❌             | ❌                      | 1.6k    |\n| MNN              | ✅        | ❌            | ✅          | ❌                  | ✅       | ✅             | ❌                      | 12.6k   |\n| Fay              | ❌        | ✅            | ✅          | ✅                  | ✅       | ✅             | ✅                      | 11.6k   |\n| **GMTalker**     | ✅        | ✅            | ✅          | ✅                  | ✅       | ✅             | ✅                      | 🚀      |\n\n> ✅ indicates full support for the feature, while ❌ indicates it is missing or unsupported.\n\n## 📚 About Guangming Laboratory\n\nThe Guangdong Provincial Laboratory of Artificial Intelligence and Digital Economy 
(Shenzhen) (hereinafter referred to as Guangming Laboratory) is one of the third batch of Guangdong Provincial Laboratories approved for construction by the Guangdong Provincial Government. The laboratory focuses on cutting-edge theories and future technological trends in global artificial intelligence and the digital economy, dedicated to serving major national development strategies and significant needs.\n\nRelying on Shenzhen's industrial, geographical, and policy advantages, Guangming Laboratory brings together global scientific research forces and fully unleashes the agglomeration effect of scientific and technological innovation resources. Centered around the core task of building a domestic AI computing power ecosystem, and driven by the development of multimodal AI technology and its application ecosystem, the laboratory strives to break through key technologies, produce original achievements, and continuously advance technological innovation and industrial empowerment.\n\nThe laboratory's goal is to accelerate the supply of diversified applications and full-scenario penetration of artificial intelligence technology, achieving mutual reinforcement of technological innovation and industrial driving forces, and continuously promoting the generation of new quality productivity powered by AI.\n\n---\n\n### 🌐 Contact Us (Project Collaboration)\n\n- Website: [Guangming Laboratory Official Site](https:\u002F\u002Fwww.gml.ac.cn\u002F)  \n- Email: [mafei@gml.ac.cn](mafei@gml.ac.cn)\u002F[xuhongbo@gml.ac.cn](xuhongbo@gml.ac.cn)     \n\n> **Acknowledgements**  \n> Thanks to all team members and partners who participated in the development and support of the GMTalker project. 
(Fei Ma, Hongbo Xu, Minghui Li, Yiming Luo, Haijun Zhu, Yiyao Zhuo, Chao Song)\n\n## License\n\nThis project is licensed under the **Creative Commons Attribution-NonCommercial 4.0 International License (CC BY-NC 4.0)**.\n\nYou are free to use, modify, and share the code and assets for **non-commercial purposes**, provided that you **give appropriate credit**.\n\n🔗 [Full License Text](https:\u002F\u002Fcreativecommons.org\u002Flicenses\u002Fby-nc\u002F4.0\u002Flegalcode)  \n🔍 [Human-readable Summary](https:\u002F\u002Fcreativecommons.org\u002Flicenses\u002Fby-nc\u002F4.0\u002F)","# GMTalker\n\u003C!-- \u003Cp align=\"center\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffeima09_GMTalker_readme_241844ca42d1.png\" alt=\"项目 Logo\" style=\"width:60%;\"\u002F>\n\u003C\u002Fp> -->\n\n\u003Cp align=\"center\">\n  \u003Ca >English\u003C\u002Fa> | \u003Ca href=\"README_CN.md\">中文\u003C\u002Fa>\n\u003C\u002Fp>\n\n\u003Cp align=\"center\">\n  \u003Ca href=\"#news\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FNEWS-Log-red?style=flat-square\" \u002F>\n  \u003C\u002Fa>\n  \u003Ca href=\"#features\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FFeatures-Features-blue?style=flat-square\" \u002F>\n  \u003C\u002Fa>\n  \u003Ca href=\"#install\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FInstall-Install-success?style=flat-square\" \u002F>\n  \u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fhuggingface.co\u002Fcalyi\u002FGMTalker\" target=\"_blank\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FHugging-Download-yellow?style=flat-square\" \u002F>\n  \u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fdrive.google.com\u002Ffile\u002Fd\u002F1prydilmo-ftSUjC4L10qylfhr_eYpKYS\u002Fview?usp=sharing\" target=\"_blank\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FUE5_Project-Download-orange?style=flat-square\" \u002F>\n  
\u003C\u002Fa>\n\u003C\u002Fp>\n\n---\n**GMTalker**，一款由虚幻引擎渲染的交互式数字人，由光明实验室媒体智能团队开发。该系统集成了语音识别、语音合成、自然语言理解和唇形同步动画驱动等功能，支持在Windows上快速部署，且整个项目运行仅需2GB显存。它还可以部署在Windows、Linux和Android平台上。本项目展示了**3D卡通数字人形象**的演示效果，适用于展示、扩展及商业集成。\n\n\u003C!-- 系统架构图 -->\n\u003Cp align=\"center\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffeima09_GMTalker_readme_bef77900b115.png\" alt=\"System Architecture Diagram\" style=\"width:60%;\"\u002F>\n  \u003Cbr\u002F>\n  \u003Cem>系统架构图\u003C\u002Fem>\n\u003C\u002Fp>\n\n\u003Ca name=\"features\">\u003C\u002Fa>\n## 🧱 功能特性\n- 跨平台：使用单一代码库即可部署于Windows、Linux和Android。\n- 离线实时：完全离线流式对话，响应时间可达毫秒级。\n- 智能交互：支持唤醒、打断及语音克隆功能。\n- 模型灵活：兼容大语言模型、自定义Agent以及本地知识库。\n- 头像自定义：可定制角色，具备唇形同步和微表情功能。\n- 快速搭建：后端配置简单，无需额外依赖。\n- 硬件支持：可在华为Ascend NPU或纯CPU模式下运行。\n\n\u003Ctable align=\"center\">\n  \u003Ctr>\n    \u003Ctd style=\"text-align:center\">\n      \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffeima09_GMTalker_readme_4acae96c5a69.gif\" alt=\"demo1\" width=\"50%\" style=\"display: block; margin: 0 auto;\" \u002F>\n    \u003C\u002Ftd>\n    \u003Ctd style=\"text-align:center\">\n      \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffeima09_GMTalker_readme_da837b699e5e.gif\" alt=\"demo2\" width=\"58%\" style=\"display: block; margin: 0 auto;\" \u002F>\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n\u003C\u002Ftable>\n\n\u003C!-- \u003Ctable>\n  \u003Ctr>\n    \u003Cth align=\"center\">功能介绍\u003C\u002Fth>\n    \u003Cth align=\"center\">演示视频\u003C\u002Fth>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd>\u003Cstrong>打断\u003C\u002Fstrong>\u003Cbr>允许用户通过语音实时打断对话，提升交互灵活性\u003C\u002Ftd>\n    \u003Ctd>\n      \u003Cvideo 
src=\"https:\u002F\u002Fprivate-user-images.githubusercontent.com\u002F63825035\u002F477330917-45670b4b-a2ee-4345-8365-2a43233e2c8b.mp4?jwt=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NTUwNTAwMzAsIm5iZiI6MTc1NTA0OTczMCwicGF0aCI6Ii82MzgyNTAzNS80NzczMzA5MTctNDU2NzBiNGItYTJlZS00MzQ1LTgzNjUtMmE0MzIzM2UyYzhiLm1wND9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVSHQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNTA4MTMlMkZ1cy1lYXN0LTElMkFzMyUyRmF3czRfcmVxdWVzdCZYLAFtei1EYXRlPTIwMjUwODEzVDAxNDg1MFomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0In0.U0ugXLKWtNxhBhBOKYxHMdLD0crRIDZEgz1O9uEsCUM\" controls width=\"70%\">\u003C\u002Fvideo>\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n\u003C\u002Ftable> -->\n\n\u003Ca name=\"news\">\u003C\u002Fa>\n## 🔥 最新动态\n\n- 🗓️ **2025年10月15日**：后端现已支持Docker部署，详情请参阅[Docker文档](.\u002Fdocs\u002Fdocker.md)。\n- 🗓️ **2025年10月10日**：现全面支持GPU和NPU 910B服务器。FunASR语音识别基于ONNX Runtime，而TTS语音合成则利用torch_npu。\n- 🗓️ **2025年9月12日**：项目现已全面支持Android、Linux、Web和Windows平台，客户端无需配备GPU。\n- 🗓️ **2025年9月1日**：升级了模型，引入轻量级唇形同步驱动，并将完整的UE项目打包成独立可执行文件（.exe），使其能够在普通笔记本电脑上流畅运行。\n- 🗓️ **2025年8月25日**：更新了[导入UE头像](.\u002Fdocs\u002Fue\u002Fimport_tutorial.md) | [角色概述](.\u002Fdocs\u002Fue\u002Fcharacter_overview.md) | [动画概述](.\u002Fdocs\u002Fue\u002Fanimation_overview.md)等文档。\n- 🗓️ **2025年8月19日**：发布了UE5项目文件，其中包括**光DUNDUN**角色。\n  （由光明实验室与深圳市光明区政府联合开发）。\n- 🗓️ **2025年8月12日**：新增了WebUI使用指南，便于快速部署项目。\n\n\n## 💬 加入我们的社区\n\n\u003Cp align=\"center\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffeima09_GMTalker_readme_b5389f671f98.png\" alt=\"GMTalker技术交流群\" width=\"200\"\u002F>\n  \u003Cbr\u002F>\n  \u003Cstrong>扫描二维码加入GMTalker技术交流群\u003C\u002Fstrong>\n\u003C\u002Fp>\n\n\u003Ca name=\"install\">\u003C\u002Fa>  \n## 📦 快速入门  \n\n#### 配置好后端后，下载安装包即可启动应用。借助FunASR和MeloTTS，只需一键即可启动，无需额外环境设置或依赖项。\n\n### ​​硬件要求​  \n- **操作系统**：Windows 10及以上 \u002F Linux\n- **内存**：8GB及以上RAM  
\n- **GPU支持**：最低2GB显存（推荐配备CUDA支持的NVIDIA GPU）\n\n1. **克隆项目**\n```bash\ngit clone  https:\u002F\u002Fgithub.com\u002Ffeima09\u002FGMTalker.git\n```\n\n2. **一键启动**\n\n**Windows:**\n```bash\nwebui.bat\n```\n\n**Linux:**\n```bash\nchmod +x webui.sh\n.\u002Fwebui.sh\n```\n\n**Docker部署（推荐）：**\n```bash\n# 使用docker-compose\ndocker-compose up -d\n\n# 或者使用 Docker CLI\ndocker run -d \\\n  --name gmtalker \\\n  -p 5002:5002 -p 7860:7860 \\\n  -v $(pwd)\u002Fconfigs:\u002Fapp\u002Fconfigs \\\n  huiji2333\u002Fgmtalker:latest\n```\n👉 [Docker 部署指南](docs\u002Fdocker.md)\n\n3. **访问服务**\n- 主服务：`http:\u002F\u002F127.0.0.1:5002`\n- Web 配置界面：`http:\u002F\u002F127.0.0.1:7860`\n\n👉 点击此处查看 WebUI 用户[指南](docs\u002Fwebui.md)\n\n4. **下载 UE 可执行文件**\n- 下载并启动 GLM3.exe [Windows 版 Google Drive](https:\u002F\u002Fdrive.google.com\u002Ffile\u002Fd\u002F1EO_E33blpLcKop6l1Ta5-PQTCtZVYxLu\u002Fview?usp=sharing) [Windows 版百度网盘](https:\u002F\u002Fpan.baidu.com\u002Fs\u002F1WbiLS5wyGss_JvUet5mx_g?pwd=w2sb) [Linux 版 Google Drive](https:\u002F\u002Fdrive.google.com\u002Ffile\u002Fd\u002F1ZpKmLAm2yiKJT_4tPzX4VGv7_RNoWNx5\u002Fview?usp=sharing) [Linux 版百度网盘](https:\u002F\u002Fpan.baidu.com\u002Fs\u002F1adBv9ZYMC5pBhPckaVHJJg?pwd=kit5)\n\n5. 
**部署本地必备 AI 服务**\n- 下载 FunASR 语音识别懒人包[这里](https:\u002F\u002Fgithub.com\u002F1m1ng\u002FFunASR\u002Freleases\u002Fdownload\u002FComplete-Package\u002FFunASR.7z)，然后运行 run_server_2pass.bat 即可一键启动。\n- 下载 MeloTTS 语音合成懒人包[这里](https:\u002F\u002Fgithub.com\u002F1m1ng\u002FMeloTTS\u002Freleases\u002Fdownload\u002FComplete-Package\u002FMeloTTS.7z)，然后运行 start.bat 即可一键启动。\n\n👉 如果需要从源码开发，请点击此处查看完整的安装[指南](docs\u002Finstall.md)，请参阅后端整体[架构](docs\u002Frelate.md)\n\n\n## 📊 与其他开源方案对比\n\n| 项目名称     | 3D 头像 | UE5 渲染 | 语音输入 | 语音打断 | 唇形同步 | 身体动作 | 本地部署（Win） | 星标 ⭐ |\n|------------------|:---------:|:-------------:|:-----------:|:-------------------:|:--------:|:--------------:|:-----------------------:|:-------:|\n| LiveTalking      | ❌        | ❌            | ❌          | ❌                  | ✅       | ❌             | ❌                      | 6.1k    |\n| OpenAvatarChat   | ✅        | ❌            | ✅          | ❌                  | ✅       | ❌             | ❌                      | 1.6k    |\n| MNN              | ✅        | ❌            | ✅          | ❌                  | ✅       | ✅             | ❌                      | 12.6k   |\n| Fay              | ❌        | ✅            | ✅          | ✅                  | ✅       | ✅             | ✅                      | 11.6k   |\n| **GMTalker**     | ✅        | ✅            | ✅          | ✅                  | ✅       | ✅             | ✅                      | 🚀      |\n\n> ✅ 表示完全支持该功能，而 ❌ 表示缺失或不支持。\n\n## 📚 关于光明实验室\n\n广东省人工智能与数字经济实验室（深圳）（以下简称“光明实验室”）是广东省政府批准建设的第三批省级实验室之一。实验室聚焦全球人工智能和数字经济领域的前沿理论与未来技术趋势，致力于服务国家重大发展战略和重要需求。\n\n依托深圳的产业、区位和政策优势，光明实验室汇聚全球科研力量，充分发挥科技创新资源的集聚效应。围绕构建国内人工智能算力生态的核心任务，以多模态人工智能技术及其应用生态发展为驱动，实验室努力突破关键技术，产出原创成果，持续推动技术创新与产业赋能。\n\n实验室的目标是加速人工智能技术的多元化应用供给和全场景渗透，实现技术创新与产业驱动力的相互促进，不断推动以人工智能为动力的新质生产力的形成。\n\n---\n\n### 🌐 联系我们（项目合作）\n\n- 官网：[光明实验室官方网站](https:\u002F\u002Fwww.gml.ac.cn\u002F)  \n- 邮箱：[mafei@gml.ac.cn](mafei@gml.ac.cn)\u002F[xuhongbo@gml.ac.cn](xuhongbo@gml.ac.cn)     \n\n> **致谢**  \n> 感谢所有参与 GMTalker 
项目开发和支持的团队成员及合作伙伴。（马飞、徐洪波、李明辉、罗一鸣、朱海俊、卓义尧、宋超）\n\n## 许可证\n\n本项目采用 **知识共享署名-非商业性使用 4.0 国际许可协议（CC BY-NC 4.0）** 许可。\n\n您可以在**非商业用途**的前提下自由使用、修改和分享代码及资源，但需**适当注明出处**。\n\n🔗 [完整许可文本](https:\u002F\u002Fcreativecommons.org\u002Flicenses\u002Fby-nc\u002F4.0\u002Flegalcode)  \n🔍 [通俗易懂的摘要](https:\u002F\u002Fcreativecommons.org\u002Flicenses\u002Fby-nc\u002F4.0\u002F)","# GMTalker 快速上手指南\n\nGMTalker 是由光明实验室媒体智能团队开发的交互式数字人项目，基于 Unreal Engine 5 渲染，集成了语音识别、语音合成、大语言模型理解及唇形驱动动画。该项目支持跨平台部署（Windows\u002FLinux\u002FAndroid），最低仅需 2GB 显存即可运行，适合快速构建 3D 卡通数字人应用。\n\n## 1. 环境准备\n\n### 系统要求\n- **操作系统**：Windows 10+ 或 Linux\n- **内存**：8GB RAM 及以上\n- **显卡**：推荐 NVIDIA GPU（支持 CUDA），最低显存要求 2GB\n  - *注：也支持华为昇腾 NPU 或纯 CPU 模式运行*\n- **网络**：首次运行需下载模型文件，建议保持网络畅通\n\n### 前置依赖\n本项目后端已封装为独立包，**无需手动配置 Python 环境或安装额外依赖**。只需下载对应的懒人包即可一键启动核心服务。\n\n## 2. 安装步骤\n\n### 第一步：克隆项目代码\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Ffeima09\u002FGMTalker.git\ncd GMTalker\n```\n\n### 第二步：下载并启动核心 AI 服务\n你需要分别下载语音识别（FunASR）和语音合成（MeloTTS）的离线包。国内用户可使用以下方案：\n\n1. **下载 FunASR 语音识别包**\n   - 下载地址：[GitHub Release](https:\u002F\u002Fgithub.com\u002F1m1ng\u002FFunASR\u002Freleases\u002Fdownload\u002FComplete-Package\u002FFunASR.7z)\n   - 解压后，双击运行 `run_server_2pass.bat` (Windows) 或执行对应脚本启动服务。\n\n2. **下载 MeloTTS 语音合成包**\n   - 下载地址：[GitHub Release](https:\u002F\u002Fgithub.com\u002F1m1ng\u002FMeloTTS\u002Freleases\u002Fdownload\u002FComplete-Package\u002FMeloTTS.7z)\n   - 解压后，双击运行 `start.bat` (Windows) 或执行对应脚本启动服务。\n\n### 第三步：启动 WebUI 管理界面\n返回项目根目录，根据系统选择启动命令：\n\n**Windows:**\n```bash\nwebui.bat\n```\n\n**Linux:**\n```bash\nchmod +x webui.sh\n.\u002Fwebui.sh\n```\n\n**Docker 部署（推荐）:**\n```bash\ndocker run -d \\\n  --name gmtalker \\\n  -p 5002:5002 -p 7860:7860 \\\n  -v $(pwd)\u002Fconfigs:\u002Fapp\u002Fconfigs \\\n  huiji2333\u002Fgmtalker:latest\n```\n\n## 3. 
基本使用\n\n### 访问服务\n启动成功后，在浏览器中打开以下地址：\n- **主服务接口**：`http:\u002F\u002F127.0.0.1:5002`\n- **Web 配置界面**：`http:\u002F\u002F127.0.0.1:7860`\n  > 在 Web 配置界面中可调整模型参数、角色设置及知识库连接。\n\n### 运行 3D 数字人客户端\n要看到完整的 3D 卡通形象交互效果，需下载编译好的 UE5 执行程序：\n\n- **Windows 版**：\n  - [Google Drive 下载](https:\u002F\u002Fdrive.google.com\u002Ffile\u002Fd\u002F1EO_E33blpLcKop6l1Ta5-PQTCtZVYxLu\u002Fview?usp=sharing)\n  - [百度网盘下载](https:\u002F\u002Fpan.baidu.com\u002Fs\u002F1WbiLS5wyGss_JvUet5mx_g?pwd=w2sb) (提取码: w2sb)\n- **Linux 版**：\n  - [Google Drive 下载](https:\u002F\u002Fdrive.google.com\u002Ffile\u002Fd\u002F1ZpKmLAm2yiKJT_4tPzX4VGv7_RNoWNx5\u002Fview?usp=sharing)\n  - [百度网盘下载](https:\u002F\u002Fpan.baidu.com\u002Fs\u002F1adBv9ZYMC5pBhPckaVHJJg?pwd=kit5) (提取码: kit5)\n\n下载并解压后，直接运行 `GLM3.exe` (Windows) 或对应 Linux 可执行文件。程序将自动连接本地后端服务，此时你对着麦克风说话，屏幕上的 3D 角色（如“光墩墩”）将进行实时语音对话、唇形同步及肢体动作反馈。\n\n> **提示**：确保第二步中的 FunASR 和 MeloTTS 服务以及第三步的 WebUI 均在运行状态下，客户端才能正常交互。","某连锁银行计划在智能柜台部署 3D 数字人客服，以替代传统文字菜单，提供面对面的业务引导与咨询。\n\n### 没有 GMTalker 时\n- **硬件门槛极高**：传统高精度数字人方案通常依赖昂贵的高端显卡，单节点显存占用往往超过 8GB，导致银行大量旧款终端无法利旧，硬件升级成本巨大。\n- **交互延迟明显**：语音识别、大模型思考与口型驱动分属不同模块，串联处理导致响应延迟高达数秒，用户提问后需长时间等待，体验割裂且不自然。\n- **部署维护复杂**：各组件依赖环境繁琐，跨平台（如从服务器迁移到安卓平板）时需重新适配代码，运维团队难以快速批量上线。\n- **缺乏情感表达**：现有方案多为简单的“念稿式”播报，嘴型与声音匹配度低，且无法根据语境展现微表情，难以建立用户信任感。\n\n### 使用 GMTalker 后\n- **低成本广泛覆盖**：GMTalker 仅需 2GB 显存即可流畅运行，并支持华为昇腾 NPU 甚至纯 CPU 模式，银行可直接在现有的老旧终端和安卓平板上快速部署，大幅降低硬件投入。\n- **毫秒级实时互动**：系统集成了端到端的离线流式对话能力，实现了语音唤醒、打断及毫秒级响应，用户可随时插话，交流过程如真人般流畅自然。\n- **一键跨平台落地**：基于统一代码库，GMTalker 支持 Windows、Linux 和安卓快速部署，配合新增的 Docker 后端支持，运维人员可在一天内完成数百个网点的系统更新。\n- **生动拟人化服务**：内置的高精度唇形驱动与微表情系统，让数字人能随语调变化展现丰富神态，精准匹配银行业务场景，显著提升了服务的亲和力与专业度。\n\nGMTalker 通过极致的轻量化架构与全链路实时交互能力，让高质量 3D 
数字人得以低成本、大规模地走进真实的金融营业大厅。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffeima09_GMTalker_241844ca.png","feima09",null,"https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Ffeima09_c5d4b098.png","https:\u002F\u002Fgithub.com\u002Ffeima09",[77,81,85,89,92,95,98],{"name":78,"color":79,"percentage":80},"Python","#3572A5",99.8,{"name":82,"color":83,"percentage":84},"Shell","#89e051",0.1,{"name":86,"color":87,"percentage":88},"PowerShell","#012456",0,{"name":90,"color":91,"percentage":88},"Batchfile","#C1F12E",{"name":93,"color":94,"percentage":88},"Nushell","#4E9906",{"name":96,"color":97,"percentage":88},"Dockerfile","#384d54",{"name":99,"color":100,"percentage":88},"HTML","#e34c26",1190,48,"2026-04-07T03:54:04","NOASSERTION","Windows, Linux, Android","非必需（支持 CPU 模式）。若使用 GPU，推荐 NVIDIA GPU（支持 CUDA），最低显存 2GB；也支持华为昇腾 NPU 910B。","8GB+",{"notes":109,"python":110,"dependencies":111},"该项目客户端无需 GPU 即可运行，最低仅需 2GB 显存即可在普通笔记本上流畅运行整个项目。支持一键启动脚本（webui.bat\u002Fsh），也可通过 Docker 部署。语音识别和合成服务需单独下载懒人包运行。许可证为 CC BY-NC 4.0，仅限非商业用途。","未说明",[112,113,114,115],"FunASR (基于 ONNX Runtime)","MeloTTS (基于 torch_npu)","Unreal Engine 5 (用于渲染)","Docker (可选部署方式)",[14,52,15,13,16,117],"其他",[119,120,121,122,123],"3dhuman","ai","api","python","andorid","2026-03-27T02:49:30.150509","2026-04-11T18:31:40.164661",[127,132,137,142,147,152],{"id":128,"question_zh":129,"answer_zh":130,"source_url":131},22585,"TTS 测试失败，提示“由于目标计算机积极拒绝，无法连接”怎么办？","该错误通常是因为端口配置不匹配。TTS 默认部署在 8001 端口，但如果您使用的是 GPT-SOVITS 模块，其默认端口可能是 9872。请检查并修改配置文件中的端口设置，确保 GMTalker 连接的端口与 TTS 服务实际运行的端口一致。此外，也可以下载专用的懒人包（MeloTTS），下载地址：https:\u002F\u002Fgithub.com\u002F1m1ng\u002FMeloTTS\u002Freleases\u002Fdownload\u002FComplete-Package\u002FMeloTTS.7z，并仔细阅读 README 文档进行正确部署。","https:\u002F\u002Fgithub.com\u002Ffeima09\u002FGMTalker\u002Fissues\u002F17",{"id":133,"question_zh":134,"answer_zh":135,"source_url":136},22586,"导入 Audio2Face 模型时一直失败报错，如何解决？","首先建议检查网络连接问题，尝试多次重新导入。需要注意的是，开发团队计划弃用资源占用较大的 Audio2Face 
方案，转而采用轻量级唇形同步技术以提升效率。如果问题持续，可能是因为该方案即将被淘汰或存在兼容性波动，建议关注后续版本更新或尝试网络环境较好的时段操作。","https:\u002F\u002Fgithub.com\u002Ffeima09\u002FGMTalker\u002Fissues\u002F6",{"id":138,"question_zh":139,"answer_zh":140,"source_url":141},22587,"系统是否支持语音打断功能？","当前版本已支持通过唤醒词进行随意打断。用户可以在说话过程中使用预设的唤醒词来中断数字人的当前播报并进行新的交互。","https:\u002F\u002Fgithub.com\u002Ffeima09\u002FGMTalker\u002Fissues\u002F1",{"id":143,"question_zh":144,"answer_zh":145,"source_url":146},22588,"启动 WebUI 后，浏览器访问 5002 端口显示 Not Found 是怎么回事？","5002 端口是后端 API 接口地址，并非用于浏览器直接访问的网页界面。WebUI 前端界面通常运行在 7860 端口（即 http:\u002F\u002Flocalhost:7860\u002F）。请勿直接在浏览器中打开 5002 端口，前端程序会自动在后台与该 API 端口通信。","https:\u002F\u002Fgithub.com\u002Ffeima09\u002FGMTalker\u002Fissues\u002F15",{"id":148,"question_zh":149,"answer_zh":150,"source_url":151},22589,"项目是否支持手机端使用？是否允许商用？","手机端版本已经发布，用户可以下载使用。关于商用许可，具体条款请参考项目仓库中的 LICENSE 文件或联系作者确认，但移动端功能的可用性已得到官方确认。","https:\u002F\u002Fgithub.com\u002Ffeima09\u002FGMTalker\u002Fissues\u002F8",{"id":153,"question_zh":154,"answer_zh":155,"source_url":156},22590,"交流群二维码过期了，如何获取最新的入群方式？","官方会不定期更新交流群的二维码。如果遇到二维码过期的情况，请查看项目 Issue 区最新的回复（如 Issue #16 或 #13 的评论），维护者通常会在那里上传最新的有效二维码图片。","https:\u002F\u002Fgithub.com\u002Ffeima09\u002FGMTalker\u002Fissues\u002F13",[]]