[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-szczyglis-dev--py-gpt":3,"tool-szczyglis-dev--py-gpt":62},[4,18,26,36,46,54],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",160411,2,"2026-04-18T23:33:24",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":42,"last_commit_at":43,"category_tags":44,"status":17},8272,"opencode","anomalyco\u002Fopencode","OpenCode 是一款开源的 AI 编程助手（Coding Agent），旨在像一位智能搭档一样融入您的开发流程。它不仅仅是一个代码补全插件，而是一个能够理解项目上下文、自主规划任务并执行复杂编码操作的智能体。无论是生成全新功能、重构现有代码，还是排查难以定位的 Bug，OpenCode 都能通过自然语言交互高效完成，显著减少开发者在重复性劳动和上下文切换上的时间消耗。\n\n这款工具专为软件开发者、工程师及技术研究人员设计，特别适合希望利用大模型能力来提升编码效率、加速原型开发或处理遗留代码维护的专业人群。其核心亮点在于完全开源的架构，这意味着用户可以审查代码逻辑、自定义行为策略，甚至私有化部署以保障数据安全，彻底打破了传统闭源 AI 助手的“黑盒”限制。\n\n在技术体验上，OpenCode 提供了灵活的终端界面（Terminal UI）和正在测试中的桌面应用程序，支持 macOS、Windows 及 Linux 全平台。它兼容多种包管理工具，安装便捷，并能无缝集成到现有的开发环境中。无论您是追求极致控制权的资深极客，还是渴望提升产出的独立开发者，OpenCode 都提供了一个透明、可信",144296,1,"2026-04-16T14:50:03",[13,45],"插件",{"id":47,"name":48,"github_repo":49,"description_zh":50,"stars":51,"difficulty_score":32,"last_commit_at":52,"category_tags":53,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 
Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",109154,"2026-04-18T11:18:24",[14,15,13],{"id":55,"name":56,"github_repo":57,"description_zh":58,"stars":59,"difficulty_score":32,"last_commit_at":60,"category_tags":61,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[45,13,15,14],{"id":63,"github_repo":64,"name":65,"description_en":66,"description_zh":67,"ai_summary_zh":68,"readme_en":69,"readme_zh":70,"quickstart_zh":71,"use_case_zh":72,"hero_image_url":73,"owner_login":74,"owner_name":75,"owner_avatar_url":76,"owner_bio":77,"owner_company":78,"owner_location":79,"owner_email":78,"owner_twitter":78,"owner_website":80,"owner_url":81,"languages":82,"stars":102,"forks":103,"last_commit_at":104,"license":105,"difficulty_score":32,"env_os":106,"env_gpu":107,"env_ram":108,"env_deps":109,"category_tags":121,"github_topics":122,"view_count":32,"oss_zip_url":78,"oss_zip_packed_at":78,"status":17,"created_at":143,"updated_at":144,"faqs":145,"releases":175},9508,"szczyglis-dev\u002Fpy-gpt","py-gpt","Desktop AI Assistant powered by GPT-5, GPT-4, o1, o3, Gemini, Claude, Ollama, DeepSeek, Perplexity, Grok, Bielik, chat, vision, voice, RAG, image and video generation, agents, tools, MCP, plugins, speech synthesis and recognition, web search, memory, presets, assistants, and more. 
Linux, Windows, Mac","py-gpt 是一款功能全面的桌面级 AI 助手，旨在将强大的大语言模型能力直接带入用户的本地电脑。它支持 Windows、Linux 和 Mac 系统，让用户无需依赖网页端，即可在本地环境中流畅使用 GPT-4\u002F5、o1、Claude、Gemini、Grok 以及通过 Ollama 运行的本地模型（如 Llama 3、DeepSeek）。\n\n这款工具解决了用户在多模型切换、数据隐私保护及复杂任务自动化方面的痛点。它不仅提供基础的聊天对话，更集成了“文件对话”（基于 LlamaIndex 的 RAG 技术）、代码生成与执行、系统命令控制、网络搜索、图像视频生成以及语音交互等高级功能。用户可以让 AI 读取本地文档、自动执行脚本，甚至通过插件扩展无限可能。\n\npy-gpt 非常适合开发者、研究人员、数据分析师以及追求高效工作流的普通用户。对于技术人员，它是调试代码和管理本地模型的得力帮手；对于需要处理大量文档的研究者，其知识库检索功能能极大提升效率；而丰富的语音和多模态特性，也让它成为日常办公的理想伴侣。其独特的亮点在于高度模块化的插件系统与对多种前后端模型的统一支持，真正实现了“一个界面，掌控所有 AI 能力”。","py-gpt 是一款功能全面的桌面级 AI 助手，旨在将强大的大语言模型能力直接带入用户的本地电脑。它支持 Windows、Linux 和 Mac 系统，让用户无需依赖网页端，即可在本地环境中流畅使用 GPT-4\u002F5、o1、Claude、Gemini、Grok 以及通过 Ollama 运行的本地模型（如 Llama 3、DeepSeek）。\n\n这款工具解决了用户在多模型切换、数据隐私保护及复杂任务自动化方面的痛点。它不仅提供基础的聊天对话，更集成了“文件对话”（基于 LlamaIndex 的 RAG 技术）、代码生成与执行、系统命令控制、网络搜索、图像视频生成以及语音交互等高级功能。用户可以让 AI 读取本地文档、自动执行脚本，甚至通过插件扩展无限可能。\n\npy-gpt 非常适合开发者、研究人员、数据分析师以及追求高效工作流的普通用户。对于技术人员，它是调试代码和管理本地模型的得力帮手；对于需要处理大量文档的研究者，其知识库检索功能能极大提升效率；而丰富的语音和多模态特性，也让它成为日常办公的理想伴侣。其独特的亮点在于高度模块化的插件系统与对多种前后端模型的统一支持，真正实现了“一个界面，掌控所有 AI 能力”。","# PyGPT - Desktop AI Assistant\n\n[![pygpt](https:\u002F\u002Fsnapcraft.io\u002Fpygpt\u002Fbadge.svg)](https:\u002F\u002Fsnapcraft.io\u002Fpygpt)\n\nRelease: **2.7.12** | build: **2026-02-06** | Python: **>=3.10, \u003C3.14**\n\n> Official website: https:\u002F\u002Fpygpt.net | Documentation: https:\u002F\u002Fpygpt.readthedocs.io\n> \n> Discord: https:\u002F\u002Fpygpt.net\u002Fdiscord | Snap: https:\u002F\u002Fsnapcraft.io\u002Fpygpt | PyPi: https:\u002F\u002Fpypi.org\u002Fproject\u002Fpygpt-net\n> \n> Compiled version for Linux (`zip`) and Windows 10\u002F11 (`msi`) 64-bit: https:\u002F\u002Fpygpt.net\u002F#download\n> \n> ❤️ Donate: https:\u002F\u002Fwww.buymeacoffee.com\u002Fszczyglis | https:\u002F\u002Fgithub.com\u002Fsponsors\u002Fszczyglis-dev\n\n## Overview\n\n**PyGPT** is an **all-in-one** Desktop AI Assistant that provides direct interaction with OpenAI language models, including `GPT-5`, `GPT-4`, `o1`, `o3` and more, through the `OpenAI API`. By utilizing other SDKs and `LlamaIndex`, the application also supports alternative LLMs, like those available on `HuggingFace`, locally available models via `Ollama` (like `gpt-oss`, `Llama 3`, `Mistral`, `DeepSeek V3\u002FR1` or `Bielik`), and other models like `Google Gemini`, `Anthropic Claude`, `Perplexity \u002F Sonar`, and `xAI Grok`.\n\nThis assistant offers multiple modes of operation such as chat, assistants, agents, completions, and image-related tasks like image generation and image analysis. **PyGPT** has filesystem capabilities for file I\u002FO, can generate and run Python code, execute system commands, execute custom commands and manage file transfers. It also allows models to perform web searches with `DuckDuckGo`, `Google` and `Microsoft Bing`.\n\nFor audio interactions, **PyGPT** includes speech synthesis using the `Microsoft Azure`, `Google`, `Eleven Labs` and `OpenAI` Text-To-Speech services. Additionally, it features speech recognition capabilities provided by `OpenAI Whisper`, `Google` and `Bing`, enabling the application to understand spoken commands and transcribe audio inputs into text. It features context memory with save and load functionality, enabling users to resume interactions from predefined points in the conversation. Prompt creation and management are streamlined through an intuitive preset system.\n\n**PyGPT**'s functionality extends through plugin support, allowing for custom enhancements (with multiple plugins included). 
Its multi-modal capabilities make it an adaptable tool for a range of AI-assisted operations, such as text-based interactions, system automation, daily assisting, vision applications, natural language processing, code generation and image creation.\n\nMultiple operation modes are included, such as chat, text completion, assistant, agents, vision, Chat with Files (via `LlamaIndex`), commands execution, external API calls and image generation, making **PyGPT** a multi-tool for many AI-driven tasks.\n\n**Showcase** (mp4, version `2.5.65`, build `2025-07-24`):\n\nhttps:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Fassets\u002Fd8305109-8b1b-41cb-b3ba-8c654271a95c\n\n**Screenshots** (version `2.5.64`, build `2025-07-23`):\n\nDark theme:\n![v2_main](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_af3a500512c8.png)\n\nLight theme:\n![v2_light](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_6924a06d1216.png)\n\nYou can download compiled 64-bit versions for Windows and Linux here: https:\u002F\u002Fpygpt.net\u002F#download\n\n## Features\n\n- Desktop AI Assistant for `Linux`, `Windows` and `Mac`, written in Python.\n- Works similarly to `ChatGPT`, but locally (on a desktop computer).\n- 11 modes of operation: Chat, Chat with Files, Realtime + audio, Research (Perplexity), Completion, Image and Video generation, Assistants, Experts, Computer use, Agents and Autonomous Mode.\n- Supports multiple models like `OpenAI GPT-5`, `GPT-4`, `o1`, `o3`, `o4`, `Google Gemini`, `Anthropic Claude`, `xAI Grok`, `DeepSeek V3\u002FR1`, `Perplexity \u002F Sonar`, and any model accessible through `LlamaIndex` and `Ollama` such as `DeepSeek`, `gpt-oss`, `Llama 3`, `Mistral`, `Bielik`, etc.\n- Chat with your own Files: integrated `LlamaIndex` support: chat with data such as: `txt`, `pdf`, `csv`, `html`, `md`, `docx`, `json`, `epub`, `xlsx`, `xml`, webpages, `Google`, `GitHub`, video\u002Faudio, images and other data types, or use conversation history as additional context provided to the model.\n- Built-in vector databases support and automated files and data embedding.\n- Image generation via models like `DALL-E`, `gpt-image`, `Imagen`, `Gemini`, and `Nano Banana`.\n- Video generation via models like `Veo3` and `Sora2`.\n- Internet access via `DuckDuckGo`, `Google` and `Microsoft Bing`.\n- Speech synthesis via `Microsoft Azure`, `Google`, `Eleven Labs` and `OpenAI` Text-To-Speech services.\n- Speech recognition via `OpenAI Whisper`, `Google` and `Microsoft Speech Recognition`.\n- Plugins support with built-in plugins like `Files I\u002FO`, `Code Interpreter`, `Web Search`, `Google`, `Facebook`, `X\u002FTwitter`, `Slack`, `Telegram`, `GitHub`, `MCP`, and many more.\n- MCP support.\n- Real-time video camera capture in Vision mode.\n- Image analysis via vision models.\n- Included support features for individuals with disabilities: customizable keyboard shortcuts, voice control, and translation of on-screen actions into audio via speech synthesis.\n- Handles and stores the full context of conversations (short and long-term memory).\n- Integrated calendar, day notes and search in contexts by selected date.\n- Tools and commands execution (via plugins: access to the local filesystem, Python Code Interpreter, system commands execution, and more).\n- Custom commands creation and execution.\n- Crontab \u002F Task scheduler included.\n- Built-in real-time Python Code Interpreter.\n- Manages files and attachments with options to upload, download, and 
organize.\n- Context history with the capability to revert to previous contexts (long-term memory).\n- Allows you to easily manage prompts with handy editable presets.\n- Provides an intuitive operation and interface.\n- Includes a notepad.\n- Includes a simple painter \u002F drawing tool.\n- Includes a node-based Agents Builder.\n- Supports multiple languages.\n- Requires no previous knowledge of using AI models.\n- Fully configurable.\n- Themes support.\n- Real-time code syntax highlighting.\n- Built-in token usage calculation.\n- Possesses the potential to support future OpenAI models.\n- **Open source**; source code is available on `GitHub`.\n- Utilizes the user's own API key.\n- and many more.\n\nThe application is free, open-source, and runs on PCs with `Linux`, `Windows 10`, `Windows 11` and `Mac`. \nFull Python source code is available on `GitHub`.\n\n**PyGPT uses the user's API key  -  to use the GPT models, \nyou must have a registered OpenAI account and your own API key. Local models do not require any API keys.**\n\nYou can also use built-in LlamaIndex support to connect to other Large Language Models (LLMs), \nsuch as those on HuggingFace. Additional API keys may be required.\n\n# Installation\n\n## Binaries (Linux, Windows 10 and 11)\n\nYou can download compiled binary versions for `Linux` and `Windows` (10\u002F11). \n\n**PyGPT** binaries require a PC with Windows 10, 11, or Linux. Simply download the installer or the archive with the appropriate version from the download page at https:\u002F\u002Fpygpt.net, extract it, or install it, and then run the application. A binary version for Mac is not available, so you must run PyGPT from PyPi or from the source code on Mac. Currently, only 64-bit binaries are available.\n\nThe Linux version requires `GLIBC` >= `2.35`.\n\n## Microsoft Store (Windows)\n\nFor Windows 10\u002F11, you can install **PyGPT** directly from Microsoft Store:\n\n[![Get it from Microsoft Store](https:\u002F\u002Fget.microsoft.com\u002Fimages\u002Fen-us%20dark.svg)](https:\u002F\u002Fapps.microsoft.com\u002Fdetail\u002FXP99R4MX3X65VQ)\n\nLink to MS Store: https:\u002F\u002Fapps.microsoft.com\u002Fdetail\u002FXP99R4MX3X65VQ\n\n## AppImage (Linux)\n\nYou can download the latest **PyGPT** `AppImage` for Linux from the release page:\n\n**Releases:** https:\u002F\u002Fgithub.com\u002Fszczyglis-dev\u002Fpy-gpt\u002Freleases\n\n**Tip:** Remember to give execution permissions to the downloaded file:\n\n```chmod +x .\u002FPyGPT-X.X.X-x86_64.AppImage```\n\nTo manage future updates you can use the `AppImageUpdate` tool:\n\nYou can download it from: https:\u002F\u002Fgithub.com\u002FAppImage\u002FAppImageUpdate\u002Freleases\n\nAfter downloading, run the following command in the terminal:\n\n```appimageupdatetool .\u002FPyGPT-X.X.X-x86_64.AppImage```\n\n## Snap Store (Linux)\n\nYou can install **PyGPT** directly from Snap Store:\n\n```commandline\nsudo snap install pygpt\n```\n\nTo manage future updates use:\n\n```commandline\nsudo snap refresh pygpt\n```\n\n[![Get it from the Snap Store](https:\u002F\u002Fsnapcraft.io\u002Fstatic\u002Fimages\u002Fbadges\u002Fen\u002Fsnap-store-black.svg)](https:\u002F\u002Fsnapcraft.io\u002Fpygpt)\n\n**Using camera:** to use camera in Snap version you must connect the camera with:\n\n```commandline\nsudo snap connect pygpt:camera\n```\n\n**Using microphone:** to use microphone in Snap version you must connect the microphone with:\n\n```commandline\nsudo snap connect pygpt:audio-record :audio-record\nsudo snap connect pygpt:alsa\n```\n\n**Using audio 
output:** to use audio output in Snap version you must connect the audio with:\n\n```commandline\nsudo snap connect pygpt:audio-playback\nsudo snap connect pygpt:alsa\n```\n\n**Connecting IPython in Docker in Snap version**:\n\nTo use IPython in the Snap version, you must connect PyGPT to the Docker daemon:\n\n```commandline\nsudo snap connect pygpt:docker-executables docker:docker-executables\n```\n\n````commandline\nsudo snap connect pygpt:docker docker:docker-daemon\n````\n\n## PyPi (pip)\n\nThe application can also be installed from `PyPi` using `pip install`:\n\n1. Create a virtual environment:\n\n```commandline\npython3 -m venv venv\nsource venv\u002Fbin\u002Factivate\n```\n\n2. Install from PyPi:\n\n``` commandline\npip install pygpt-net\n```\n\n3. Once installed, run the command to start the application:\n\n``` commandline\npygpt\n```\n\n## Running from GitHub source code\n\nAn alternative method is to download the source code from `GitHub` and execute the application using the Python interpreter (`>=3.10`, `\u003C3.14`). \n\n### Install with pip\n\n1. Clone git repository or download .zip file:\n\n```commandline\ngit clone https:\u002F\u002Fgithub.com\u002Fszczyglis-dev\u002Fpy-gpt.git\ncd py-gpt\n```\n\n2. Create a new virtual environment:\n\n```commandline\npython3 -m venv venv\nsource venv\u002Fbin\u002Factivate\n```\n\n3. Install requirements:\n\n```commandline\npip install -r requirements.txt\n```\n\n4. Run the application:\n\n```commandline\npython3 run.py\n```\n\n### Install with Poetry\n\n1. Clone git repository or download .zip file:\n\n```commandline\ngit clone https:\u002F\u002Fgithub.com\u002Fszczyglis-dev\u002Fpy-gpt.git\ncd py-gpt\n```\n\n2. Install Poetry (if not installed):\n\n```commandline\npip install poetry\n```\n\n3. Create a new virtual environment that uses Python 3.10:\n\n```commandline\npoetry env use python3.10\npoetry shell\n```\n\nor (Poetry >= 2.0):\n\n```commandline\npoetry env use python3.10\npoetry env activate\n```\n\n4. Install requirements:\n\n```commandline\npoetry install\n```\n\n5. Run the application:\n\n```commandline\npoetry run python3 run.py\n```\n\n**Tip**: you can use `PyInstaller` to create a compiled version of\nthe application for your system (required version >= `6.0.0`).\n\n### Troubleshooting\n\nIf you have problems with the `xcb` plugin with newer versions of PySide on Linux, e.g. like this:\n\n```commandline\nqt.qpa.plugin: Could not load the Qt platform plugin \"xcb\" in \"\" even though it was found.\nThis application failed to start because no Qt platform plugin could be initialized. 
\nReinstalling the application may fix this problem.\n```\n\n...then install `libxcb`:\n\n```commandline\nsudo apt install libxcb-cursor0\n```\n\nIf you have problems with audio on Linux, then try to install `portaudio19-dev` and\u002For `libasound2`:\n\n```commandline\nsudo apt install portaudio19-dev\n```\n\n```commandline\nsudo apt install libasound2\nsudo apt install libasound2-data \nsudo apt install libasound2-plugins\n```\n\n**Problems with GLIBC on Linux**\n\nIf you encounter the error: \n\n```commandline\nError loading Python lib libpython3.10.so.1.0: dlopen: \u002Flib\u002Fx86_64-linux-gnu\u002Flibm.so.6: version GLIBC_2.35 not found (required by libpython3.10.so.1.0)\n```\nwhen trying to run the compiled version for Linux, try updating GLIBC to version `2.35`, or use a newer operating system that has at least version `2.35` of GLIBC.\n\n**Access to camera in Snap version:**\n\n\n```commandline\nsudo snap connect pygpt:camera\n```\n\n**Access to microphone in Snap version:**\n\nTo use microphone in Snap version you must connect the microphone with:\n\n```commandline\nsudo snap connect pygpt:audio-record :audio-record\n```\n\n**Snap and AppArmor permission denied**\n\nSnap installs AppArmor profiles for each application by default. The profile for PyGPT is created at:\n\n`\u002Fvar\u002Flib\u002Fsnapd\u002Fapparmor\u002Fprofiles\u002Fsnap.pygpt.pygpt`\n\nThe application should work with the default profile; however, if you encounter errors like:\n\n`PermissionError: [Errno 13] Permission denied: '\u002Fetc\u002Fhttpd\u002Fconf\u002Fmime.types'`\n\nadd the appropriate access rules to the profile file, for example:\n\n```\n# \u002Fvar\u002Flib\u002Fsnapd\u002Fapparmor\u002Fprofiles\u002Fsnap.pygpt.pygpt\n\n...\n\n\u002Fetc\u002Fhttpd\u002Fconf\u002Fmime.types r\n```\n\nand reload the profiles.\n\nAlternatively, you can try removing snap and reinstalling it:\n\n`sudo snap remove --purge pygpt`\n\n`sudo snap install pygpt`\n\n\n**Access to a microphone and audio in Windows version:**\n\nIf you have problems with audio or a microphone in the non-binary PIP\u002FPython version on Windows, check to see if FFmpeg is installed. If it's not, install it and add it to the PATH. You can find a tutorial on how to do this here: https:\u002F\u002Fphoenixnap.com\u002Fkb\u002Fffmpeg-windows. The binary version already includes FFmpeg.\n\n**Windows and VC++ Redistributable**\n\nOn Windows, proper functioning requires the installation of the `VC++ Redistributable`, which can be found on the Microsoft website:\n\nhttps:\u002F\u002Flearn.microsoft.com\u002Fen-us\u002Fcpp\u002Fwindows\u002Flatest-supported-vc-redist\n\nThe libraries from this environment are used by `PySide6` - one of the base packages used by PyGPT. 
\nThe absence of the installed libraries may cause display errors or completely prevent the application from running.\n\nIt may also be necessary to add the path `C:\\path\\to\\venv\\Lib\\python3.x\\site-packages\\PySide6` to the `PATH` variable.\n\n**WebEngine\u002FChromium renderer and OpenGL problems**\n\nIf you have problems with the `WebEngine \u002F Chromium` renderer, you can force legacy mode by launching the app with command line arguments:\n\n``` commandline\npython3 run.py --legacy=1\n```\n\nand to force disable OpenGL hardware acceleration:\n\n``` commandline\npython3 run.py --disable-gpu=1\n```\n\nYou can also manually enable legacy mode by editing the config file - open the `%WORKDIR%\u002Fconfig.json` config file in an editor and set the following options:\n\n``` json\n\"render.engine\": \"legacy\",\n\"render.open_gl\": false,\n```\n\n## Other requirements\n\nFor operation, an internet connection is needed (for API connectivity), a registered OpenAI account, \nand an active API key that must be input into the program. Local models, such as `Llama3`, do not require an OpenAI account or any API keys.\n\n## Debugging and logging\n\nPlease go to the `Debugging and Logging` section for instructions on how to log and diagnose issues in a more detailed manner.\n\n\n# Quick Start\n\n## Setting up API Key(s)\n\nYou can configure API keys for various providers, such as OpenAI, Anthropic, Google, xAI, Perplexity, OpenRouter, and more. This flexibility allows you to use different providers based on your needs.\n\nDuring the initial setup, configure your API keys within the application.\n\nTo do so, navigate to the menu:\n\n`Config -> Settings -> API Keys`\n\nHere, you can add or manage API keys for any supported provider.\n\n![v2_api_keys](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_bf85d02016ea.png)\n\n**Configuring Provider**\n\n1. **Select the Provider:** Choose a tab with the provider.\n2. **Enter the API Key:** Paste the corresponding API key for the selected provider.\n\n**Example**\n\n- **OpenAI:** Obtain your API key by registering on the OpenAI website: https:\u002F\u002Fplatform.openai.com and navigating to https:\u002F\u002Fplatform.openai.com\u002Faccount\u002Fapi-keys.\n- **Anthropic, Google, etc.:** Follow similar steps on their respective platforms.\n\n**Note:** The ability to use models or services depends on your access level with the respective provider. If you wish to use custom API endpoints or local APIs that do not require API keys, simply enter any value into the API key field to bypass prompts about an empty key.\n\n# Work modes\n\n## Chat\n\n**+ Inline Vision and Image generation**\n\nIn **PyGPT**, this mode mirrors `ChatGPT`, allowing you to chat with models like `GPT-5`, `GPT-4`, `o1`, `o3`, `Claude`, `Gemini`, `Grok`, `Perplexity (Sonar)`, `Deepseek`, and more. It works with the OpenAI SDK using the `Responses API` and `ChatCompletions API`. You can also use SDKs from Google GenAI, Anthropic, or xAI if the native SDK is enabled. You can set the endpoint for `ChatCompletions` in `Config -> Settings -> API Keys`.\n\n**Tip:** This mode uses the provider SDK directly. If there's no native client built into the app, models like Sonar or Llama3 are supported in Chat mode via LlamaIndex or OpenAI-compatible API endpoints. The app automatically switches to these endpoints when using non-OpenAI models. You can enable or disable the use of the native API SDK (per provider) in `Settings -> API Keys`. 
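\n\nAs a rough illustration of what such an OpenAI-compatible fallback looks like (a sketch, not PyGPT's internal code; the `openai` package and Ollama's local endpoint serve as the example here):\n\n```python\nfrom openai import OpenAI\n\n# Point the OpenAI SDK at any OpenAI-compatible ChatCompletions endpoint;\n# here: a local Ollama server, which needs no real API key.\nclient = OpenAI(base_url='http:\u002F\u002Flocalhost:11434\u002Fv1', api_key='not-needed')\n\nresp = client.chat.completions.create(\n    model='llama3',\n    messages=[{'role': 'user', 'content': 'Hello!'}],\n)\nprint(resp.choices[0].message.content)\n```\n\n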
If the native SDK is disabled, the OpenAI SDK will be used via the compatible ChatCompletions API endpoint.\n\nCurrently built-in native clients:\n\n- Anthropic SDK\n- OpenAI SDK\n- Google GenAI SDK\n- xAI SDK\n\nThe main part of the interface is a chat window where you see your conversations. Below it is a message box for typing. On the right side, you can set up or change the model and system prompt. You can also save these settings as presets to easily switch between models or tasks.\n\nAbove where you type your messages, the interface shows you the number of tokens your message will use up as you type it – this helps to keep track of usage. There is also a feature to attach and upload files in this area. Go to the `Files and Attachments` section for more information on how to use attachments.\n\n![v2_mode_chat](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_970013e68074.png)\n\n**Vision:** If you want to send photos from your disk or images from your camera for analysis, and the selected model does not support Vision, you must enable the `Vision (inline)` plugin in the Plugins menu. This plugin allows you to send photos or images from your camera for analysis in any Chat mode.\n\n![v3_vision_plugins](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_057ad314f863.png)\n\nWith this plugin, you can capture an image with your camera or attach an image and send it for analysis to discuss the photograph:\n\n![v3_vision_chat](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_aabca0bc4da3.png)\n\n**Image generation:** If you want to generate images directly in chat you must enable the `Image generation (inline)` plugin in the Plugins menu.\nThe plugin allows you to generate images in Chat mode:\n\n![v3_img_chat](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_b624079b3f47.png)\n\n##  Chat with Files (LlamaIndex)\n\nThis mode enables chat interaction with your documents and entire context history through conversation. \nIt seamlessly incorporates `LlamaIndex` into the chat interface, allowing for immediate querying of your indexed documents.\n\n**Tip:** If you do not want to call tools\u002Fcommands, disable the checkbox `+Tools`. It will speed up the response time when using local models. You can also enable the ReAct agent for tool calls in: `Settings -> Indexes \u002F LlamaIndex -> Chat -> Use ReAct agent for Tool calls in Chat with Files mode`. Stream mode is disabled if the ReAct agent and `+Tools` checkbox are active.\n\n**Querying single files**\n\nYou can also query individual files \"on the fly\" using the `query_file` command from the `Files I\u002FO` plugin. This allows you to query any file by simply asking a question about that file. A temporary index will be created in memory for the file being queried, and an answer will be returned from it. From version `2.1.9`, a similar command is available for querying web and external content: `Directly query web content with LlamaIndex`.\n\n**For example:**\n\nIf you have a file: `data\u002Fmy_cars.txt` with content `My car is red.`\n\nYou can ask for: `Query the file my_cars.txt about what color my car is.`\n\nAnd you will receive the response: `Red`.\n\nNote: this command indexes the file only for the current query and does not persist it in the database. To store queried files in the standard index as well, you must enable the option `Auto-index readed files` in plugin settings. 
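\n\nConceptually, the temporary in-memory index behaves like this minimal `LlamaIndex` sketch (illustrative only, not PyGPT's actual code; assumes the `llama-index` package):\n\n```python\nfrom llama_index.core import SimpleDirectoryReader, VectorStoreIndex\n\n# Load the single file and build a throwaway, in-memory index for it\ndocs = SimpleDirectoryReader(input_files=['data\u002Fmy_cars.txt']).load_data()\nindex = VectorStoreIndex.from_documents(docs)\n\n# Query it once; nothing is persisted to the main vector store\nanswer = index.as_query_engine().query('What color is my car?')\nprint(answer)  # -> Red\n```\n\n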
Remember to enable the `+ Tools` checkbox to allow usage of tools and commands from plugins. \n\n**Using Chat with Files mode**\n\nIn this mode, you are querying the whole index, stored in a vector store database.\nTo start, you need to index (embed) the files you want to use as additional context.\nEmbedding transforms your text data into vectors. If you're unfamiliar with embeddings and how they work, check out this article:\n\nhttps:\u002F\u002Fstackoverflow.blog\u002F2023\u002F11\u002F09\u002Fan-intuitive-introduction-to-text-embeddings\u002F\n\nFor a visualization from OpenAI's page, see this picture:\n\n![vectors](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_f7c7e5f55e3b.png)\n\nSource: https:\u002F\u002Fcdn.openai.com\u002Fnew-and-improved-embedding-model\u002Fdraft-20221214a\u002Fvectors-3.svg\n\nTo index your files, simply copy or upload them into the `data` directory and initiate indexing (embedding) by clicking the `Index all` button, or right-click on a file and select `Index...`. Additionally, you have the option to utilize data from indexed files in any Chat mode by activating the `Chat with Files (LlamaIndex, inline)` plugin.\n\n![v2_idx1](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_4027d34ee466.png)\n\nAfter the file(s) are indexed (embedded in the vector store), you can use context from them in chat mode:\n\n![v2_idx2](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_8d502c2a5c4e.png)\n\nBuilt-in file loaders: \n\n**Files:**\n\n- CSV files (csv)\n- Epub files (epub)\n- Excel .xlsx spreadsheets (xlsx)\n- HTML files (html, htm)\n- IPYNB Notebook files (ipynb)\n- Image (vision) (jpg, jpeg, png, gif, bmp, tiff, webp)\n- JSON files (json)\n- Markdown files (md)\n- PDF documents (pdf)\n- Plain-text files (txt)\n- Video\u002Faudio (mp4, avi, mov, mkv, webm, mp3, mpeg, mpga, m4a, wav)\n- Word .docx documents (docx)\n- XML files (xml)\n\n**Web\u002Fexternal content:**\n\n- Bitbucket\n- ChatGPT Retrieval Plugin\n- GitHub Issues\n- GitHub Repository\n- Google Calendar\n- Google Docs\n- Google Drive \n- Google Gmail\n- Google Keep\n- Google Sheets\n- Microsoft OneDrive\n- RSS\n- SQL Database\n- Sitemap (XML)\n- Twitter\u002FX posts\n- Webpages (crawling any webpage content)\n- YouTube (transcriptions)\n\nYou can configure data loaders in `Settings \u002F Indexes \u002F LlamaIndex \u002F Data Loaders` by providing a list of keyword arguments for the specified loaders.\nYou can also develop and provide your own custom loader and register it within the application.\n\nLlamaIndex is also integrated with the context database - you can use data from the database (your context history) as additional context in a discussion. \nOptions for indexing existing context history or enabling real-time indexing of new entries (from the database) are available in the `Settings \u002F Indexes \u002F LlamaIndex` section.\n\n**WARNING:** remember that when indexing content, API calls to the embedding model are used. Each indexing consumes additional tokens. Always control the number of tokens used on the provider's page.\n\n**Tip:** Using the Chat with Files mode, you have default access to files manually indexed from the \u002Fdata directory. 
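\n\nTo keep an eye on the embedding cost mentioned above before indexing, you can estimate token counts locally - a sketch assuming the `tiktoken` package and an OpenAI-style tokenizer (the file path is just an example; other providers tokenize differently):\n\n```python\nimport tiktoken\n\n# Rough, local estimate of how many tokens a file will send to the embedding model\nenc = tiktoken.get_encoding('cl100k_base')\ntext = open('data\u002Freport.txt', encoding='utf-8').read()\nprint(f'~{len(enc.encode(text))} tokens to embed')\n```\n\n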
Apart from files indexed in the main index, you can use additional context by attaching a file - such additional context from the attachment does not land in the main index, but only in a temporary one, available only for the given conversation.\n\n**Token limit:** When you use `Chat with Files` in non-query mode, LlamaIndex adds extra context to the system prompt. If you use plugins (which also add more instructions to the system prompt), you might go over the maximum number of tokens allowed. If you get a warning that says you've used too many tokens, turn off plugins you're not using or turn off the \"+ Tools\" option to reduce the number of tokens used by the system prompt.\n\n**Available vector stores** (provided by `LlamaIndex`):\n\n```\n- ChromaVectorStore\n- ElasticsearchStore\n- PineconeVectorStore\n- QdrantVectorStore\n- RedisVectorStore\n- SimpleVectorStore\n```\n\nYou can configure the selected vector store by providing config options like `api_key`, etc., in the `Settings -> LlamaIndex` window. See the section: `Configuration \u002F Vector stores` for configuration reference.\n\n\n**Configuring data loaders**\n\nIn the `Settings -> LlamaIndex -> Data loaders` section you can define the additional keyword arguments to pass into the data loader instance. See the section: `Configuration \u002F Data Loaders` for configuration reference.\n\n\n## Chat with Audio\n\nThis mode works like the Chat mode but with native support for audio input and output using the Realtime and Live APIs. In this mode, audio input and output are directed to and from the model directly, without the use of external plugins. This enables faster and better audio communication.\n\nCurrently in beta. \n\nAt this moment, only OpenAI real-time models (via the Realtime API) and Google Gemini real-time models (via the Live API) are supported.\n\n## Research\n\nThis mode (when using Sonar and R1 models) operates using the Perplexity API: https:\u002F\u002Fperplexity.ai.\n\nIt allows for deep web searching and utilizes Sonar models, available in `Perplexity AI`.\n\nIt requires a Perplexity API key, which can be generated at: https:\u002F\u002Fperplexity.ai.\n\nFrom version `2.5.27`, OpenAI deep-research models are also available in this mode.\n\n## Completion\n\nAn older mode of operation that allows working in the standard text completion mode. However, it allows for a bit more flexibility with the text by enabling you to initiate the entire discussion in any way you like.\n\nSimilar to chat mode, on the right-hand side of the interface, there are convenient presets. These allow you to fine-tune instructions and swiftly transition between varied configurations and pre-made prompt templates.\n\nAdditionally, this mode offers options for labeling the AI and the user, making it possible to simulate dialogues between specific characters - for example, you could create a conversation between Batman and the Joker, as predefined in the prompt. 
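\n\nSchematically, such a labeled dialogue is just one raw text prompt that the model continues - a sketch using the `openai` package (the labels and prompt are illustrative):\n\n```python\nfrom openai import OpenAI\n\nclient = OpenAI()\n\n# The whole 'conversation' is a single raw prompt; the model continues the text\nprompt = 'Batman: Why so serious?\\nJoker: '\nresp = client.completions.create(\n    model='gpt-3.5-turbo-instruct',\n    prompt=prompt,\n    max_tokens=64,\n)\nprint('Joker:' + resp.choices[0].text)\n```\n\n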
This feature presents a range of creative possibilities for setting up different conversational scenarios in an engaging and exploratory manner.\n\nFrom version `2.0.107`, the `davinci` models are deprecated and have been replaced with the `gpt-3.5-turbo-instruct` model in Completion mode.\n\n## Image and video generation\n\n**PyGPT** enables quick and easy image creation with models like `DALL-E 3`, `gpt-image-1`, `Imagen 3\u002F4`, and `Nano Banana`, as well as video generation using `Veo3` and `Sora2`.\nGenerating images and videos is akin to a chat conversation  -  a user's prompt triggers the generation, followed by downloading, saving to the computer, and displaying the image onscreen. You can send a raw prompt to the model in `Image generation` mode or ask the model for the best prompt.\n\n![v3_img](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_b54dd1d338f4.png)\n\nImage generation using image models is also available in every mode via the `Image Generation (inline)` plugin. Just ask any model, in any mode (e.g. GPT or Gemini), to generate an image and it will do it inline, without the need to change modes.\n\nIf you want to generate images directly in chat you must enable the **Image generation (inline)** plugin in the Plugins menu.\nThe plugin allows you to generate images in Chat mode:\n\n![v3_img_chat](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_b624079b3f47.png)\n\nFor OpenAI models, you can also enable remote image generation in `Config -> Settings -> Remote Tools`. If enabled, image generation will be available natively within the conversation, without plugins, in Chat mode.\n\nTo use `Imagen` models you must enable `VertexAI` in `Config -> Settings -> API Keys -> Google -> Advanced options`.\n\n### Remix, Edit, or Extend\n\nTo remix or extend from a previous image or video instead of creating a new one from scratch, enable the `Remix\u002FExtend` option checkbox in the toolbox. The last generated image or video in the current context will be used as a reference for your prompt, allowing you to request changes to the generated content. If the `Remix\u002FExtend` option is enabled, uploading an image attachment as a reference will not take effect.\n\n### Raw mode\n\nThere is an option for switching the prompt generation mode.\n\nIf **Raw Mode** is enabled, a model will receive the prompt exactly as you have provided it.\nIf **Raw Mode** is disabled, a model will generate the best prompt for you based on your instructions.\n\n### Image storage\n\nOnce you've generated an image, you can easily save it anywhere on your disk by right-clicking on it. \nYou also have the option to delete it or view it in full size in your web browser.\n\n**Tip:** Use presets to save your prepared prompts. \nThis lets you quickly use them again for generating new images later on.\n\nThe app keeps a history of all your prompts, allowing you to revisit any session and reuse previous \nprompts for creating new images.\n\nImages are stored in the ``img`` directory in the **PyGPT** user data folder.\n\n## Assistants\n\nThis mode uses OpenAI's **Assistants API**.\n\nThis mode expands on the basic chat functionality by including additional external tools like a `Code Interpreter` for executing code, `Retrieval Files` for accessing files, and custom `Functions` for enhanced interaction and integration with other APIs or services. In this mode, you can easily upload and download files. 
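\n\nOn the API side, the one-click assistant sync boils down to something like this `openai` package sketch (illustrative only; PyGPT performs the equivalent calls for you):\n\n```python\nfrom openai import OpenAI\n\nclient = OpenAI()\n\n# Create a remote assistant; it appears in your OpenAI account immediately\nassistant = client.beta.assistants.create(\n    name='My PyGPT assistant',\n    instructions='You are a helpful assistant.',\n    model='gpt-4o',\n    tools=[{'type': 'code_interpreter'}],\n)\nprint(assistant.id)\n```\n\n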
**PyGPT** streamlines file management, enabling you to quickly upload documents and manage files created by the model.\n\nSetting up new assistants is simple - a single click is all it takes, and they instantly sync with the `OpenAI API`. Importing assistants you've previously created with OpenAI into **PyGPT** is also a seamless process.\n\n![v2_mode_assistant](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_d1604fab5061.png)\n\nIn Assistant mode you can store your files in a remote vector store (per Assistant) and manage them easily from the app:\n\n![v2_mode_assistant_upload](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_e0f7574874a1.png)\n\nPlease note that token usage calculation is unavailable in this mode. Nonetheless, file (attachment) \nuploads are supported. Simply navigate to the `Files` tab to effortlessly manage files and attachments which \ncan be sent to the OpenAI API.\n\n### Vector stores (via Assistants API)\n\nAssistant mode supports the use of external vector databases offered by the OpenAI API. This feature allows you to store your files in a database and then search them using the Assistant's API. Each assistant can be linked to one vector database—if a database is linked, all files uploaded in this mode will be stored in the linked vector database. If an assistant does not have a linked vector database, a temporary database is automatically created during the file upload, which is accessible only in the current thread. Files from temporary databases are automatically deleted after 7 days.\n\nTo enable the use of vector stores, enable the `Chat with Files` checkbox in the Assistant settings. This enables the `File search` tool in the Assistants API.\n\nTo manage external vector databases, click the DB icon next to the vector database selection list in the Assistant creation and editing window (screen below). In this management window, you can create a new vector database, edit an existing one, or import a list of all existing databases from the OpenAI server:\n\n![v2_assistant_stores](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_1ab57abccf71.png)\n\nYou can define, using `Expire days`, how long files should be automatically kept in the database before deletion (as storing files on OpenAI incurs costs). 
If the value is set to 0, files will not be automatically deleted.\n\nThe vector database in use will be displayed in the list of uploaded files, on the field to the right—if a file is stored in a database, the name of the database will be displayed there; if not, information will be shown indicating that the file is only accessible within the thread:\n\n![v2_assistant_stores_upload](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_95cb619fe2a1.png)\n\n\n##  Agent (LlamaIndex) \n\nA mode that allows the use of agents offered by `LlamaIndex`.\n\nIncludes built-in agents (Workflow):\n\n- FunctionAgent\n- ReAct\n- Structured Planner (sub-tasks)\n- CodeAct (connected to Code Interpreter plugin)\n- Supervisor + worker\n\nIncludes built-in agents (Legacy):\n\n- OpenAI Assistants\n\nIn the future, the list of built-in agents will be expanded.\n\nYou can create your own types (workflows\u002Fpatterns) using the built-in visual node-based editor found in `Tools -> Agents Builder`.\n\nYou can also create your own agent by creating a new provider that inherits from `pygpt_net.provider.agents.base`.\n\n**Tools and Plugins**  \n\nIn this mode, all commands from active plugins are available (commands from plugins are automatically converted into tools for the agent on-the-fly).\n\n**RAG - using indexes**  \n\nIf an index is selected in the agent preset, a tool for reading data from the index is automatically added to the agent, creating a RAG automatically.\n\nMultimodality is currently unavailable; only text is supported. Vision support will be added in the future.\n\n**Loop \u002F Evaluate Mode**\n\nYou can run the agent in autonomous mode, in a loop, and with evaluation of the current output. When you enable the `Loop \u002F Evaluate` checkbox, after the final response is given, the quality of the answer will be rated on a percentage scale of `0% to 100%` by another agent. If the response receives a score lower than the one expected (set using a slider at the bottom right corner of the screen, with a default value of `75%`), a prompt will be sent to the agent requesting improvements and enhancements to the response.\n\nSetting the expected (required) score to `0%` means that the response will be evaluated every time the agent produces a result, and it will always be prompted to self-improve its answer. This way, you can put the agent in an autonomous loop, where it will continue to operate until it succeeds.\n\nYou can choose between two methods of evaluation:\n\n- By the percentage of tasks completed\n- By the accuracy (score) of the final response\n\nYou can set the limit of steps in such a loop by going to `Settings -> Agents and experts -> LlamaIndex agents -> Max evaluation steps`. The default value is `3`, meaning the agent will only make three attempts to improve or correct its answer. If you set the limit to zero, there will be no limit, and the agent can operate in this mode indefinitely (watch out for tokens!).\n\nYou can change the prompts used for evaluating the response in `Settings -> Prompts -> Agent: evaluation prompt in loop`. 
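\n\nTaken together, the `Loop \u002F Evaluate` mechanism can be pictured as a simple generate-score-retry loop - an illustrative sketch, not PyGPT's implementation (`agent` and `evaluator` are hypothetical objects):\n\n```python\ndef run_with_evaluation(agent, evaluator, task, required_score=75, max_steps=3):\n    # Generate an answer, let a second agent score it (0-100), retry if too low\n    answer = agent.run(task)\n    for _ in range(max_steps):\n        score = evaluator.score(task, answer)\n        # A required score of 0 means: always ask for a self-improved answer\n        if required_score > 0 and score >= required_score:\n            break\n        answer = agent.run(f'Improve and enhance this answer: {answer}')\n    return answer\n```\n\n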
In that evaluation prompt setting, you can adjust the prompt to suit your needs, for example by defining more or less critical feedback for the responses received.\n\n## Agent (OpenAI)\n\nThe mode operates on the `openai-agents` library integrated into the application:\n\nhttps:\u002F\u002Fgithub.com\u002Fopenai\u002Fopenai-agents-python\n\nIt allows running agents for OpenAI models and models compatible with the OpenAI API.\n\nIn this mode, you can use pre-configured Experts in Expert mode presets - they will be launched as agents (in the `openai_agents_experts` type, which allows launching one main agent and subordinate agents to which queries will be appropriately directed).\n\n**Agent types (workflows\u002Fpatterns):**\n\n- `Agent with experts` - uses attached experts as sub-agents\n- `Agent with experts + feedback` - uses attached experts as sub-agents + feedback agent in a loop\n- `Agent with feedback` - single agent + feedback agent in a loop\n- `Planner` - planner agent, 3 sub-agents inside: planner, base agent + feedback\n- `Research bot` - researcher, 3 sub-agents inside: planner, searcher and writer as base agent\n- `Simple agent` - a single agent.\n- `Evolve` - in each generation (cycle), the best response from a given parent agent is selected; in the next generation, the cycle repeats.\n- `B2B` - bot-to-bot communication, involving two bots interacting with each other while keeping a human in the loop.\n- `Supervisor + Worker` - one agent (supervisor) acts as a bridge between the user and the second agent (worker). The user provides a query to the supervisor, who then sends instructions to the worker until the task is completed by the worker.\n\nYou can create your own types (workflows\u002Fpatterns) using the built-in visual node-based editor found in `Tools -> Agents Builder`.\n\nThere are also predefined presets added as examples:\n\n- `Coder`\n- `Experts agent`\n- `Planner`\n- `Researcher`\n- `Simple agent`\n- `Writer with Feedback`\n- `2 bots`\n- `Supervisor + worker`\n\nIn the Agents (OpenAI) mode, all remote tools are available for the base agent according to the configuration in the `Config -> Settings -> Remote Tools` menu.\n\nRemote tools for experts can be selected separately for each expert in the preset configuration.\n\nLocal tools (from plugins) are available for agents and experts according to the enabled plugins, as in other modes.\n\nIn agents with feedback and plans, tools can be allowed in a preset configuration for each agent. They also have separate prompts that can be configured in presets.\n\n**Description of how different types of agents work:**\n\nBelow is a pattern for how different types of agents work. 
You can use these patterns to create agents for different tasks by modifying the appropriate prompts in the preset for the specific task.\n\n**Simple Agent**\n- The agent completes its task and then stops working.\n\n**Agent with Feedback**\n- The first agent answers a question.\n- The second agent (feedback) evaluates the answer and, if necessary, goes back to the first agent to enforce corrections.\n- The cycle repeats until the feedback agent is satisfied with the evaluation.\n\n**Agent with Experts**\n- The agent completes the assigned task on its own or delegates it to the most suitable expert (another agent).\n\n**Agent with Experts + Feedback**\n- The first agent answers a question or delegates it to the most suitable expert.\n- The second agent (feedback) evaluates and, if necessary, goes back to the first agent to enforce corrections.\n- The cycle repeats until the feedback agent is satisfied with the evaluation.\n\n**Research Bot**\n- The first agent (planner) prepares a list of phrases to search.\n- The second agent (search) finds information based on the phrases and creates a summary.\n- The third agent (writer) prepares a report based on the summary.\n\n**Planner**\n- The first agent (planner) breaks down a task into sub-tasks and sends the list to the second agent.\n- The second agent performs the task based on the prepared task list.\n- The third agent, responsible for feedback, evaluates, requests corrections if needed, and sends the request back to the first agent. The cycle repeats.\n\n**Evolve**\n- You select the number of agents (parents) to operate in each generation (iteration).\n- Each agent prepares a separate answer to a question.\n- The best agent (producing the best answer) in a generation is selected by the next agent (chooser).\n- Another agent (feedback) verifies the best answer and suggests improvements.\n- A request for improving the best answer is sent to a new pair of agents (new parents).\n- From this new pair, the best answer is selected again in the next generation, and the cycle repeats.\n\n**B2B**\n- A human provides a topic for discussion.\n- Bot 1 generates a response and sends it to Bot 2.\n- Bot 2 receives the response from Bot 1 as input, provides an answer, and sends the response back to Bot 1 as its input. This cycle repeats.\n- The human can interrupt the loop at any time and update the entire discussion.\n\n**Supervisor + Worker**\n\n- A human provides a query to the Supervisor.\n- The Supervisor prepares instructions for the Worker and sends them to the Worker.\n- The Worker completes the task and returns the result to the Supervisor.\n- If the task is completed, the Supervisor returns the result to the user. If not, the Supervisor sends another instruction to the Worker to complete the task or asks the user if there are any questions.\n- The cycle repeats until the task is completed.\n\n**Tip**: Starting from version `2.5.97`, you can assign and use Experts in all of the agent types.\n\n**Limitations:**\n\n- When the `Computer use` tool is selected for an expert or when the `computer-use` model is chosen, all other tools will not be available for that model.\n\n##  Agent (Autonomous) \n\nThis is an older version of the Agent mode, still available as legacy. 
However, it is recommended to use the newer mode: `Agent (LlamaIndex)`.\n\n**WARNING: Please use this mode with caution** - autonomous mode, when connected with other plugins, may produce unexpected results!\n\nThe mode activates autonomous mode, where AI begins a conversation with itself. \nYou can set this loop to run for any number of iterations. Throughout this sequence, the model will engage\nin self-dialogue, answering its own questions and comments, in order to find the best possible solution, subjecting previously generated steps to criticism.\n\n**WARNING:** Setting the number of run steps (iterations) to `0` activates an infinite loop which can generate a large number of requests and cause very high token consumption, so use this option with caution! Confirmation will be displayed every time you run the infinite loop.\n\nThis mode is similar to `Auto-GPT` - it can be used to create more advanced inferences and to solve problems by breaking them down into subtasks that the model will autonomously perform one after another until the goal is achieved.\n\nYou can create presets with custom instructions for multiple agents, incorporating various workflows, instructions, and goals to achieve.\n\nAll plugins are available for agents, so you can enable features such as file access, command execution, web searching, image generation, vision analysis, etc., for your agents. Connecting agents with plugins can create a fully autonomous, self-sufficient system. All currently enabled plugins are automatically available to the Agent.\n\nWhen the `Auto-stop` option is enabled, the agent will attempt to stop once the goal has been reached.\n\nIn contrast to `Auto-stop`, when the `Always continue...` option is enabled, the agent will use the \"always continue\" prompt to generate additional reasoning and automatically proceed to the next step, even if it appears that the task has been completed.\n\n**Options**\n\nThe agent is essentially a **virtual** mode that internally sequences the execution of a selected underlying mode. \nYou can choose which internal mode the agent should use in the settings:\n\n```Settings \u002F Agent (autonomous) \u002F Sub-mode to use```\n\nThe default mode is `Chat`.\n\nIf you want to use the LlamaIndex mode when running the agent, you can also specify which index `LlamaIndex` should use with the option:\n\n```Settings \u002F Agents and experts \u002F Index to use```\n\n![v2_agent_settings](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_a72474fd0f7c.png)\n\n\n##  Experts (co-op, co-operation mode)\n\nExpert mode allows for the creation of experts (using presets) and then consulting them during a conversation. In this mode, a primary base context is created for conducting the conversation. From within this context, the model can make requests to an expert to perform a task and return the results to the main thread. When an expert is called in the background, a separate context is created for them with their own memory. 
This means that each expert, during the life of one main context, also has access to their own memory via their separate, isolated context.\n\n**In simple terms - you can imagine an expert as a separate, additional instance of the model running in the background, which can be called at any moment for assistance, with its own context and memory, as well as its own specialized instructions in a given subject.**\n\nExperts do not share contexts with one another, and the only point of contact between them is the main conversation thread. In this main thread, the model acts as a manager of experts, who can exchange data between them as needed.\n\nAn expert is selected based on the name in the presets; for example, naming your expert as: ID = python_expert, name = \"Python programmer\" will create an expert whom the model will attempt to invoke for matters related to Python programming. You can also manually request to refer to a given expert:\n\n```bash\nCall the Python expert to generate some code.\n```\n\nExperts can be activated or deactivated - to enable or disable them, use the RMB context menu and select the `Enable\u002FDisable` options from the presets list. Only enabled experts are available to use in the thread.\n\nExperts can also be used in `Agent (autonomous)` mode - by creating a new agent using a preset. Simply move the appropriate experts to the active list to automatically make them available for use by the agent.\n\nYou can also use experts in \"inline\" mode - by activating the `Experts (inline)` plugin. This allows for the use of experts in any mode, such as normal chat.\n\nExpert mode, like agent mode, is a \"virtual\" mode - you need to select a target mode of operation for it, which can be done in the settings at `Settings \u002F Agent (autonomous) \u002F Sub-mode for experts`.\n\nYou can also ask for a list of active experts at any time:\n\n```bash\nGive me a list of active experts.\n```\n\n##  Computer use\n\nThis mode allows for autonomous computer control.\n\nIn this mode, the model takes control of the mouse and keyboard and can navigate within the user's environment. The `Computer use` remote tool is used here: https:\u002F\u002Fplatform.openai.com\u002Fdocs\u002Fguides\u002Ftools-computer-use, combined with the `Mouse and Keyboard` plugin.\n\n**Example of use:**\n\n```Click on the Start Menu to open it, search for the Notepad in the list, and run it.```\n\nYou can change the environment in which the navigation mode operates by using the list at the bottom of the toolbox.\n\n**Available Environments:**\n\n- Browser\n- Linux\n- Windows\n- Mac\n\nYou can run this mode in Sandbox (using `Playwright` - https:\u002F\u002Fplaywright.dev\u002F) - to do it, just enable the `Sandbox` switch in the toolbox. Playwright browsers must be installed on your system. To do so, run:\n\n```bash\npip install playwright\nplaywright install \u003Cchromium|firefox|webkit>\n```\nAfter that, set the path to the directory with installed browsers in the `Mouse and Keyboard` plugin settings option: `Sandbox (Playwright) \u002F Browsers directory`.\n\nCompiled binary and Snap versions have `chromium` preinstalled in the package.\n\n\n**Tip:** DO NOT enable the `Mouse and Keyboard` plugin in Computer use mode—it is already connected to Computer use mode \"in the background.\"\n\n\n# Context and memory\n\n## Short and long-term memory\n\n**PyGPT** features a continuous chat mode that maintains a long context of the ongoing dialogue. 
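\n\nMaintaining that context boils down to resending prior turns with every request - a schematic sketch (not PyGPT's actual code, which persists history in `db.sqlite`; uses the `openai` package):\n\n```python\nfrom openai import OpenAI\n\nclient = OpenAI()\nhistory = []  # PyGPT persists this in its database; a plain list suffices here\n\ndef ask(user_msg):\n    history.append({'role': 'user', 'content': user_msg})\n    reply = client.chat.completions.create(model='gpt-4o', messages=history)\n    answer = reply.choices[0].message.content\n    history.append({'role': 'assistant', 'content': answer})\n    return answer\n```\n\n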
The application preserves the entire conversation history and automatically appends it to each new message (prompt) you send to the AI. Additionally, you have the flexibility to revisit past conversations whenever you choose. The application keeps a record of your chat history, allowing you to resume discussions from the exact point you stopped.\n\n## Handling multiple contexts\n\nOn the left side of the application interface, there is a panel that displays a list of saved conversations. You can save numerous contexts and switch between them with ease. This feature allows you to revisit and continue from any point in a previous conversation. **PyGPT** automatically generates a summary for each context, akin to the way `ChatGPT` operates, and gives you the option to modify these titles yourself.\n\n![v2_context_list](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_bcdfb2157bb4.png)\n\nYou can disable context support in the settings by using the following option:\n\n``` ini\nConfig -> Settings -> Use context \n```\n\n## Clearing history\n\nYou can clear the entire memory (all contexts) by selecting the menu option:\n\n``` ini\nFile -> Clear history...\n```\n\n## Context storage\n\nOn the application side, the context is stored in the `SQLite` database located in the working directory (`db.sqlite`).\nIn addition, all history is also saved to `.txt` files for easy reading.\n\nOnce a conversation begins, a title for the chat is generated and displayed on the list to the left. This process is similar to `ChatGPT`, where the subject of the conversation is summarized, and a title for the thread is created based on that summary. You can change the name of the thread at any time.\n\n# Files And Attachments\n\n## Uploading attachments\n\n**Using Your Own Files as Additional Context in Conversations**\n\nYou can use your own files (for example, to analyze them) during any conversation. You can do this in two ways: by indexing (embedding) your files in a vector database, which makes them available all the time during a \"Chat with Files\" session, or by adding a file attachment (the attachment file will only be available during the conversation in which it was uploaded).\n\n**Attachments**\n\n**PyGPT** makes it simple for users to upload files and send them to the model for tasks like analysis, similar to attaching files in `ChatGPT`. There's a separate `Attachments` tab next to the text input area specifically for managing file uploads. \n\n**Tip: Attachments uploaded in a group are available in all contexts in that group**.\n\n![v2_file_input](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_125464c12bfd.png)\n\nYou can use attachments to provide additional context to the conversation. Uploaded files will be converted into text using loaders from LlamaIndex, and then embedded into the vector store. You can upload any file format supported by the application through LlamaIndex. 
Supported formats include:\n\nText-based types:\n\n- CSV files (csv)\n- Epub files (epub)\n- Excel .xlsx spreadsheets (xlsx)\n- HTML files (html, htm)\n- IPYNB Notebook files (ipynb)\n- JSON files (json)\n- Markdown files (md)\n- PDF documents (pdf)\n- Plain-text files (txt, etc.)\n- Word .docx documents (docx)\n- XML files (xml)\n\nMedia types:\n\n- Image (using vision) (jpg, jpeg, png, gif, bmp, tiff, webp)\n- Video\u002Faudio (mp4, avi, mov, mkv, webm, mp3, mpeg, mpga, m4a, wav)\n\nArchives:\n\n- zip\n- tar, tar.gz, tar.bz2\n\nThe content from the uploaded attachments will be used in the current conversation and will be available throughout (per context). There are 3 modes available for working with additional context from attachments:\n\n- `Full context`: Provides the best results. This mode attaches the entire content of the read file to the user's prompt. This process happens in the background and may require a large number of tokens if you uploaded extensive content.\n\n- `RAG`: The indexed attachment will only be queried in real-time using LlamaIndex. This operation does not require any additional tokens, but it may not provide access to the full content of the file 1:1.\n\n- `Summary`: When queried, an additional query will be generated in the background and executed by a separate model to summarize the content of the attachment and return the required information to the main model. You can change the model used for summarization in the settings under the `Files and attachments` section.\n\nIn the `RAG` and `Summary` modes, you can enable an additional setting by going to `Settings -> Files and attachments -> Use history in RAG query`. This allows for better preparation of queries for RAG. When this option is turned on, the entire conversation context is considered, rather than just the user's last query. This allows for better searching of the index for additional context. In the `RAG limit` option, you can set a limit on how many recent entries in a discussion should be considered (`0 = no limit, default: 3`).\n\n**Important**: When using `Full context` mode, the entire content of the file is included in the prompt, which can result in high token usage each time. If you want to reduce the number of tokens used, use the `RAG` option instead, which will only query the indexed attachment in the vector database to provide additional context.\n\n**Images as Additional Context**\n\nFiles such as jpg, png, and similar images are a special case. By default, images are not used as additional context; they are analyzed in real-time using a vision model. If you want to use them as additional context instead, you must enable the \"Allow images as additional context\" option in the settings: `Files and attachments -> Allow images as additional context`.\n\n**Uploading larger files and auto-index**\n\nTo use the `RAG` mode, the file must be indexed in the vector database. This occurs automatically at the time of upload if the `Auto-index on upload` option in the `Attachments` tab is enabled. When uploading large files, such indexing might take a while - therefore, if you are using the `Full context` option, which does not use the index, you can disable the `Auto-index` option to speed up the upload of the attachment. 
In this case, it will only be indexed when the `RAG` option is called for the first time, and until then, the attachment will be available in the form of `Full context` and `Summary`.\n\n**Embeddings**\n\nWhen using RAG to query attachments, the documents are indexed into a temporary vector store. With multiple providers and models available, you can select the model used for querying attachments in: `Config -> Settings -> Files and Attachments`. You can also choose the embedding models for specified providers in the `Config -> Settings -> Indexes \u002F LlamaIndex -> Embeddings -> Default embedding models` list. By default, when querying an attachment using RAG, the default embedding model and provider corresponding to the RAG query model will be used. If no default configuration is provided for a specific provider, the global embedding configuration will be used.\n\nFor example, if the RAG query model is `gpt-4o-mini`, then the default model for the provider `OpenAI` will be used. If the default model for `OpenAI` is not specified on the list, the global provider and model will be used.\n\n## Downloading files\n\n**PyGPT** enables the automatic download and saving of files created by the model. This is carried out in the background, with the files being saved to a `data` folder located within the user's working directory. To view or manage these files, users can navigate to the `Files` tab, which features a file browser for this specific directory. Here, users can manage all files sent by the AI.\n\nThis `data` directory is also where the application stores files that are generated locally by the AI, such as code files or any other data requested from the model. Users have the option to execute code directly from the stored files and read their contents, with the results fed back to the AI. This hands-off process is managed by the built-in plugin system and model-triggered commands. You can also index files from this directory (using the integrated `LlamaIndex`) and use their contents as additional context provided to the discussion.\n\nThe `Files I\u002FO` plugin takes care of file operations in the `data` directory, while the `Code Interpreter` plugin allows for the execution of code from these files.\n\n![v2_file_output](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_125464c12bfd.png)\n\nTo allow the model to manage files or Python code execution, the `+ Tools` option must be active, along with the above-mentioned plugins:\n\n![v2_code_execute](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_385abd0fa1a3.png)\n\n# Presets\n\n## What is a preset?\n\nPresets in **PyGPT** are essentially templates used to store and quickly apply different configurations. Each preset includes settings for the mode you want to use (such as chat, completion, or image generation), an initial system prompt, an assigned name for the AI, a username for the session, and the desired \"temperature\" for the conversation. A warmer \"temperature\" setting allows the AI to provide more creative responses, while a cooler setting encourages more predictable replies. These presets can be used across various modes and with models accessed via the `OpenAI API` or `LlamaIndex`.\n\nThe application lets you create as many presets as needed and easily switch among them. 
Additionally, you can clone an existing preset, which is useful for creating variations based on previously set configurations and for experimentation.\n\n![v2_preset](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_4b18215ffa14.png)\n\n## Example usage\n\nThe application includes several sample presets that help you become acquainted with the mechanism of their use.\n\n# Profiles\n\nYou can create multiple profiles for the app and switch between them. Each profile uses its own configuration, settings, context history, and a separate folder for user files. This allows you to set up different environments and quickly switch between them, changing the entire setup with just one click.\n\nThe app lets you create new profiles, edit existing ones, and duplicate current ones.\n\nTo create a new profile, select the option from the menu: `Config -> Profile -> New Profile...`\n\nTo edit saved profiles, choose the option from the menu: `Config -> Profile -> Edit Profiles...`\n\nTo switch to a created profile, pick the profile from the menu: `Config -> Profile -> [Profile Name]`\n\nEach profile uses its own user directory (workdir). You can link a newly created or edited profile to an existing workdir with its configuration.\n\nThe name of the currently active profile is shown as (Profile Name) in the window title.\n\n# Models\n\n## Built-in models\n\nPyGPT has a preconfigured list of models (as of 2026-02-06):\n\n- `bielik-11b-v2.3-instruct:Q4_K_M` (Ollama)\n- `chatgpt-4o-latest` (OpenAI)\n- `claude-3-5-sonnet-20240620` (Anthropic)\n- `claude-3-7-sonnet` (Anthropic)\n- `claude-3-opus` (Anthropic)\n- `claude-opus-4-0` (Anthropic)\n- `claude-sonnet-4-0` (Anthropic)\n- `claude-opus-4-5` (Anthropic)\n- `claude-sonnet-4-5` (Anthropic)\n- `codellama` (Ollama)\n- `codex-mini` (OpenAI)\n- `dall-e-2` (OpenAI)\n- `dall-e-3` (OpenAI)\n- `deep-research-pro-preview-12-2025` (Google)\n- `deepseek-chat` (DeepSeek)\n- `deepseek-r1:1.5b` (Ollama)\n- `deepseek-r1:14b` (Ollama)\n- `deepseek-r1:32b` (Ollama)\n- `deepseek-r1:7b` (Ollama)\n- `deepseek-reasoner` (DeepSeek)\n- `gemini-2.5-computer-use-preview-10-2025` (Google)\n- `gemini-1.5-flash` (Google)\n- `gemini-1.5-pro` (Google)\n- `gemini-2.0-flash-exp` (Google)\n- `gemini-2.5-flash` (Google)\n- `gemini-2.5-flash-preview-native-audio-dialog` (Google, real-time)\n- `gemini-2.5-pro` (Google)\n- `gemini-3-flash-preview` (Google)\n- `gemini-3-pro-image-preview` (Google)\n- `gemini-3-pro-preview` (Google)\n- `gpt-3.5-turbo` (OpenAI)\n- `gpt-3.5-turbo-16k` (OpenAI)\n- `gpt-3.5-turbo-instruct` (OpenAI)\n- `gpt-4` (OpenAI)\n- `gpt-4-32k` (OpenAI)\n- `gpt-4-turbo` (OpenAI)\n- `gpt-4-vision-preview` (OpenAI)\n- `gpt-4.1` (OpenAI)\n- `gpt-4.1-mini` (OpenAI)\n- `gpt-4.1-nano` (OpenAI)\n- `gpt-4o` (OpenAI)\n- `gpt-4o-realtime-preview` (OpenAI, real-time)\n- `gpt-4o-mini` (OpenAI)\n- `gpt-5` (OpenAI)\n- `gpt-5-mini` (OpenAI)\n- `gpt-5-nano` (OpenAI)\n- `gpt-5.2` (OpenAI)\n- `gpt-image-1` (OpenAI)\n- `gpt-image-1.5` (OpenAI)\n- `gpt-oss:20b` (OpenAI - via Ollama and HuggingFace Router)\n- `gpt-oss:120b` (OpenAI - via Ollama and HuggingFace Router)\n- `gpt-realtime` (OpenAI, real-time)\n- `grok-2-vision` (xAI)\n- `grok-3` (xAI)\n- `grok-3-fast` (xAI)\n- `grok-3-mini` (xAI)\n- `grok-3-mini-fast` (xAI)\n- `grok-4` (xAI)\n- `grok-imagine-image` (xAI)\n- `grok-imagine-video` (xAI)\n- `llama2-uncensored` (Ollama)\n- `llama3.1` (Ollama)\n- `llama3.1:70b` (Ollama)\n- `mistral` (Ollama)\n- `mistral-large` (Ollama)\n- 
`mistral-small3.1` (Ollama)\n- `nano-banana-pro-preview` (Google)\n- `o1` (OpenAI)\n- `o1-mini` (OpenAI)\n- `o1-pro` (OpenAI)\n- `o3` (OpenAI)\n- `o3-deep-research` (OpenAI)\n- `o3-mini` (OpenAI)\n- `o3-pro` (OpenAI)\n- `o4-mini` (OpenAI)\n- `o4-mini-deep-research` (OpenAI)\n- `qwen2:7b` (Ollama)\n- `qwen2.5-coder:7b` (Ollama)\n- `qwen3:8b` (Ollama)\n- `qwen3:30b-a3b` (Ollama)\n- `r1` (Perplexity)\n- `sonar` (Perplexity)\n- `sonar-deep-research` (Perplexity)\n- `sonar-pro` (Perplexity)\n- `sonar-reasoning` (Perplexity)\n- `sonar-reasoning-pro` (Perplexity)\n- `sora-2` (OpenAI)\n- `veo-3.0-generate-preview` (Google)\n- `veo-3.0-fast-generate-preview` (Google)\n- `veo-3.1-generate-preview` (Google)\n- `veo-3.1-fast-generate-preview` (Google)\n\nAll models are specified in the configuration file `models.json`, which you can customize. \nThis file is located in your working directory. You can add new models provided directly by `OpenAI API` (or compatible), `Google Gen AI API`, `Anthropic API`, `xAI API`, and those supported by `LlamaIndex` or `Ollama` to this file. Configuration for LlamaIndex is placed in the `llama_index` key.\n\nYou can import new models by manually editing `models.json` or by using the model importer in the `Config -> Models -> Import` menu.\n\n**Tip:** The models on the list are sorted by provider, not by manufacturer. A model from a particular manufacturer may be available through different providers (e.g., OpenAI models can be provided by the `OpenAI API` or by `OpenRouter`). If you want to use a specific model through a particular provider, you need to configure the provider in `Config -> Models -> Edit`, or import it directly via `Config -> Models -> Import`.\n\n**Tip:** Anthropic and Deepseek API providers use VoyageAI for embeddings (Chat with Files and attachments RAG), so you must also configure the Voyage API key if you want to use embeddings from these providers.\n\n## Adding a custom model\n\nYou can add your own models. See the section `Extending PyGPT \u002F Adding a new model` for more info.\n\nThere is built-in support for these LLM providers:\n\n- `Anthropic`\n- `Azure OpenAI` (native SDK)\n- `Deepseek API`\n- `Google` (native SDK)\n- `HuggingFace API`\n- `HuggingFace Router` (wrapper for OpenAI compatible ChatCompletions)\n- `Local models` (OpenAI API compatible)\n- `Mistral AI`\n- `Ollama`\n- `OpenAI` (native SDK)\n- `OpenRouter`\n- `Perplexity`\n- `xAI` (native SDK)\n\n## How to use local or non-GPT models\n\n### Llama 3, Mistral, DeepSeek, Qwen, gpt-oss, and other local models\n\nHow to use locally installed Llama 3, DeepSeek, Mistral, etc. models:\n\n1) Choose a working mode: `Chat` or `Chat with Files`.\n\n2) In the models list, select, edit, or add a new model (with the `ollama` provider). You can edit the model settings through the menu `Config -> Models -> Edit`, then configure the model parameters in the `advanced` section.\n\n3) Download and install Ollama from here: https:\u002F\u002Fgithub.com\u002Follama\u002Follama\n\nFor example, on Linux:\n\n```curl -fsSL https:\u002F\u002Follama.com\u002Finstall.sh | sh```\n\n4) Run the model (e.g. Llama 3) locally on your machine. 
For example, on Linux:\n\n```ollama run llama3.1```\n\n5) Return to PyGPT and select the correct model from the models list to chat with it using Ollama running locally.\n\n**Example available models**\n\n- `llama3.1`\n- `codellama`\n- `mistral`\n- `llama2-uncensored`\n- `deepseek-r1`\n\netc.\n\nYou can add more models by editing the models list.\n\n**Real-time importer**\n\nYou can also import models in real-time from a running Ollama instance using the `Config -> Models -> Import...` tool.\n\n**Custom Ollama endpoint**\n\nThe default endpoint for Ollama is: http:\u002F\u002Flocalhost:11434\n\nYou can change it globally by setting the environment variable `OLLAMA_API_BASE` in `Settings -> General -> Advanced -> Application environment`.\n\nYou can also change the \"base_url\" for a specific model in its configuration:\n\n`Config -> Models -> Edit`, then in the `Advanced -> [LlamaIndex] ENV Vars` section add the variable:\n\nNAME: `OLLAMA_API_BASE`\nVALUE: `http:\u002F\u002Fmy_endpoint.com:11434`\n\n\n**List of all models supported by Ollama**\n\nhttps:\u002F\u002Follama.com\u002Flibrary\n\nhttps:\u002F\u002Fgithub.com\u002Follama\u002Follama\n\n**IMPORTANT:** Remember to define the correct model name in the **kwargs list in the model settings.\n\n**Using local embeddings**\n\nRefer to: https:\u002F\u002Fdocs.llamaindex.ai\u002Fen\u002Fstable\u002Fexamples\u002Fembeddings\u002Follama_embedding\u002F\n\nYou can use an Ollama instance for embeddings. Simply select the `ollama` provider in:\n\n```Config -> Settings -> Indexes \u002F LlamaIndex -> Embeddings -> Embeddings provider```\n\nDefine parameters like the model name and Ollama base URL in the Embeddings provider **kwargs list, e.g.:\n\n- name: `model_name`, value: `llama3.1`, type: `str`\n\n- name: `base_url`, value: `http:\u002F\u002Flocalhost:11434`, type: `str`
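\n\nFor reference, these two kwargs map directly onto LlamaIndex's Ollama embedding class. A minimal sketch of the equivalent code (not PyGPT internals; assumes the `llama-index-embeddings-ollama` package is installed):\n\n```python\nfrom llama_index.embeddings.ollama import OllamaEmbedding\n\n# Mirrors the two **kwargs configured above (model_name, base_url).\nembed_model = OllamaEmbedding(\n    model_name=\"llama3.1\",\n    base_url=\"http:\u002F\u002Flocalhost:11434\",\n)\nprint(len(embed_model.get_text_embedding(\"hello\")))  # embedding dimension\n```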
\n\n### Google Gemini, Anthropic Claude, xAI Grok, etc.\n\nIf you want to use non-OpenAI models in `Chat with Files` and `Agents (LlamaIndex)` modes, then remember to configure the required parameters like API keys in the model config fields. `Chat` mode works via the OpenAI SDK (compatible API); the `Chat with Files` and `Agents (LlamaIndex)` modes work via LlamaIndex.\n\n**Google Gemini**\n\nRequired ENV:\n\n- GOOGLE_API_KEY = {api_key_google}\n\nRequired **kwargs:\n\n- model\n\n**Anthropic Claude**\n\nRequired ENV:\n\n- ANTHROPIC_API_KEY = {api_key_anthropic}\n\nRequired **kwargs:\n\n- model\n\n**xAI Grok** (Chat mode only)\n\nRequired ENV:\n\n- OPENAI_API_KEY = {api_key_xai}\n- OPENAI_API_BASE = {api_endpoint_xai}\n\nRequired **kwargs:\n\n- model\n\n**Mistral AI**\n\nRequired ENV:\n\n- MISTRAL_API_KEY = {api_key_mistral}\n\nRequired **kwargs:\n\n- model\n\n**Perplexity**\n\nRequired ENV:\n\n- PPLX_API_KEY = {api_key_perplexity}\n\nRequired **kwargs:\n\n- model\n\n**HuggingFace API**\n\nRequired ENV:\n\n- HUGGING_FACE_TOKEN = {api_key_hugging_face}\n\nRequired **kwargs:\n\n- model_name | model\n- token\n- provider = auto\n\n\n# Plugins\n\n## Overview\n\n**PyGPT** can be enhanced with plugins to add new features.\n\nThe following plugins are currently available, and the model can use them instantly:\n\n- `API calls` - the plugin lets you connect the model to external services using custom-defined API calls.\n\n- `Audio Input` - provides speech recognition.\n\n- `Audio Output` - provides voice synthesis.\n\n- `Autonomous Agent (inline)` - enables autonomous conversation (AI to AI), manages the loop, and connects output back to input. This is the inline Agent mode.\n\n- `Bitbucket` - Access the Bitbucket API to manage repositories, issues, and pull requests.\n\n- `Chat with Files (LlamaIndex, inline)` - the plugin integrates `LlamaIndex` storage into any chat and provides additional knowledge to the context (from indexed files and previous context from the database).\n\n- `Code Interpreter` - responsible for generating and executing Python code, functioning much like the Code Interpreter in ChatGPT, but locally. This means a model can interface with any script, application, or code. Plugins can work in conjunction to perform sequential tasks; for example, the `Files` plugin can write generated Python code to a file, which the `Code Interpreter` can then execute and return the result to the model.\n\n- `Context history (calendar, inline)` - provides access to the context history database.\n\n- `Crontab \u002F Task scheduler` - the plugin provides cron-based job scheduling - you can schedule tasks\u002Fprompts to be sent at any time using cron-based syntax for task setup.\n\n- `Custom Commands` - allows you to create and execute custom commands on your system.\n\n- `Experts (inline)` - allows calling experts in any chat mode. This is the inline Experts (co-op) mode.\n\n- `Facebook` - Manage user info, pages, posts, and photos on Facebook pages.\n\n- `Files I\u002FO` - provides access to the local filesystem, enabling a model to read and write files, as well as list and create directories.\n\n- `GitHub` - Access the GitHub API to manage repositories, issues, and pull requests.\n\n- `Google` - Access Gmail, Drive, Docs, Maps, Calendar, Contacts, Colab, YouTube, Keep - for managing emails, files, events, notes, video info, and contacts.\n\n- `Image Generation (inline)` - integrates DALL-E 3 image generation with any chat mode. Just enable it and ask for an image in Chat mode, using a standard model like GPT-4. The plugin does not require the `+ Tools` option to be enabled.\n\n- `Mailer` - Provides the ability to send, receive, and read emails.\n\n- `MCP` - Provides access to remote tools via the Model Context Protocol (MCP), including stdio, SSE, and Streamable HTTP transports, with per-server allow\u002Fdeny filtering, Authorization header support, and a tools cache.\n\n- `Mouse and Keyboard` - provides the ability to control the mouse and keyboard by the model.\n\n- `OpenStreetMap` - Search, geocode, plan routes, and generate static maps using OpenStreetMap services (Nominatim, OSRM, staticmap).\n\n- `Real Time` - automatically appends the current date and time to the system prompt, informing the model about the current time.\n\n- `Serial port \u002F USB` - the plugin provides commands for reading and sending data to USB ports.\n\n- `Server (SSH\u002FFTP)` - Connect to remote servers using FTP, SFTP, and SSH. Execute remote commands, upload, download, and more.\n\n- `Slack` - Handle users, conversations, messages, and files on Slack.\n\n- `System Prompt Extra (append)` - appends additional system prompts (extra data) from a list to every current system prompt, letting you automatically enhance each system prompt with extra instructions.\n\n- `System (OS)` - allows you to create and execute custom commands on your system.\n\n- `Telegram` - Send messages, photos, and documents; manage chats and contacts.\n\n- `Tuya (IoT)` - Handle Tuya Smart Home devices via the Tuya Cloud API.\n\n- `Vision (inline)` - integrates Vision capabilities with any chat mode, not just Vision mode. 
When the plugin is enabled, the model temporarily switches to vision in the background when an image attachment or vision capture is provided.\n\n- `Voice Control (inline)` - provides voice control command execution within a conversation.\n\n- `Web Search` - provides the ability to connect to the Web, search web pages for current data, and index external content using LlamaIndex data loaders.\n\n- `Wikipedia` - Search Wikipedia for information.\n\n- `Wolfram Alpha` - Compute and solve with Wolfram Alpha: short answers, full JSON pods, math (solve, derivatives, integrals), unit conversions, matrix operations, and plots.\n\n- `X\u002FTwitter` - Interact with tweets and users, manage bookmarks and media, perform likes, retweets, and more.\n\n\n## API calls\n\n**PyGPT** lets you connect the model to external services using custom-defined API calls.\n\nTo activate this feature, turn on the `API calls` plugin found in the `Plugins` menu.\n\nIn this plugin, you can provide a list of allowed API calls, their parameters, and request types. The model will replace the provided placeholders with the required params and make the API call to the external service.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#api-calls\n\n## Audio Input\n\nThe plugin facilitates speech recognition (by default using the `Whisper` model from OpenAI; `Google` and `Bing` are also available). It allows for voice commands to be relayed to the AI using your own voice. Whisper doesn't require any extra API keys or additional configuration; it uses the main OpenAI key. In the plugin's configuration options, you should adjust the volume level (min energy) at which the plugin will respond to your microphone. Once the plugin is activated, a new `Speak` option will appear at the bottom near the `Send` button - when this is enabled, the application will respond to the voice received from the microphone.
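\n\nUnder the hood, this is the standard OpenAI transcription endpoint. A minimal sketch of the equivalent direct call (illustrative only - PyGPT wires this up for you; the file name is just an example):\n\n```python\nfrom openai import OpenAI  # reads OPENAI_API_KEY, the same main OpenAI key\n\nclient = OpenAI()\n# Transcribe a local recording with Whisper.\nwith open(\"voice_note.wav\", \"rb\") as f:\n    result = client.audio.transcriptions.create(model=\"whisper-1\", file=f)\nprint(result.text)\n```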
\n\nThe plugin can be extended with other speech recognition providers.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#audio-input\n\n## Audio Output\n\nThe plugin lets you turn text into speech using the TTS model from OpenAI or other services like `Microsoft Azure`, `Google`, and `Eleven Labs`. You can add more text-to-speech providers to it too. `OpenAI TTS` does not require any additional API keys or extra configuration; it utilizes the main OpenAI key. \nMicrosoft Azure requires an Azure API Key. Before using speech synthesis via `Microsoft Azure`, `Google` or `Eleven Labs`, you must configure the audio plugin with your API keys, regions, and voices if required.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#audio-output\n\n## Autonomous Agent (inline)\n\n**WARNING: Please use autonomous mode with caution!** - this mode, when connected with other plugins, may produce unexpected results!\n\nThe plugin activates autonomous mode in standard chat modes, where the AI begins a conversation with itself. \nYou can set this loop to run for any number of iterations. Throughout this sequence, the model will engage\nin self-dialogue, answering its own questions and comments, in order to find the best possible solution, subjecting previously generated steps to criticism.\n\nThis mode is similar to `Auto-GPT` - it can be used to create more advanced inferences and to solve problems by breaking them down into subtasks that the model will autonomously perform one after another until the goal is achieved. The plugin is capable of working in cooperation with other plugins, thus it can utilize tools such as web search, access to the file system, or image generation using `DALL-E`.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#autonomous-agent-inline\n\n## Bitbucket\n\nThe Bitbucket plugin allows for seamless integration with the Bitbucket Cloud API, offering functionality to manage repositories, issues, and pull requests. The plugin provides configurable options for authentication and caching, and manages HTTP requests efficiently.\n\n- Retrieve details about the authenticated user.\n- Get information about a specific user.\n- List available workspaces.\n- List repositories in a workspace.\n- Get details about a specific repository.\n- Create a new repository.\n- Delete an existing repository.\n- Retrieve contents of a file in a repository.\n- Upload a file to a repository.\n- Delete a file from a repository.\n- List issues in a repository.\n- Create a new issue.\n- Comment on an existing issue.\n- Update details of an issue.\n- List pull requests in a repository.\n- Create a new pull request.\n- Merge an existing pull request.\n- Search for repositories.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#bitbucket\n\n## Chat with Files (LlamaIndex, inline)\n\nThe plugin integrates `LlamaIndex` storage into any chat and provides additional knowledge to the context.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#chat-with-files-llamaindex-inline\n\n## Code Interpreter\n\n### Executing Code\n\nFrom version `2.4.13`, it ships with built-in `IPython`.\n\nThe plugin operates similarly to the `Code Interpreter` in `ChatGPT`, with the key difference that it works locally on the user's system. It allows for the execution of any Python code on the computer that the model may generate. When combined with the `Files I\u002FO` plugin, it facilitates running code from files saved in the `data` directory. You can also prepare your own code files and enable the model to use them, or add your own plugin for this purpose. You can execute commands and code on the host machine or in a Docker container.\n\n**IPython:** Starting from version `2.4.13`, it is highly recommended to adopt the new option: `IPython`, which offers significant improvements over previous workflows. IPython provides a robust environment for executing code within a kernel, allowing you to maintain the state of your session by preserving the results of previous commands. This feature is particularly useful for iterative development and data analysis, as it enables you to build upon prior computations without starting from scratch. Moreover, IPython supports the use of magic commands, such as `!pip install \u003Cpackage_name>`, which facilitate the installation of new packages directly within the session. This capability streamlines the process of managing dependencies and enhances the flexibility of your development environment. Overall, IPython offers a more efficient and user-friendly experience for executing and managing code.
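\n\nAs a minimal illustration of what this statefulness means in practice (plain Python, not PyGPT-specific - imagine each block arriving as a separate prompt in the same session):\n\n```python\n# --- first execution (one prompt) ---\ndata = [1, 2, 3, 4]\ntotal = sum(data)         # the kernel keeps `data` and `total` alive\n\n# --- second execution (a later prompt, same session) ---\nmean = total \u002F len(data)  # still works: state survived between runs\nprint(mean)               # 2.5\n```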
\n\nTo use IPython in sandbox mode, Docker must be installed on your system. You can find the installation instructions here: https:\u002F\u002Fdocs.docker.com\u002Fengine\u002Finstall\u002F\n\n**Tip: connecting IPython in Docker in the Snap version**:\n\nTo use IPython in the Snap version, you must connect PyGPT to the Docker daemon:\n\n```commandline\nsudo snap connect pygpt:docker-executables docker:docker-executables\n```\n\n```commandline\nsudo snap connect pygpt:docker docker:docker-daemon\n```\n\n**Code interpreter:** a real-time Python Code Interpreter is built-in. Click the `\u003C>` icon to open the interpreter window. Both the input and output of the interpreter are connected to the plugin. Any output generated by the executed code will be displayed in the interpreter. Additionally, you can request the model to retrieve contents from the interpreter window output.\n\n![v2_python](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_a409c46c7774.png)\n\n**INFO:** Executing Python code using IPython in compiled versions requires an enabled sandbox (Docker container). You can connect the Docker container via `Plugins -> Settings`.\n\n\n**Tip:** always remember to enable the `+ Tools` option to allow executing commands from the plugins.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#code-interpreter\n\n## Context history (calendar, inline)\n\nProvides access to the context history database.\nThe plugin also provides access to reading and creating day notes.\n\nExamples of use - you can ask, e.g., for the following:\n\n```Give me today's day note```\n\n```Save a new note for today```\n\n```Update my note for today with...```\n\n```Get the list of yesterday's conversations```\n\n```Get contents of conversation ID 123```\n\netc.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#context-history-calendar-inline\n\n## Crontab \u002F Task scheduler\n\nThe plugin provides cron-based job scheduling - you can schedule tasks\u002Fprompts to be sent at any time using cron-based syntax for task setup.
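\n\nAs an illustration of standard five-field cron syntax (a generic sketch using the third-party `croniter` package, not the plugin's internals - its own field layout may differ slightly):\n\n```python\nfrom croniter import croniter  # pip install croniter\nfrom datetime import datetime\n\n# Fields: minute hour day-of-month month day-of-week\n#   \"30 9 * * *\"    -> every day at 09:30\n#   \"0 *\u002F2 * * 1-5\" -> every 2 hours, Monday to Friday\nschedule = croniter(\"30 9 * * *\", datetime.now())\nprint(\"next run:\", schedule.get_next(datetime))\n```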
\n\n![v2_crontab](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_6ed3ef0ddf6c.png)\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#crontab-task-scheduler\n\n## Custom Commands\n\nWith the `Custom Commands` plugin, you can integrate **PyGPT** with your operating system and scripts or applications. You can define an unlimited number of custom commands and instruct the model on when and how to execute them. Configuration is straightforward, and **PyGPT** includes a simple tutorial command for testing and learning how it works:\n\n![v2_custom_cmd](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_b7f90f5ca0d1.png)\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#custom-commands\n\n## Experts (inline)\n\nThe plugin allows calling experts in any chat mode. This is the inline Experts (co-op) mode.\n\nSee the `Work modes -> Experts` section for more details.\n\n## Facebook\n\nThe plugin integrates with Facebook's Graph API to enable various actions such as managing pages, posts, and media uploads. It uses OAuth2 for authentication and supports automatic token exchange processes.\n\n- Retrieving basic information about the authenticated user.\n- Listing all Facebook pages the user has access to.\n- Setting a specified Facebook page as the default.\n- Retrieving a list of posts from a Facebook page.\n- Creating a new post on a Facebook page.\n- Deleting a post from a Facebook page.\n- Uploading a photo to a Facebook page.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#facebook\n\n## Files I\u002FO\n\nThe plugin allows for file management within the local filesystem. It enables the model to create, read, write, and query files located in the `data` directory, which can be found in the user's work directory. With this plugin, the AI can also generate Python code files and thereafter execute that code within the user's system.\n\nPlugin capabilities include:\n\n- Sending files as attachments\n- Reading files\n- Appending to files\n- Writing files\n- Deleting files and directories\n- Listing files and directories\n- Creating directories\n- Downloading files\n- Copying files and directories\n- Moving (renaming) files and directories\n- Reading file info\n- Indexing files and directories using LlamaIndex\n- Querying files using LlamaIndex\n- Searching for files and directories\n\nIf a file with the same name already exists, a prefix including the date and time is added to the new file's name.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#files-i-o\n\n## GitHub\n\nThe plugin provides seamless integration with GitHub, allowing various operations such as repository management, issue tracking, pull requests, and more through GitHub's API. This plugin requires authentication, which can be configured using a Personal Access Token (PAT) or OAuth Device Flow.\n\n- Retrieve details about your GitHub profile.\n- Get information about a specific GitHub user.\n- List repositories for a user or organization.\n- Retrieve details about a specific repository.\n- Create a new repository.\n- Delete an existing repository.\n- Retrieve the contents of a file in a repository.\n- Upload or update a file in a repository.\n- Delete a file from a repository.\n- List issues in a repository.\n- Create a new issue in a repository.\n- Add a comment to an existing issue.\n- Close an existing issue.\n- List pull requests in a repository.\n- Create a new pull request.\n- Merge an existing pull request.\n- Search for repositories based on a query.\n- Search for issues based on a query.\n- Search for code based on a query.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#github\n\n## Google (Gmail, Drive, Calendar, Contacts, YT, Keep, Docs, Maps, Colab)\n\nThe plugin integrates with various Google services, enabling features such as email management, calendar events, contact handling, and document manipulation through Google APIs.\n\n- **Gmail**\n  - Listing recent emails from Gmail.\n  - Listing all emails from Gmail.\n  - Searching emails in Gmail.\n  - Retrieving email details by ID in Gmail.\n  - Sending an email via Gmail.\n  \n- **Google Calendar**\n  - Listing recent calendar events.\n  - Listing today's calendar events.\n  - Listing tomorrow's calendar events.\n  - Listing all calendar events.\n  - Retrieving calendar events by a specific date.\n  - Adding a new event to the calendar.\n  - Deleting an event from the calendar.\n  \n- **Google Keep**\n  - Listing notes from Google Keep.\n  - Adding a new note to Google Keep.\n  \n- **Google 
Drive**\n  - Listing files from Google Drive.\n  - Finding a file in Google Drive by its path.\n  - Downloading a file from Google Drive.\n  - Uploading a file to Google Drive.\n  \n- **YouTube**\n  - Retrieving information about a YouTube video.\n  - Retrieving the transcript of a YouTube video.\n  \n- **Google Contacts**\n  - Listing contacts from Google Contacts.\n  - Adding a new contact to Google Contacts.\n  \n- **Google Docs**\n  - Creating a new document.\n  - Retrieving a document.\n  - Listing documents.\n  - Appending text to a document.\n  - Replacing text in a document.\n  - Inserting a heading in a document.\n  - Exporting a document.\n  - Copying from a template.\n  \n- **Google Maps**\n  - Geocoding an address.\n  - Reverse geocoding coordinates.\n  - Getting directions between locations.\n  - Using the distance matrix.\n  - Text search for places.\n  - Finding nearby places.\n  - Generating static map images.\n  \n- **Google Colab**\n  - Listing notebooks.\n  - Creating a new notebook.\n  - Adding a code cell.\n  - Adding a markdown cell.\n  - Getting a link to a notebook.\n  - Renaming a notebook.\n  - Duplicating a notebook.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#google-gmail-drive-calendar-contacts-yt-keep-docs-maps-colab\n\n## Image Generation (inline)\n\nThe plugin integrates `DALL-E 3` image generation with any chat mode. Simply enable it and request an image in Chat mode, using a standard model such as `GPT-4`. The plugin does not require the `+ Tools` option to be enabled.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#image-generation-inline\n\n## Mailer\n\nEnables the sending, receiving, and reading of emails from the inbox. Currently, only SMTP is supported. More options coming soon.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#mailer\n\n## MCP (Model Context Protocol)\n\nWith the `MCP` plugin, you can connect **PyGPT** to remote tools exposed by `Model Context Protocol` servers (stdio, Streamable HTTP, or SSE). The plugin discovers available tools on your configured servers and publishes them to the model as callable commands with proper parameter schemas. You can whitelist\u002Fblacklist tools per server and optionally cache discovery results for speed.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#mcp\n\n## Mouse And Keyboard\n\nIntroduced in version: `2.4.4` (2024-11-09)\n\n**WARNING: Use this plugin with caution - allowing all options gives the model full control over the mouse and keyboard**\n\nThe plugin allows for controlling the mouse and keyboard by the model. 
With this plugin, you can send a task to the model, e.g., \"open notepad, type something in it\" or \"open web browser, do search, find something.\"\n\nPlugin capabilities include:\n\n- Get mouse cursor position\n- Control mouse cursor position\n- Control mouse clicks\n- Control mouse scroll\n- Control the keyboard (pressing keys, typing text)\n- Taking screenshots\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#mouse-and-keyboard\n\n## OpenStreetMap\n\nProvides everyday mapping utilities using OpenStreetMap services:\n\n- Forward and reverse geocoding via Nominatim\n- Search with optional near\u002Fbbox filters\n- Routing via OSRM (driving, walking, cycling)\n- Generate openstreetmap.org URL (center\u002Fzoom or bbox; optional marker)\n- Utility helpers: open an OSM website URL centered on a point; download a single XYZ tile\n\nImages are saved under `data\u002Fopenstreetmap\u002F` in the user data directory.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#openstreetmap\n\n## Real Time\n\nThis plugin automatically adds the current date and time to each system prompt you send. \nYou have the option to include just the date, just the time, or both.\n\nWhen enabled, it quietly enhances each system prompt with current time information before sending it to the model.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#real-time\n\n## Serial port \u002F USB\n\nProvides commands for reading and sending data to USB ports.\n\n**Tip:** in the Snap version, you must connect the interface first: https:\u002F\u002Fsnapcraft.io\u002Fdocs\u002Fserial-port-interface\n\nYou can send commands to, for example, an Arduino or any other controller using the serial port for communication.
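\n\nFor orientation, a raw serial round-trip of the kind the plugin performs looks like this (a generic `pyserial` sketch, not the plugin's code - the port name, baud rate, and `PING` command are examples; match them to your device):\n\n```python\nimport serial  # pip install pyserial\n\n# Open the port, send a short command, and read one line of the reply.\nwith serial.Serial(\"\u002Fdev\u002FttyUSB0\", baudrate=9600, timeout=2) as port:\n    port.write(b\"PING\")\n    print(port.readline().decode().strip())\n```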
\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#serial-port-usb\n\n## Server (SSH\u002FFTP)\n\nThe Server plugin provides integration for remote server management via SSH, SFTP, and FTP protocols. This plugin allows executing commands, transferring files, and managing directories on remote servers.\n\nFor security reasons, the model will not see any credentials - only the server name and port fields (see the docs).\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#server-ssh-ftp\n\n## Slack\n\nThe Slack plugin integrates with the Slack Web API, enabling interaction with Slack workspaces through the application. This plugin supports OAuth2 for authentication, which allows for seamless integration with Slack services, enabling actions such as posting messages, retrieving users, and managing conversations.\n\n- Retrieving a list of users.\n- Listing all conversations.\n- Accessing conversation history.\n- Retrieving conversation replies.\n- Opening a conversation.\n- Posting a message in a chat.\n- Deleting a chat message.\n- Uploading files to Slack.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#slack\n\n## System (OS)\n\nThe plugin provides access to the operating system and executes system commands.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#system-os\n\n## System Prompt Extra (append)\n\nThe plugin appends additional system prompts (extra data) from a list to every current system prompt, letting you automatically enhance each system prompt with extra instructions.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#system-prompt-extra-append\n\n## Telegram\n\nThe plugin enables integration with Telegram for both bots and user accounts, through the `Bot API` and the `Telethon` library respectively. It allows sending and receiving messages, managing chats, and handling updates.\n\n- Sending text messages to a chat or channel.\n- Sending photos with an optional caption to a chat or channel.\n- Sending documents or files to a chat or channel.\n- Retrieving information about a specific chat or channel.\n- Polling for updates in bot mode.\n- Downloading files using a file identifier.\n- Listing contacts in user mode.\n- Listing recent dialogs or chats in user mode.\n- Retrieving recent messages from a specific chat or channel in user mode.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#telegram\n\n## Tuya (IoT)\n\nThe Tuya plugin integrates with Tuya's Smart Home platform, enabling seamless interactions with your smart devices via the Tuya Cloud API. This plugin provides a user-friendly interface to manage and control devices directly from your assistant.\n\n* Provide your Tuya Cloud credentials to enable communication.\n* Access and list all smart devices connected to your Tuya app account.\n* Retrieve detailed information about each device, including its status and supported functions.\n* Effortlessly search for devices by their names using cached data for quick access.\n* Control devices by turning them on or off, toggling states, and setting specific device parameters.\n* Send custom commands to devices for more advanced control.\n* Read sensor values and normalize them for easy interpretation.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#tuya-iot\n\n## Vision (inline)\n\nThe plugin integrates vision capabilities across all chat modes, not just Vision mode. Once enabled, it allows the model to seamlessly switch to vision processing in the background whenever an image attachment or vision capture is detected.\n\n**Tip:** When using `Vision (inline)` in a standard mode such as `Chat` (not `Vision` mode), the `+ Vision` special checkbox will appear at the bottom of the Chat window. It will be automatically enabled any time you provide content for analysis (like an uploaded photo). When the checkbox is enabled, the vision model is used. If you wish to exit the vision model after image analysis, simply uncheck the checkbox. It will activate again automatically when the next image content for analysis is provided.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#vision-inline\n\n## Voice Control (inline)\n\nThe plugin provides voice control command execution within a conversation.\n\nSee the `Accessibility` section for more details.\n\n## Web Search\n\n**PyGPT** lets you connect the model to the internet and carry out web searches in real time as you make queries.\n\nTo activate this feature, turn on the `Web Search` plugin found in the `Plugins` menu.\n\nWeb searches are provided by the `Google Custom Search Engine` and `Microsoft Bing` APIs and can be extended with other search engine providers.
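\n\nFor orientation, a raw query against the Google Custom Search JSON API (the endpoint such a provider calls) looks roughly like this - a generic sketch, not PyGPT's internal code; supply your own API key and engine (CX) ID:\n\n```python\nimport requests\n\nresp = requests.get(\n    \"https:\u002F\u002Fwww.googleapis.com\u002Fcustomsearch\u002Fv1\",\n    params={\"key\": \"YOUR_API_KEY\", \"cx\": \"YOUR_CX_ID\", \"q\": \"PyGPT assistant\"},\n    timeout=10,\n)\nfor item in resp.json().get(\"items\", []):\n    print(item[\"title\"], \"-\", item[\"link\"])\n```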
\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#web-search\n\n## Wikipedia\n\nThe Wikipedia plugin allows for comprehensive interactions with Wikipedia, including language settings, article searching, summaries, and random article discovery. This plugin offers a variety of options to optimize your search experience.\n\n* Set your preferred language for Wikipedia queries.\n* Retrieve and check the current language setting.\n* Explore a list of supported languages.\n* Search for articles using keywords or get suggestions for queries.\n* Obtain summaries and detailed page content.\n* Discover articles by geographic location or randomly.\n* Open articles directly in your web browser.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#wikipedia\n\n## Wolfram Alpha\n\nProvides computational knowledge via Wolfram Alpha: short answers, full JSON pods, numeric and symbolic math (solve, derivatives, integrals), unit conversions, matrix operations, and plots rendered as images. Images are saved under `data\u002Fwolframalpha\u002F` in the user data directory.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#wolfram-alpha\n\n## X\u002FTwitter\n\nThe X\u002FTwitter plugin integrates with the X platform, allowing for comprehensive interactions such as tweeting, retweeting, liking, media uploads, and more. This plugin requires OAuth2 authentication and offers various configuration options to manage API interactions effectively.\n\n- Retrieve user details by providing their username.\n- Fetch user information using their unique ID.\n- Access recent tweets from a specific user.\n- Search for recent tweets using specific keywords or hashtags.\n- Create a new tweet and post it on the platform.\n- Remove an existing tweet from your profile.\n- Reply to a specific tweet with a new comment.\n- Quote a tweet while adding your own comments or thoughts.\n- Like a tweet to show appreciation or support.\n- Remove a like from a previously liked tweet.\n- Retweet a tweet to share it with your followers.\n- Undo a retweet to remove it from your profile.\n- Hide a specific reply to a tweet.\n- List all bookmarked tweets for easy access.\n- Add a tweet to your bookmarks for later reference.\n- Remove a tweet from your bookmarks.\n- Upload media files such as images or videos for tweeting.\n- Set alternative text for uploaded media for accessibility.\n\nDocumentation: https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#x-twitter\n\n# Creating Your Own Plugins\n\nYou can create your own plugin for **PyGPT** at any time. The plugin can be written in Python and then registered with the application just before launching it. All plugins included with the app are stored in the `plugin` directory - you can use them as coding examples for your own plugins.\n\nPyGPT can be extended with:\n\n- custom models\n\n- custom plugins\n\n- custom LLMs\n\n- custom vector store providers\n\n- custom data loaders\n\n- custom audio input providers\n\n- custom audio output providers\n\n- custom web search engine providers\n\n- custom agents (LlamaIndex or OpenAI Agents)\n\n\nSee the section `Extending PyGPT \u002F Adding a custom plugin` for more details.\n\n# Functions, commands and tools\n\n**Tip:** remember to enable the `+ Tools` checkbox to enable execution of tools and commands from plugins.\n\nFrom version `2.2.20`, PyGPT uses native API function calls by default. 
You can go back to the internal syntax (described below) by switching off the option `Config -> Settings -> Prompts -> Use native API function calls`. You must also enable the `Tool calls` checkbox in the model's advanced settings to use native function calls with the specified model.\n\nIn the background, **PyGPT** uses an internal syntax to define commands and their parameters, which can then be used by the model and executed on the application side or even directly in the system. This syntax looks as follows (example command below):\n\n```\u003Ctool>{\"cmd\": \"send_email\", \"params\": {\"quote\": \"Why don't skeletons fight each other? They don't have the guts!\"}}\u003C\u002Ftool>```\n\nIt is a JSON object wrapped between `\u003Ctool>` tags. The application extracts the JSON object from such formatted text and executes the appropriate function based on the provided parameters and command name. Many of these types of commands are defined in plugins (e.g., those used for file operations or internet searches). You can also define your own commands using the `Custom Commands` plugin, or simply by creating your own plugin and adding it to the application.
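\n\nA rough sketch of how such a wrapped command can be extracted and dispatched (illustrative only - not PyGPT's actual implementation):\n\n```python\nimport json\nimport re\n\ndef extract_tool_calls(text: str):\n    # Pull every \u003Ctool>{...}\u003C\u002Ftool> JSON object out of a model response.\n    calls = []\n    for payload in re.findall(r\"\u003Ctool>(.*?)\u003C\u002Ftool>\", text, re.DOTALL):\n        obj = json.loads(payload)\n        calls.append((obj[\"cmd\"], obj.get(\"params\", {})))\n    return calls\n\nresponse = '\u003Ctool>{\"cmd\": \"send_email\", \"params\": {\"quote\": \"...\"}}\u003C\u002Ftool>'\nprint(extract_tool_calls(response))  # [('send_email', {'quote': '...'})]\n```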
\n\n**Tip:** The `+ Tools` option checkbox must be enabled to allow the execution of commands from plugins. Disable the option if you do not want to use commands, to prevent additional token usage (as the command execution system prompt consumes additional tokens and may slow down local models).\n\n![v2_code_execute](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_385abd0fa1a3.png)\n\nWhen native API function calls are disabled, a special system prompt responsible for invoking commands is added to the main system prompt if the `+ Tools` option is active.\n\nHowever, there is an additional possibility to define your own commands and execute them with the help of the model.\nThese are functions \u002F tools - defined on the API side and described using JSON objects. You can find a complete guide on how to define functions here:\n\nhttps:\u002F\u002Fplatform.openai.com\u002Fdocs\u002Fguides\u002Ffunction-calling\n\nhttps:\u002F\u002Fcookbook.openai.com\u002Fexamples\u002Fhow_to_call_functions_with_chat_models\n\nPyGPT offers compatibility of these functions with the commands (tools) used in the application. All you need to do is define the appropriate functions using the correct JSON schema, and PyGPT will do the rest, translating such syntax on the fly into its own internal format.\n\nLocal functions and tools from plugins are available in all modes, except `Assistants`.\n\nTo enable local functions for `Assistants` mode (in this mode, remote tools are used by default), create a new Assistant, open the Preset edit dialog, and import tools from plugins or add a new function using the `+ Function` button, e.g. with the following content:\n\n**Name:** `send_email`\n\n**Description:** `Send a quote using email`\n\n**Params (JSON):**\n\n```json\n{\n        \"type\": \"object\",\n        \"properties\": {\n            \"quote\": {\n                \"type\": \"string\",\n                \"description\": \"A generated funny quote\"\n            }\n        },\n        \"required\": [\n            \"quote\"\n        ]\n}\n```\n\nThen, in the `Custom Commands` plugin, create a new command with the same name and the same parameters:\n\n**Command name:** `send_email`\n\n**Instruction\u002Fprompt:** `send mail`\n\n**Params list:** `quote`\n\n**Command to execute:** `echo \"OK. Email sent: {quote}\"`\n\nNext, enable the `+ Tools` option and enable the plugin.\n\nAsk the model:\n\n```Create a funny quote and email it```\n\nIn response, you will receive a prepared command, like this:\n\n```\u003Ctool>{\"cmd\": \"send_email\", \"params\": {\"quote\": \"Why do we tell actors to 'break a leg?' Because every play has a cast!\"}}\u003C\u002Ftool>```\n\nAfter receiving this, PyGPT will execute the system `echo` command with the params given in the `params` field, replacing the `{quote}` placeholder with the `quote` param value.\n\nAs a result, a response like this will be sent to the model:\n\n```[{\"request\": {\"cmd\": \"send_email\"}, \"result\": \"OK. Email sent: Why do we tell actors to 'break a leg?' Because every play has a cast!\"}]```\n\nWith this flow, you can use both forms - the API provider's JSON schema and the PyGPT schema - to define and execute commands and functions in the application. They cooperate with each other, and you can use them interchangeably.\n\n# Tools\n\nPyGPT features several useful tools, including:\n\n- Notepad\n- Painter\n- Calendar\n- Indexer\n- Media Player\n- Image viewer\n- Text editor\n- Transcribe audio\u002Fvideo files\n- OpenAI Vector Stores\n- Google Vector Stores\n- Python Code Interpreter\n- HTML\u002FJS Canvas (built-in HTML renderer)\n- Translator\n- Web Browser (Chromium)\n- Agents Builder (beta)\n\n![v2_tool_menu](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_fb04ffa33dc5.png)\n\n\n## Notepad\n\nThe application has a built-in notepad, divided into several tabs. This can be useful for storing information in a convenient way, without the need to open an external text editor. The content of the notepad is automatically saved whenever the content changes.\n\n![v2_notepad](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_ac81dab3bd0f.png)\n\n## Painter\n\nUsing the `Painter` tool, you can create quick sketches and submit them to the model for analysis. You can also edit images opened from disk or captured from the camera, for example, by adding elements like arrows or outlines to objects. Additionally, you can capture screenshots from the system - the captured image is placed in the drawing tool and attached to the query being sent.\n\n![v2_draw](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_826b6a0ca3aa.png)\n\nTo capture a screenshot, just click on the `Ask with screenshot` option in the tray-icon dropdown:\n\n![v2_screenshot](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_d2a13ee6d709.png)\n\n## Calendar\n\nUsing the calendar, you can go back to selected conversations from a specific day and add daily notes. After adding a note, it will be marked on the list, and you can change the color of its label by right-clicking and selecting `Set label color`. By clicking on a particular day of the week, conversations from that day will be displayed.\n\n![v2_calendar](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_a4c953c9e46d.png)\n\n\n## Indexer\n\n\nThis tool allows indexing of local files or directories and external web content to a vector database, which can then be used with the `Chat with Files` mode. 
Using this tool, you can manage local indexes and add new data with built-in `LlamaIndex` integration.\n\n![v2_tool_indexer](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_40cfb3d01d15.png)\n\n## Media Player\n\n\nA simple video\u002Faudio player that allows you to play video files directly from within the app.\n\n\n## Image Viewer\n\n\nA simple image browser that lets you preview images directly within the app.\n\n\n## Text Editor\n\n\nA simple text editor that enables you to edit text files directly within the app.\n\n\n## Transcribe Audio\u002FVideo Files\n\n\nAn audio transcription tool with which you can prepare a transcript from a video or audio file. It will use a speech recognition plugin to generate the text from the file.\n\n\n## OpenAI \u002F Google Vector Stores\n\n\nRemote vector store management.\n\n\n## Python Code Interpreter\n\n\nThis tool allows you to run Python code directly from within the app. It is integrated with the `Code Interpreter` plugin, ensuring that code generated by the model is automatically available from the interpreter. In the plugin settings, you can enable the execution of code in a Docker environment.\n\n**INFO:** Executing Python code using IPython in compiled versions requires an enabled sandbox (Docker container). You can connect the Docker container via `Plugins -> Settings`.\n\n## HTML\u002FJS Canvas\n\nAllows rendering of HTML\u002FJS code in the HTML Canvas (a built-in renderer based on Chromium). To use it, just ask the model to render the HTML\u002FJS code in the built-in browser (HTML Canvas). The tool is integrated with the `Code Interpreter` plugin.\n\n## Translator\n\nEnables translation between multiple languages using an AI model.\n\n## Web Browser\n\nA built-in web browser based on Chromium, allowing you to open webpages directly within the app. **SECURITY NOTICE:** For your protection, avoid using the built-in browser for sensitive or critical tasks. It is intended for basic use only.\n\n# Agents Builder (beta)\n\nTo launch the Agent Editor, navigate to:\n\n`Tools -> Agents Builder`\n\n![nodes](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_47ad606dbc3c.png)\n\nThis tool allows you to create workflows for agents using a node editor, without writing any code. You can add a new agent type, and it will appear in the list of presets.\n\nTo add a new element, right-click on the editor grid and select `Add` to insert a new node.\n\n**Types of Nodes:**\n\n- **Start**: The starting point for agents (user input).\n- **Agent**: A single agent with customizable default parameters, such as system instructions and tool usage. These settings can be overridden in the preset.\n- **Memory**: Shared memory between agents (shared Context).\n- **End**: The endpoint, returning control to the user.\n\nAgents with connected shared memory share it among themselves. Agents without shared memory only receive the latest output from the previous agent.\n\nThe first agent in the sequence always receives the full context passed by the user.\n\nConnecting agents and memory is done using node connections via slots. 
To connect slots, simply drag from the input port to the output port (Ctrl + mouse button removes a connection).

**Node Editor Navigation:**

- **Right-click**: Add node, undo, redo, clear
- **Middle-click + drag**: Pan view
- **Ctrl + Mouse wheel**: Zoom
- **Left-click a port**: Create connection
- **Ctrl + Left-click a port**: Rewire or detach connection
- **Right-click or DELETE a node/connection**: Remove node/connection

**Tip:** Enable agent debugging in `Settings -> Debug -> Log Agents usage to console` to log the full workflow to the console.

Agents built using this tool are compatible with both OpenAI Agents and LlamaIndex.

**Notes:**

Routing and system instruction: for every agent that has more than one connection leading to the next agent, a routing instruction is automatically injected just before your system prompt:

```
You are a routing-capable agent in a multi-agent flow.
Your id is: <current_id>, name: <agent_name>.
You MUST respond ONLY with a single JSON object and nothing else.
Schema:
{
  "route": "<ID of the next agent from allowed_routes OR the string 'end'>",
  "content": "<final response text for the user (or tool result)>"
}
Rules:
- allowed_routes: [<allowed>]
- If you want to finish the flow, set route to "end".
- content must contain the user-facing answer (you may include structured data as JSON or Markdown inside content).
- Do NOT add any commentary outside of the JSON. No leading or trailing text.
- If using tools, still return the final JSON with tool results summarized in content.
- Human-friendly route names: <names>
- Human-friendly route roles (optional): <roles>

<here begins your system instruction>
```

**INFO:** Agents Builder is in beta.


# Token usage calculation

## Input tokens

The application features a token calculator. It attempts to forecast the number of tokens that a particular query will consume and displays this estimate in real time. This gives you improved control over your token usage. The app provides detailed information about the tokens used for the user's prompt, the system prompt, any additional data, and those used within the context (the memory of previous entries).

**Remember that these are only approximate calculations and do not include, for example, the number of tokens consumed by some plugins. You can find the exact number of tokens used on the provider's website.**

![v2_tokens1](https://oss.gittoolsai.com/images/szczyglis-dev_py-gpt_readme_690f4136eda7.png)

## Total tokens

After receiving a response from the model, the application displays the actual total number of tokens used for the query (received from the API).

![v2_tokens2](https://oss.gittoolsai.com/images/szczyglis-dev_py-gpt_readme_26d89ee90f82.png)
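To get a feel for how such an estimate can be produced, here is a small illustration using the `tiktoken` library (an assumption for illustration only; PyGPT's built-in calculator also accounts for the system prompt, attachments, and context, and exact counts depend on the model's tokenizer):

```python
# Rough illustration of input-token estimation - not PyGPT's internal calculator.
import tiktoken

# Tokenizer used by many OpenAI chat models; other models may differ.
encoding = tiktoken.get_encoding("cl100k_base")

def estimate_tokens(*parts: str) -> int:
    """Sum token counts over prompt parts (user prompt, system prompt, context)."""
    return sum(len(encoding.encode(part)) for part in parts)

print(estimate_tokens("You are a helpful assistant.",
                      "Summarize this file for me."))
```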
# Accessibility

Since version `2.2.8`, PyGPT has included beta accessibility and voice control support. This may be very useful for blind or visually impaired users.

In the `Config / Accessibility` menu, you can turn on accessibility features such as:


- activating voice control

- translating actions and events on the screen with audio speech

- setting up keyboard shortcuts for actions.


**Using voice control**

Voice control can be turned on in two ways: globally, through settings in `Config -> Accessibility`, and by using the `Voice control (inline)` plugin. Both options let you use the same voice commands, but they work a bit differently - the global option allows you to run commands outside of a conversation, anywhere, while the plugin option lets you execute commands directly during a conversation, allowing you to interact with the model and execute commands at the same time, within the conversation.

In the plugin (inline) option, you can also turn on a special trigger word that is required for content to be recognized as a voice command. You can set this up by going to `Plugins -> Settings -> Voice Control (inline)`:

```bash
Magic prefix for voice commands
```

**Tip:** When voice control is enabled via the plugin, simply speak commands along with the conversation content using the standard `Microphone` button.


**Enabling voice control globally**


Turn on the voice control option in `Config / Accessibility`:


```bash
Enable voice control (using microphone)
```

Once you enable this option, a `Voice Control` button will appear at the bottom right corner of the window. When you click on this button, the microphone starts listening; clicking it again stops listening and starts recognizing the voice command you said. You can cancel voice recording at any time with the `ESC` key. You can also set a keyboard shortcut to turn voice recording on/off.


Voice command recognition works based on a model, so you don't have to worry about saying things perfectly.


**Here's a list of commands you can ask for by voice:**

- Get the current application status
- Exit the application
- Enable audio output
- Disable audio output
- Enable audio input
- Disable audio input
- Add a memo to the calendar
- Clear memos from calendar
- Read the calendar memos
- Enable the camera
- Disable the camera
- Capture image from camera
- Create a new context
- Go to the previous context
- Go to the next context
- Go to the latest context
- Focus on the input
- Send the input
- Clear the input
- Get current conversation info
- Get available commands list
- Stop executing current action
- Clear the attachments
- Read the last conversation entry
- Read the whole conversation
- Rename current context
- Search for a conversation
- Clear the search results
- Send the message to input
- Append message to current input without sending it
- Switch to chat mode
- Switch to chat with files (llama-index) mode
- Switch to the next mode
- Switch to the previous mode
- Switch to the next model
- Switch to the previous model
- Add note to notepad
- Clear notepad contents
- Read current notepad contents
- Switch to the next preset
- Switch to the previous preset
- Switch to the chat tab
- Switch to the calendar tab
- Switch to the draw (painter) tab
- Switch to the files tab
- Switch to the notepad tab
- Switch to the next tab
- Switch to the previous tab
- Start listening for voice input
- Stop listening for voice input
- Toggle listening for voice input

More commands coming soon.

Just ask for an action that matches one of the descriptions above. These descriptions are also known to the model, and relevant commands are assigned to them. When you voice a command that fits one of those patterns, the model will trigger the appropriate action.


For convenience, you can enable a short sound to play when voice recording starts and stops.
To do this, turn on the option:


```bash
Audio notify microphone listening start/stop
```

To enable a sound notification when a voice command is recognized and command execution begins, turn on the option:


```bash
Audio notify voice command execution
```

For voice translation of on-screen events and information about completed commands via speech synthesis, you can turn on the option:

```bash
Use voice synthesis to describe events on the screen.
```
![v2_access](https://oss.gittoolsai.com/images/szczyglis-dev_py-gpt_readme_433d14c9a2a5.png)

# Configuration

## Settings

The following basic options can be modified directly within the application:

``` ini
Config -> Settings...
```

![v2_settings](https://oss.gittoolsai.com/images/szczyglis-dev_py-gpt_readme_ca405d7f7897.png)

**General**

- `Minimize to tray on exit`: Minimize to the tray icon on exit. The tray icon must be enabled for this option to work. Default: False.

- `Render engine`: Chat output render engine: `WebEngine / Chromium` for full HTML/CSS output, or `Legacy (markdown)` for simple legacy markdown output. Default: WebEngine / Chromium.

- `OpenGL hardware acceleration`: Enables hardware acceleration in the `WebEngine / Chromium` renderer. Default: False.

- `Use proxy`: Enable this option to use a proxy for connections to APIs. Default: False.

- `Proxy address`: Proxy address to be used for connections in API SDKs; supports HTTP/SOCKS, e.g. http://proxy.example.com or socks5://user:pass@host:port

- `Application environment (os.environ)`: Additional environment vars to set on application start.

- `Memory Limit`: Renderer memory limit; set to 0 to disable. If > 0, the app will try to free memory after the limit is reached. Accepted formats: 3.5GB, 2GB, 2048MB, 1_000_000. Minimum: 2GB.

**API Keys**

- `OpenAI API KEY`: Required for the OpenAI API. If you wish to use custom endpoints or local APIs, then you may enter any value here.

- `OpenAI ORGANIZATION KEY`: The organization's API key, which is optional for use within the application.

- `API Endpoint`: OpenAI API endpoint URL, default: https://api.openai.com/v1.

- `Anthropic API KEY`: Required for the Anthropic API and Claude models.

- `Deepseek API KEY`: Required for the Deepseek API.

- `Google API KEY`: Required for the Google API and Gemini models.

- `HuggingFace API KEY`: Required for the HuggingFace API.

- `Mistral AI API KEY`: Required for the Mistral AI API.

- `Perplexity API KEY`: Required for the Perplexity API and Sonar models.

- `xAI API KEY`: Required for the xAI API and Grok models.

- `OpenAI API version`: Azure OpenAI API version, e.g. 2023-07-01-preview

- `Azure OpenAI API endpoint`: Azure OpenAI API endpoint, https://<your-resource-name>.openai.azure.com/

**Layout**

- `Style (chat)`: Chat style (Blocks, ChatGPT-like, or ChatGPT-like Wide). `WebEngine / Chromium` render mode only.

- `Zoom`: Adjusts the zoom in the chat window (web render view).
`WebEngine / Chromium` render mode only.

- `Font Size (chat window)`: Adjusts the font size in the chat window (plain-text) and notepads.

- `Font Size (input)`: Adjusts the font size in the input window.

- `Font Size (ctx list)`: Adjusts the font size in the contexts list.

- `Font Size (toolbox)`: Adjusts the font size in the toolbox on the right.

- `Layout density`: Adjusts the density of layout elements. Default: -1.

- `DPI scaling`: Enable/disable DPI scaling. A restart of the application is required for this option to take effect. Default: True.

- `DPI factor`: DPI factor. A restart of the application is required for this option to take effect. Default: 1.0.

- `Auto-collapse user message (px)`: Auto-collapse the user message after N pixels of height; set to 0 to disable auto-collapse.

- `Display tips (help descriptions)`: Display help tips. Default: True.

- `Store dialog window positions`: Enable or disable storing/restoring dialog positions. Default: True.

**Code syntax**

- `Code syntax highlight`: Syntax highlight theme in code blocks. `WebEngine / Chromium` render mode only.

- `Disable syntax highlight`: Option to disable syntax highlighting in code blocks. `WebEngine / Chromium` render mode only.

- `Max chars to highlight (static)`: Sets the maximum number of characters to be highlighted in static content. Set to 0 to disable. `WebEngine / Chromium` render mode only.

- `Max lines to highlight (static)`: Sets the maximum number of lines to be highlighted in static content. Set to 0 to disable. `WebEngine / Chromium` render mode only.

- `Max lines to highlight (real-time)`: Sets the maximum number of lines to be highlighted in real-time stream mode. Set to 0 to disable. `WebEngine / Chromium` render mode only.

- `Highlight every N chars (real-time)`: Sets the interval for highlighting every N characters in real-time stream mode. `WebEngine / Chromium` render mode only.

- `Highlight every N line (real-time)`: Sets the interval for highlighting every N lines in real-time stream mode. `WebEngine / Chromium` render mode only.

**Files and attachments**

- `Store attachments in the workdir upload directory`: Enable to store a local copy of uploaded attachments for future use. Default: True

- `Store images, capture and upload in data directory`: Enable to store everything in a single data directory. Default: False

- `Allow images as additional context`: If enabled, images can be used as additional context. Default: False

- `Append attachment only once (mode: always)`: If enabled, the sent attachment will be appended once to the sent message, rather than appended every time to the input prompt as additional context. Force mode - affects all models. Default: False

- `Append attachment only once (mode: only if available, auto-detect)`: If enabled, the sent attachment will be appended once to the sent message if the selected model and API handle the storage of sent messages on the server side. This may optimize token usage by sending attachments only once.
Default: True

- `Model for querying index`: Model to use for preparing the query and querying the index when the RAG option is selected.

- `Model for attachment content summary`: Model to use when generating a summary for the content of a file when the Summary option is selected.

- `Use history in RAG query`: When enabled, the content of the entire conversation will be used when preparing a query if the mode is RAG or Summary.

- `RAG limit`: Only if the option `Use history in RAG query` is enabled. Specifies how many recent entries in the conversation will be used when generating a query for RAG. 0 = no limit.

- `Directory for file downloads`: Subdirectory for downloaded files, e.g. in Assistants mode, inside "data". Default: "download"

**Context**

- `Context Threshold`: Sets the number of tokens reserved for the model to respond to the next prompt.

- `Limit of last contexts on list to show (0 = unlimited)`: Limits how many recent contexts are shown on the list; default: 0 (unlimited)

- `Show context groups on top of the context list`: Display groups on top. Default: False

- `Show date separators on the context list`: Show date periods. Default: True

- `Show date separators in groups on the context list`: Show date periods in groups. Default: True

- `Show date separators in pinned on the context list`: Show date periods in pinned items. Default: False

- `Use Context`: Toggles the use of conversation context (memory of previous inputs).

- `Store History`: Toggles storing of conversation history.

- `Store Time in History`: Chooses whether timestamps are added to the .txt files.

- `Context Auto-summary`: Enables automatic generation of titles for contexts. Default: True.

- `Lock incompatible modes`: If enabled, the app will create a new context when switched to an incompatible mode within an existing context.

- `Search also in conversation content, not only in titles`: When enabled, context search will also consider the content of conversations, not just their titles.

- `Show LlamaIndex sources`: If enabled, the sources used will be displayed in the response (if available; it will not work in streamed chat).

- `Show code interpreter output`: If enabled, output from the code interpreter in the Assistant API will be displayed in real-time (in stream mode). Default: True.

- `Use extra context output`: If enabled, plain text output (if available) from command results will be displayed alongside the JSON output. Default: True.

- `Open URLs in built-in browser`: Enable this option to open all URLs in the built-in browser (Chromium) instead of an external browser. Default: False.

- `Model used for auto-summary`: Model used for context auto-summary (generating titles in the context list) (default: *gpt-4o-mini*). **Tip:** If you prefer to use local models, you should change the model here as well.

**Remote tools**

Enable/disable remote tools, like Web Search, MCP or Image generation.

Remote tools are available for these providers, and only via their native SDKs:

- Anthropic
- Google
- OpenAI
- xAI

**Models**

- `Max Output Tokens`: Sets the maximum number of tokens the model can generate for a single response.

- `Max Total Tokens`: Sets the maximum token count that the application can send to the model, including the conversation context.

- `RPM limit`: Sets the limit of maximum requests per minute (RPM), 0 = no limit.

- `Temperature`: Sets the randomness of the conversation. A lower value makes the model's responses more deterministic, while a higher value increases creativity and abstraction.
- `Top-p`: A parameter that influences the model's response diversity, similar to temperature. For more information, please check the OpenAI documentation.

- `Frequency Penalty`: Decreases the likelihood of repetition in the model's responses.

- `Presence Penalty`: Discourages the model from mentioning topics that have already been brought up in the conversation.

**Prompts**

- `Use native API function calls`: Use API function calls to run commands from plugins instead of using command prompts - disabled in Autonomous and Experts modes. Default: True

- `Command execute: instruction`: Prompt for appending command execution instructions. Placeholders: {schema}, {extra}

- `Command execute: extra footer (non-Assistant modes)`: Extra footer to append after the commands JSON schema.

- `Command execute: extra footer (Assistant mode only)`: Additional instructions to separate local commands from the remote environment that is already configured in the Assistants.

- `Context: auto-summary (system prompt)`: System prompt for context auto-summary.

- `Context: auto-summary (user message)`: User message for context auto-summary. Placeholders: {input}, {output}

- `Agent: evaluation prompt in loop (LlamaIndex) - % complete`: Prompt used for evaluating (by % complete) the response in Agents (LlamaIndex/OpenAI) mode.

- `Agent: evaluation prompt in loop (LlamaIndex) - % score`: Prompt used for evaluating (by % score) the response in Agents (LlamaIndex/OpenAI) mode.

- `Agent: system instruction (Legacy)`: Prompt instructing how to handle autonomous mode.

- `Agent: continue (Legacy)`: Prompt sent to automatically continue the conversation.

- `Agent: continue (always, more steps) (Legacy)`: Prompt sent to always automatically continue the conversation (more reasoning - "Always continue..." option).

- `Agent: goal update (Legacy)`: Prompt instructing how to update the current goal status.

- `Experts: Master prompt`: Prompt instructing how to handle experts.

- `Image generate`: Prompt for generating prompts for image generation (if raw mode is disabled).

**Images and video**

**Image**

- `Image size`: The resolution of the generated images (DALL-E). Default: 1024x1024.

- `Image quality`: The quality of the generated images (DALL-E). Default: standard.

- `Prompt generation model`: Model used for generating prompts for image generation (if raw mode is disabled).

**Video**

- `Aspect ratio`: Specifies the frame aspect ratio (e.g., 16:9, 9:16, 1:1). Availability depends on the selected model.

- `Video duration`: Sets the clip length in seconds; limits may vary by model.

- `FPS`: Determines the frames per second (e.g., 24, 25, 30). Values may be rounded or ignored by the model.

- `Generate audio`: Option to include synthesized background audio if supported by the model.

- `Negative prompt`: Specifies words or phrases to avoid in the output (comma-separated).

- `Prompt enhancement model`: Defines the LLM used to refine your prompt before video generation. This is not the video model.

- `Video resolution`: Sets the target output resolution (e.g., 720p, 1080p). Availability depends on the model.
- `Seed`: Provides an optional random seed for reproducible results; leave empty for random.

**Vision and camera**

- `Camera Input Device`: Video capture camera index (index of the camera, default: 0).

- `Camera capture width (px)`: Video capture resolution (width).

- `Camera capture height (px)`: Video capture resolution (height).

- `Image capture quality`: Video capture image JPEG quality (%).

**Audio**

- `Audio Input Backend`: Selects the backend for audio input (Native/QtMultimedia, PyAudio, PyGame)

- `Audio Input Device`: Selects the audio device for microphone input.

- `Audio Output Backend`: Selects the backend for audio output (Native/QtMultimedia, PyAudio)

- `Audio Output Device`: Selects the audio device for audio output.

- `Channels`: Input channels, default: 1

- `Sampling Rate`: Sampling rate, default: 44100

- `Use cache`: Use a cache for generated audio files.

- `Max files to store`: Max files to store on disk for the audio cache.

- `Audio notify microphone listening start/stop`: Enables an audio "tick" notification when microphone listening starts/ends.

- `Continuous Audio Recording (Chunks)`: Enable recording in chunks for long audio recordings in the notepad (voice notes).

- `VAD prefix padding (in ms)`: VAD prefix padding in ms, default: 300ms (Realtime audio mode)

- `VAD end silence (in ms)`: VAD end silence in ms, default: 2000ms (Realtime audio mode)

**Indexes / LlamaIndex**

**General**

- `Indexes`: List of created indexes.

**Vector Store**

- `Vector Store`: Vector store to use (vector database provided by LlamaIndex).

- `Vector Store (**kwargs)`: Keyword arguments for the vector store provider (api_key, index_name, etc.).

**Chat**

- `Chat mode`: LlamaIndex chat mode used in the query engine, default: context

- `Use ReAct agent for Tool calls in Chat with Files mode`: Enable the ReAct agent for tool calls in Chat with Files mode.

- `Auto-retrieve additional context`: Enable automatic retrieval of additional context from the vector store in every query.

**Embeddings**

- `Embeddings provider`: Global embeddings provider (for indexing and Chat with Files).

- `Embeddings provider (ENV)`: ENV vars for the global embeddings provider (API keys, etc.).

- `Embeddings provider (**kwargs)`: Keyword arguments for the global embeddings provider (model_name, etc.).

- `Default embedding providers for attachments`: Define the embedding model by provider to use for attachments.

- `RPM limit for embeddings API calls`: Specify the limit of maximum requests per minute (RPM), 0 = no limit.

**Indexing**

- `Recursive directory indexing`: Enables recursive directory indexing. Default: False.

- `Replace old document versions in the index during re-indexing`: If enabled, previous versions of documents will be deleted from the index when the newest versions are indexed. Default: True.

- `Excluded file extensions`: File extensions to exclude if there is no data loader for the extension, comma-separated.

- `Force exclude files`: If enabled, the exclusion list will be applied even when the data loader for the extension is active. Default: False.
- `Stop indexing on error`: If enabled, indexing will stop whenever an error occurs. Default: True.

- `Custom metadata to append/replace to indexed documents (files)`: Define custom metadata key => value fields for specified file extensions, with extensions separated by commas. Allowed placeholders: {path}, {relative_path}, {filename}, {dirname}, {relative_dir}, {ext}, {size}, {mtime}, {date}, {date_time}, {time}, {timestamp}. Use * (asterisk) as the extension if you want to apply a field to all files. Set an empty value to remove the field with the specified key from the metadata.

- `Custom metadata to append/replace to indexed documents (web)`: Define custom metadata key => value fields for specified external data loaders. Allowed placeholders: {date}, {date_time}, {time}, {timestamp} + {data loader args}

**Data Loaders**

- `Additional keyword arguments (**kwargs) for data loaders`: Additional keyword arguments, such as settings and API keys, for the data loader. These arguments will be passed to the loader; please refer to the LlamaIndex or LlamaHub loaders reference for a list of allowed arguments for the specified data loader.

- `Use local models in Video/Audio and Image (vision) loaders`: Enables usage of local models in the Video/Audio and Image (vision) loaders. If disabled, API models will be used (GPT-4 Vision and Whisper). Note: local models will work only in the Python version (not compiled/Snap). Default: False.

**Update**

- `Auto-index DB in real time`: Enables conversation context auto-indexing in defined modes.

- `ID of index for auto-indexing`: Index to use if auto-indexing of conversation context is enabled.

- `Enable auto-index in modes`: List of modes with context auto-index enabled, separated by commas.

- `DB (ALL), DB (UPDATE), FILES (ALL)`: Index the data - batch indexing is available here.

**Agent and experts**

**General**

- `Auto retrieve additional context from RAG`: Auto-retrieve additional context from RAG at the beginning if an index is provided.

- `Display a tray notification when the goal is achieved.`: If enabled, a notification will be displayed after the goal is achieved / the run is finished.

- `Display full agent output in chat window`: If enabled, real-time output from agent reasoning will be displayed with the response.

**Agents (LlamaIndex / OpenAI)**

- `Max steps (per iteration)`: Maximum steps in one iteration before the goal is achieved.

- `Max evaluation steps in loop`: Maximum evaluation steps to achieve the final result; set 0 for infinity.

- `Model for evaluation`: Model used for evaluation with score/percentage (loop). If not selected, the currently active model will be used.

- `Append and compare previous evaluation prompt in next evaluation`: If enabled, the previous improvement prompt will be checked in the next evaluation in the loop. Default: False

- `Split response messages`: Split response messages into separate context items in OpenAI Agents mode.

**Autonomous (Legacy agents)**

- `Sub-mode for agents`: Sub-mode to use in Agent (Autonomous) mode (chat, llama_index, etc.). Default: chat.

- `Index to use`: Only if the sub-mode is llama_index (Chat with Files); choose the index to use in both Agent and Expert modes.

- `Use native API function calls`: Use API function calls to run tools from plugins instead of using command prompts - Autonomous agent mode only. Default: False

- `Use Responses API in Agent mode`: Use the Responses API instead of the ChatCompletions API in Agent (autonomous) mode. OpenAI models only. Default: False
**Experts**

- `Sub-mode for experts`: Sub-mode to use in Experts mode (chat, llama_index, etc.). Default: chat.

- `Use agent for expert reasoning`: If enabled, the ReAct agent will be used for expert calls and expert reasoning. Default: True

- `Use native API function calls`: Use API function calls to run tools from plugins instead of using command prompts - Experts only. Default: False

- `Use Responses API in Experts mode (master)`: Use the Responses API instead of the ChatCompletions API in Experts mode (master model). OpenAI models only. Default: False

- `Use Responses API in Experts (slaves)`: Use the Responses API instead of the ChatCompletions API for Expert instances (slave models). OpenAI models only. Default: False

**Accessibility**

- `Enable voice control (using microphone)`: Enables voice control (using the microphone and defined commands).

- `Model`: Model used for voice command recognition.

- `Use voice synthesis to describe events on the screen.`: Enables audio description of on-screen events.

- `Use audio output cache`: If enabled, all static audio outputs will be cached on disk instead of being generated every time. Default: True.

- `Audio notify voice command execution`: Enables an audio "tick" notification when a voice command is executed.

- `Control shortcut keys`: Configuration of keyboard shortcuts for specified actions.

- `Blacklist for voice synthesis events describe (ignored events)`: List of muted events for the 'Use voice synthesis to describe events on the screen' option.

- `Voice control actions blacklist`: Disable actions in voice control; add actions to the blacklist to prevent their execution through voice commands.

**Personalize**

- `About You`: Provide information about yourself, e.g., "My name is... I'm 30 years old, I'm interested in..." This will be included in the model's system prompt. **WARNING:** Please do not use AI as a "friend". Real-life friendship is better than using an AI as a friendship replacement. DO NOT become emotionally involved in interactions with an AI.

- `Enable in Modes`: Select the modes where the personalized "about" prompt will be used.

**Updates**

- `Check for updates on start`: Enables checking for updates on start. Default: True.

- `Check for updates in background`: Enables checking for updates in the background (checking every 5 minutes). Default: True.

**Debug**

- `Show debug menu`: Enables the debug (developer) menu.

- `Log level`: Sets the log level (ERROR|WARNING|INFO|DEBUG).

- `Log and debug context`: Enables logging of context input/output.

- `Log and debug events`: Enables logging of event dispatch.

- `Log plugin usage to console`: Enables logging of plugin usage to the console.

- `Log DALL-E usage to console`: Enables logging of DALL-E usage to the console.

- `Log attachments usage to console`: Enables logging of attachments usage to the console.

- `Log Agents usage to console`: Enables logging of Agents usage to the console.

- `Log LlamaIndex usage to console`: Enables logging of LlamaIndex usage to the console.

- `Log Assistants usage to console`: Enables logging of Assistants API usage to the console.


## JSON files

The configuration is stored in JSON files for easy manual modification outside of the application.
These configuration files are located in the user's work directory, within the following subdirectory:

``` ini
{HOME_DIR}/.config/pygpt-net/
```

## Manual configuration

You can manually edit the configuration files in this directory (this is your work directory):

``` ini
{HOME_DIR}/.config/pygpt-net/
```

- `assistants.json` - stores the list of assistants.
- `attachments.json` - stores the list of current attachments.
- `config.json` - stores the main configuration settings.
- `models.json` - stores model configurations.
- `cache` - a directory for the audio cache.
- `capture` - a directory for images captured from the camera and screenshots.
- `css` - a directory for CSS stylesheets (user override).
- `history` - a directory for context history in `.txt` format.
- `idx` - `LlamaIndex` indexes.
- `img` - a directory for images generated with `DALL-E 3` and `DALL-E 2`, saved as `.png` files.
- `locale` - a directory for locales (user override).
- `data` - a directory for data files and files downloaded/generated by models.
- `presets` - a directory for presets stored as `.json` files.
- `upload` - a directory for local copies of attachments coming from outside the workdir.
- `db.sqlite` - a database with contexts, notepads and index data records.
- `app.log` - a file with the error and debug log.
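Because these files are plain JSON, they can also be edited from a script. A small sketch (close the app before editing; the `render.engine` and `render.open_gl` keys are the ones shown later in the Debugging section):

```python
# Sketch: editing config.json in the working directory from a script.
import json
from pathlib import Path

config_path = Path.home() / ".config" / "pygpt-net" / "config.json"
config = json.loads(config_path.read_text(encoding="utf-8"))

config["render.engine"] = "legacy"   # force the legacy renderer
config["render.open_gl"] = False     # disable OpenGL hardware acceleration

config_path.write_text(json.dumps(config, indent=4), encoding="utf-8")
```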
---

## Setting the Working Directory Using Command Line Arguments

To set the current working directory using a command-line argument, use:

```
python3 ./run.py --workdir="/path/to/workdir"
```
or, for the binary version:

```
pygpt.exe --workdir="/path/to/workdir"
```


## Translations / Locale

Locale `.ini` files are located in the app directory:

``` ini
./data/locale
```

This directory is automatically scanned when the application launches. To add a new translation, create and save the file with the appropriate name, for example:

``` ini
locale.es.ini
```

This will add Spanish as a selectable language in the application's language menu.

**Overwriting CSS and locales with Your Own Files:**

You can also overwrite files in the `locale` and `css` app directories with your own files in the user directory. This allows you to overwrite language files or CSS styles in a very simple way - by just creating files in your working directory.


``` ini
{HOME_DIR}/.config/pygpt-net/
```

- `locale` - a directory for locales in `.ini` format.
- `css` - a directory for CSS styles in `.css` format.

**Adding Your Own Fonts**

You can add your own fonts and use them in CSS files. To load your own fonts, place them in the `%workdir%/fonts` directory. Supported font types are: `otf`, `ttf`.
You can see the list of loaded fonts in `Debug / Config`.

**Example:**

```
%workdir%
|_css
|_data
|_fonts
   |_MyFont
     |_MyFont-Regular.ttf
     |_MyFont-Bold.ttf
     |...
```

```css
pre {
    font-family: 'MyFont';
}
```

## Data Loaders

**Configuring data loaders**

In the `Settings -> LlamaIndex -> Data loaders` section you can define additional keyword arguments to pass to the data loader instance.

In most cases, the internal LlamaIndex loaders are used. You can check these base loaders here:

File: https://github.com/run-llama/llama_index/tree/main/llama-index-integrations/readers/llama-index-readers-file/llama_index/readers/file

Web: https://github.com/run-llama/llama_index/tree/main/llama-index-integrations/readers/llama-index-readers-web

**Tip:** to index external data or data from the Web, just ask for it using the `Web Search` plugin, e.g. you can ask the model with `Please index the youtube video: URL to video`, etc. The data loader for the specified content will be chosen automatically.

Allowed additional keyword arguments for built-in data loaders (files):

**CSV Files** (file_csv)

- `concat_rows` - bool, default: `True`
- `encoding` - str, default: `utf-8`

**HTML Files** (file_html)

- `tag` - str, default: `section`
- `ignore_no_id` - bool, default: `False`

**Image (vision)** (file_image_vision)

This loader can operate in two modes: local model and API.
If the local mode is enabled, then the local model will be used. The local mode requires a Python/PyPi version of the application and is not available in the compiled or Snap versions.
If the API mode (default) is selected, then the OpenAI API and the standard vision model will be used.

**Note:** Usage of API mode consumes additional tokens in the OpenAI API (for the `GPT-4 Vision` model)!

Local mode requires `torch`, `transformers`, `sentencepiece` and `Pillow` to be installed and uses the `Salesforce/blip2-opt-2.7b` model for describing images.

- `keep_image` - bool, default: `False`
- `local_prompt` - str, default: `Question: describe what you see in this image. Answer:`
- `api_prompt` - str, default: `Describe what you see in this image` - Prompt to use in API
- `api_model` - str, default: `gpt-4-vision-preview` - Model to use in API
- `api_tokens` - int, default: `1000` - Max output tokens in API

**IPYNB Notebook files** (file_ipynb)

- `parser_config` - dict, default: `None`
- `concatenate` - bool, default: `False`

**Markdown files** (file_md)

- `remove_hyperlinks` - bool, default: `True`
- `remove_images` - bool, default: `True`

**PDF documents** (file_pdf)

- `return_full_document` - bool, default: `False`

**Video/Audio** (file_video_audio)

This loader can operate in two modes: local model and API.
If the local mode is enabled, then the local `Whisper` model will be used. The local mode requires a Python/PyPi version of the application and is not available in the compiled or Snap versions.
If the API mode (default) is selected, then the provider currently selected in the `Audio Input` plugin will be used. If `OpenAI Whisper` is chosen, then the OpenAI API and the API Whisper model will be used.
**Note:** Usage of Whisper via the API consumes additional tokens in the OpenAI API (for the `Whisper` model)!

Local mode requires `torch` and `openai-whisper` to be installed and uses the `Whisper` model locally to transcribe video and audio.

- `model_version` - str, default: `base` - Whisper model to use, available models: https://github.com/openai/whisper

**XML files** (file_xml)

- `tree_level_split` - int, default: `0`

Allowed additional keyword arguments for built-in data loaders (Web and external content):

**Bitbucket** (web_bitbucket)

- `username` - str, default: `None`
- `api_key` - str, default: `None`
- `extensions_to_skip` - list, default: `[]`

**ChatGPT Retrieval** (web_chatgpt_retrieval)

- `endpoint_url` - str, default: `None`
- `bearer_token` - str, default: `None`
- `retries` - int, default: `None`
- `batch_size` - int, default: `100`

**Google Calendar** (web_google_calendar)

- `credentials_path` - str, default: `credentials.json`
- `token_path` - str, default: `token.json`

**Google Docs** (web_google_docs)

- `credentials_path` - str, default: `credentials.json`
- `token_path` - str, default: `token.json`

**Google Drive** (web_google_drive)

- `credentials_path` - str, default: `credentials.json`
- `token_path` - str, default: `token.json`
- `pydrive_creds_path` - str, default: `creds.txt`
- `client_config` - dict, default: `{}`

**Google Gmail** (web_google_gmail)

- `credentials_path` - str, default: `credentials.json`
- `token_path` - str, default: `token.json`
- `use_iterative_parser` - bool, default: `False`
- `max_results` - int, default: `10`
- `results_per_page` - int, default: `None`

**Google Keep** (web_google_keep)

- `credentials_path` - str, default: `keep_credentials.json`

**Google Sheets** (web_google_sheets)

- `credentials_path` - str, default: `credentials.json`
- `token_path` - str, default: `token.json`

**GitHub Issues** (web_github_issues)

- `token` - str, default: `None`
- `verbose` - bool, default: `False`

**GitHub Repository** (web_github_repository)

- `token` - str, default: `None`
- `verbose` - bool, default: `False`
- `concurrent_requests` - int, default: `5`
- `timeout` - int, default: `5`
- `retries` - int, default: `0`
- `filter_dirs_include` - list, default: `None`
- `filter_dirs_exclude` - list, default: `None`
- `filter_file_ext_include` - list, default: `None`
- `filter_file_ext_exclude` - list, default: `None`

**Microsoft OneDrive** (web_microsoft_onedrive)

- `client_id` - str, default: `None`
- `client_secret` - str, default: `None`
- `tenant_id` - str, default: `consumers`

**Sitemap (XML)** (web_sitemap)

- `html_to_text` - bool, default: `False`
- `limit` - int, default: `10`

**SQL Database** (web_database)

- `uri` - str, default: `None`

You can provide a single URI in the form of: `{scheme}://{user}:{password}@{host}:{port}/{dbname}`, or you can provide each field manually:

- `scheme` - str, default: `None`
- `host` - str, default: `None`
- `port` - str, default: `None`
- `user` - str, default: `None`
- `password` - str, default: `None`
- `dbname` - str, default: `None`

**Twitter/X posts** (web_twitter)

- `bearer_token` - str, default: `None`
- `num_tweets` - int, default: `100`

## Vector stores

**Available vector stores** (provided by `LlamaIndex`):

```
- ChromaVectorStore
- ElasticsearchStore
- PinecodeVectorStore
- QdrantVectorStore
- RedisVectorStore
- SimpleVectorStore
```
You can configure the selected vector store by providing config options such as `api_key` in the `Settings -> LlamaIndex` window.

Arguments provided here (in the `Vector Store (**kwargs)` list under `Advanced settings`) will be passed to the selected vector store provider. You can check the keyword arguments needed by the selected provider on the LlamaIndex API reference page:

https://docs.llamaindex.ai/en/stable/api_reference/storage/vector_store.html

Which keyword arguments are passed to providers?

For `ChromaVectorStore` and `SimpleVectorStore` all arguments are set by PyGPT and passed internally (you do not need to configure anything).

For other providers you can provide these arguments:

**ElasticsearchStore**

Keyword arguments for ElasticsearchStore(`**kwargs`):

- `index_name` (default: current index ID, already set, not required)
- any other keyword arguments provided on the list

**PinecodeVectorStore**

Keyword arguments for Pinecone(`**kwargs`):

- `api_key`
- `index_name` (default: current index ID, already set, not required)

**QdrantVectorStore**

Keyword arguments for QdrantVectorStore(`**kwargs`):

- `url` - str, default: `http://localhost:6333`
- `api_key` - str, default: `None` (for Qdrant Cloud)
- `collection_name` (default: current index ID, already set, not required)
- any other keyword arguments provided on the list

**RedisVectorStore**

Keyword arguments for RedisVectorStore(`**kwargs`):

- `index_name` (default: current index ID, already set, not required)
- any other keyword arguments provided on the list

You can extend the list of available providers by creating a custom provider and registering it on app launch.

By default, chat-based mode is used with `Chat with Files`. If you want to only query the index (without chat), you can enable the `Query index only (without chat)` option.
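For orientation, this is roughly what the wiring looks like when done with LlamaIndex directly; a minimal sketch using the bundled `SimpleVectorStore` (illustration only - PyGPT performs the equivalent internally, using the provider and `**kwargs` you configure):

```python
# Illustration of the LlamaIndex side: index local files into a vector store
# and query them. Requires an embeddings provider (an OpenAI key by default).
from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex
from llama_index.core.vector_stores import SimpleVectorStore

documents = SimpleDirectoryReader("./data").load_data()  # local files to index
vector_store = SimpleVectorStore()
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

# Query-only usage, cf. the `Query index only (without chat)` option.
print(index.as_query_engine().query("What do these files describe?"))
```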
### Adding custom vector stores and data loaders

You can create a custom vector store provider or data loader for your data and develop a custom launcher for the application.

See the section `Extending PyGPT / Adding a custom Vector Store provider` for more details.

# Updates

### Updating PyGPT

**PyGPT** comes with an integrated update notification system. When a new version with additional features is released, you'll receive an alert within the app.

To get the new version, simply download it and start using it in place of the old one. All your custom settings like configuration, presets, indexes, and past conversations will be kept and ready to use right away in the new version.

# Debugging and Logging

In the `Settings -> Developer` dialog, you can enable the `Show debug menu` option to turn on the debugging menu. The menu allows you to inspect the status of application elements. In the debugging menu, there is a `Logger` option that opens a log window in which the program's operation is displayed in real-time.

**Logging levels**:

By default, all errors and exceptions are logged to the file:

```ini
{HOME_DIR}/.config/pygpt-net/app.log
```

To increase the logging level (the `ERROR` level is the default), run the application with the `--debug` argument:

``` ini
python3 run.py --debug=1
```

or

```ini
python3 run.py --debug=2
```

The value `1` enables the `INFO` logging level.

The value `2` enables the `DEBUG` logging level (most information).

**Compatibility (legacy) mode**

If you have problems with the `WebEngine / Chromium` renderer, you can force legacy mode by launching the app with command-line arguments:

``` ini
python3 run.py --legacy=1
```

and to force-disable OpenGL hardware acceleration:

``` ini
python3 run.py --disable-gpu=1
```

You can also manually enable legacy mode by editing the config file - open the `%WORKDIR%/config.json` config file in an editor and set the following options:

``` json
"render.engine": "legacy",
"render.open_gl": false,
```

# Extending PyGPT

## Quick start

You can create your own extensions for **PyGPT** at any time.

PyGPT can be extended with:

- custom models

- custom plugins

- custom LLM wrappers

- custom vector store providers

- custom data loaders

- custom audio input providers

- custom audio output providers

- custom web search engine providers

- custom agents (LlamaIndex or OpenAI Agents)

**Examples (tutorial files)**

See the `examples` directory in this repository with examples of a custom launcher, plugin, vector store, LLM (LlamaIndex) provider and data loader:

- `examples/custom_launcher.py`

- `examples/example_audio_input.py`

- `examples/example_audio_output.py`

- `examples/example_data_loader.py`

- `examples/example_llm.py`

- `examples/example_plugin.py`

- `examples/example_vector_store.py`

- `examples/example_web_search.py`

These example files can be used as a starting point for creating your own extensions for **PyGPT**.

Extending PyGPT with custom plugins, LLM wrappers and vector stores:

- You can pass custom plugin instances, LLM wrappers and vector store providers to the launcher.

- This is useful if you want to extend PyGPT with your own plugins, vector storage and LLMs.

To register custom plugins:

- Pass a list with the plugin instances as the `plugins` keyword argument.

To register custom LLM wrappers:

- Pass a list with the LLM wrapper instances as the `llms` keyword argument.

To register custom vector store providers:

- Pass a list with the vector store provider instances as the `vector_stores` keyword argument.

To register custom data loaders:

- Pass a list with the data loader instances as the `loaders` keyword argument.

To register custom audio input providers:

- Pass a list with the audio input provider instances as the `audio_input` keyword argument.

To register custom audio output providers:

- Pass a list with the audio output provider instances as the `audio_output` keyword argument.

To register custom web providers:

- Pass a list with the web provider instances as the `web` keyword argument.

## Adding a custom model

To add a new model using the OpenAI API or the LlamaIndex wrapper, use the editor in `Config -> Models` or manually edit the `models.json` file by inserting the model's configuration details.
If you are adding a model via LlamaIndex, make sure to include the model's name, its supported modes (either `chat`, `completion`, or both), the LLM provider (such as `OpenAI` or `HuggingFace`), and, if you are using an external API-based model, an optional `API KEY` along with any other necessary environment settings.

Example of models configuration - `%WORKDIR%/models.json`:

```
"gpt-3.5-turbo": {
    "id": "gpt-3.5-turbo",
    "name": "gpt-3.5-turbo",
    "mode": [
        "chat",
        "assistant",
        "langchain",
        "llama_index"
    ],
    "provider": "openai",
    "llama_index": {
        "args": [
            {
                "name": "model",
                "value": "gpt-3.5-turbo",
                "type": "str"
            }
        ],
        "env": [
            {
                "name": "OPENAI_API_KEY",
                "value": "{api_key}"
            }
        ]
    },
    "ctx": 4096,
    "tokens": 4096,
    "default": false
},
```

There is built-in support for these LLM providers:

- Anthropic
- Azure OpenAI
- Deepseek API
- Google
- HuggingFace
- Local models (OpenAI API compatible)
- Ollama
- OpenAI
- OpenRouter
- Perplexity
- xAI

**Tip**: `{api_key}` in `models.json` is a placeholder for the main OpenAI API KEY from the settings. It will be replaced by the configured key value.

## Adding a custom plugin

### Creating Your Own Plugin

You can create your own plugin for **PyGPT**. The plugin can be written in Python and then registered with the application just before launching it. All plugins included with the app are stored in the `plugin` directory - you can use them as coding examples for your own plugins.

**Examples (tutorial files)**

See the example plugin in the `examples` directory:

- `examples/example_plugin.py`

This example file can be used as a starting point for creating your own plugin for **PyGPT**.

To register a custom plugin:

- Create a custom launcher for the app.

- Pass a list with the custom plugin instances as the `plugins` keyword argument.

**Example of a custom launcher:**


```python
# custom_launcher.py

from pygpt_net.app import run
from plugins import CustomPlugin, OtherCustomPlugin
from llms import CustomLLM
from vector_stores import CustomVectorStore

plugins = [
    CustomPlugin(),
    OtherCustomPlugin(),
]
llms = [
    CustomLLM(),
]
vector_stores = [
    CustomVectorStore(),
]

run(
    plugins=plugins,
    llms=llms,
    vector_stores=vector_stores,
)
```

### Handling events

In the plugin, you can receive and modify dispatched events.
To do this, create a method named `handle(self, event, *args, **kwargs)` and handle the received events like here:

```python
# custom_plugin.py

from pygpt_net.core.events import Event


def handle(self, event: Event, *args, **kwargs):
    """
    Handle dispatched events (a method of your plugin class)

    :param event: event object
    """
    name = event.name
    data = event.data
    ctx = event.ctx

    if name == Event.INPUT_BEFORE:
        self.some_method(data['value'])
    elif name == Event.CTX_BEGIN:
        self.some_other_method(ctx)
    else:
        pass  # handle other events here
```

### List of Events

Event names are defined in the `Event` class in `pygpt_net.core.events`.

Syntax: `event name` - triggered on, `event data` *(data type)*:

- `AI_NAME` - when preparing an AI name, `data['value']` *(string, name of the AI assistant)*
- `AGENT_PROMPT` - on agent prompt in eval mode, `data['value']` *(string, prompt)*

- `AUDIO_INPUT_RECORD_START` - start audio input recording

- `AUDIO_INPUT_RECORD_STOP` - stop audio input recording

- `AUDIO_INPUT_RECORD_TOGGLE` - toggle audio input recording

- `AUDIO_INPUT_TRANSCRIBE` - on audio file transcription, `data['path']` *(string, path to audio file)*

- `AUDIO_INPUT_STOP` - force stop audio input

- `AUDIO_INPUT_TOGGLE` - when speech input is enabled or disabled, `data['value']` *(bool, True/False)*

- `AUDIO_OUTPUT_STOP` - force stop audio output

- `AUDIO_OUTPUT_TOGGLE` - when speech output is enabled or disabled, `data['value']` *(bool, True/False)*

- `AUDIO_READ_TEXT` - on text read using speech synthesis, `data['text']` *(str, text to read)*

- `CMD_EXECUTE` - when a command is executed, `data['commands']` *(list, commands and arguments)*

- `CMD_INLINE` - when an inline command is executed, `data['commands']` *(list, commands and arguments)*

- `CMD_SYNTAX` - when appending syntax for commands, `data['prompt'], data['syntax']` *(string, list, prompt and list with commands usage syntax)*

- `CMD_SYNTAX_INLINE` - when appending syntax for commands (inline mode), `data['prompt'], data['syntax']` *(string, list, prompt and list with commands usage syntax)*

- `CTX_AFTER` - after the context item is sent, `ctx`

- `CTX_BEFORE` - before the context item is sent, `ctx`

- `CTX_BEGIN` - when a context item is created, `ctx`

- `CTX_END` - when context item handling is finished, `ctx`

- `CTX_SELECT` - when a context is selected on the list, `data['value']` *(int, ctx meta ID)*

- `DISABLE` - when the plugin is disabled, `data['value']` *(string, plugin ID)*

- `ENABLE` - when the plugin is enabled, `data['value']` *(string, plugin ID)*

- `FORCE_STOP` - on force stop of plugins

- `INPUT_BEFORE` - upon receiving input from the textarea, `data['value']` *(string, text to be sent)*

- `MODE_BEFORE` - before the mode is selected, `data['value'], data['prompt']` *(string, string, mode ID)*

- `MODE_SELECT` - on mode select, `data['value']` *(string, mode ID)*

- `MODEL_BEFORE` - before the model is selected, `data['value']` *(string, model ID)*

- `MODEL_SELECT` - on model select, `data['value']` *(string, model ID)*

- `PLUGIN_SETTINGS_CHANGED` - on plugin settings update (saving settings)

- `PLUGIN_OPTION_GET` - on request for a plugin option value, `data['name'], data['value']` *(string, any, name of requested option, value)*

- `POST_PROMPT` - after preparing a system prompt, `data['value']` *(string, system prompt)*

- `POST_PROMPT_ASYNC` - after preparing a system prompt, just before the request in an async thread, `data['value']` *(string, system prompt)*

- `POST_PROMPT_END` - after preparing a system prompt, just before the request in an async thread, at the very end, `data['value']` *(string, system prompt)*

- `PRE_PROMPT` - before preparing a system prompt, `data['value']` *(string, system prompt)*

- `SYSTEM_PROMPT` - when preparing a system prompt, `data['value']` *(string, system prompt)*

- `TOOL_OUTPUT_RENDER` - when rendering extra content from plugin tools, `data['content']` *(string, content)*

- `UI_ATTACHMENTS` - when the attachment upload elements are rendered, `data['value']` *(bool, show True/False)*

- `UI_VISION` - when the vision elements are rendered, `data['value']` *(bool, show True/False)*

- `USER_NAME` - when preparing a user's name, `data['value']` *(string, name of the user)*
- `USER_SEND` - just before the input text is sent, `data['value']` *(string, input text)*


You can stop the propagation of a received event at any time by setting `stop` to `True`:

```
event.stop = True
```

Events flow can be debugged by enabling the option `Config -> Settings -> Developer -> Log and debug events`.

## Adding a custom LLM provider

Handling LLMs with LlamaIndex is implemented through separate wrappers. This allows support to be added for any provider and model available via LlamaIndex. All built-in wrappers for the models and their providers are placed in `pygpt_net.provider.llms`.

These wrappers are loaded into the application during startup using the `launcher.add_llm()` method:

```python
# app.py

from pygpt_net.provider.api.openai import OpenAILLM
from pygpt_net.provider.llms.azure_openai import AzureOpenAILLM
from pygpt_net.provider.llms.anthropic import AnthropicLLM
from pygpt_net.provider.llms.hugging_face import HuggingFaceLLM
from pygpt_net.provider.llms.ollama import OllamaLLM
from pygpt_net.provider.llms.google import GoogleLLM


def run(**kwargs):
    """Runs the app."""
    # Initialize the app
    launcher = Launcher()
    launcher.init()

    # Register plugins
    ...

    # Register llama-index LLM wrappers
    launcher.add_llm(OpenAILLM())
    launcher.add_llm(AzureOpenAILLM())
    launcher.add_llm(AnthropicLLM())
    launcher.add_llm(HuggingFaceLLM())
    launcher.add_llm(OllamaLLM())
    launcher.add_llm(GoogleLLM())

    # Launch the app
    launcher.run()
```

To add support for providers not included by default, you can create your own wrapper that returns a custom model to the application and then pass this custom wrapper to the launcher.

Extending **PyGPT** with custom plugins and LLM wrappers is straightforward:

- Pass instances of custom plugins and LLM wrappers directly to the launcher.

To register custom LLM wrappers:

- Provide a list of LLM wrapper instances as the `llms` keyword argument.

**Example:**


```python
# launcher.py

from pygpt_net.app import run
from plugins import CustomPlugin, OtherCustomPlugin
from llms import CustomLLM

plugins = [
    CustomPlugin(),
    OtherCustomPlugin(),
]
llms = [
    CustomLLM(),  # <--- custom LLM provider (wrapper)
]
vector_stores = []

run(
    plugins=plugins, 
    llms=llms, 
    vector_stores=vector_stores,
)
```

**Examples (tutorial files)**

See the `examples` directory in this repository with examples of a custom launcher, plugin, vector store, LLM provider and data loader:

- `examples/custom_launcher.py`

- `examples/example_audio_input.py`

- `examples/example_audio_output.py`

- `examples/example_data_loader.py`

- `examples/example_llm.py`  <-- use it as an example

- `examples/example_plugin.py`

- `examples/example_vector_store.py`

- `examples/example_web_search.py`

These example files can be used as a starting point for creating your own extensions for **PyGPT**.

To integrate your own model or provider into **PyGPT**, you can also reference the classes located in `pygpt_net.provider.llms`. These samples can act as more complex examples for your custom class. Ensure that your custom wrapper class includes two essential methods: `chat` and `completion`.
These methods should return the respective objects required for the model to operate in `chat` and `completion` modes.

Every LLM provider (wrapper) inherits from the `BaseLLM` class and can provide two components: a provider for LlamaIndex, and a provider for Embeddings.


## Adding a custom vector store provider

You can create a custom vector store provider or data loader for your data and develop a custom launcher for the application. To register them, pass the vector store provider instance in the `vector_stores` keyword argument and the loader instance in the `loaders` keyword argument:


```python
# app.py

# vector stores
from pygpt_net.provider.vector_stores.chroma import ChromaProvider
from pygpt_net.provider.vector_stores.elasticsearch import ElasticsearchProvider
from pygpt_net.provider.vector_stores.pinecode import PinecodeProvider
from pygpt_net.provider.vector_stores.qdrant import QdrantProvider
from pygpt_net.provider.vector_stores.redis import RedisProvider
from pygpt_net.provider.vector_stores.simple import SimpleProvider

def run(**kwargs):
    # ...
    # register base vector store providers (llama-index)
    launcher.add_vector_store(ChromaProvider())
    launcher.add_vector_store(ElasticsearchProvider())
    launcher.add_vector_store(PinecodeProvider())
    launcher.add_vector_store(QdrantProvider())
    launcher.add_vector_store(RedisProvider())
    launcher.add_vector_store(SimpleProvider())

    # register custom vector store providers (llama-index)
    vector_stores = kwargs.get('vector_stores', None)
    if isinstance(vector_stores, list):
        for store in vector_stores:
            launcher.add_vector_store(store)

    # ...
```

In a custom launcher, register your provider by passing its instance in the `vector_stores` keyword argument:

```python
# custom_launcher.py

from pygpt_net.app import run
from plugins import CustomPlugin, OtherCustomPlugin
from llms import CustomLLM
from vector_stores import CustomVectorStore

plugins = [
    CustomPlugin(),
    OtherCustomPlugin(),
]
llms = [
    CustomLLM(),
]
vector_stores = [
    CustomVectorStore(),  # <--- custom vector store provider
]

run(
    plugins=plugins,
    llms=llms,
    vector_stores=vector_stores,
)
```

The vector store provider must be an instance of `pygpt_net.provider.vector_stores.base.BaseStore`.
You can review the code of the built-in providers in `pygpt_net.provider.vector_stores` and use them as examples when creating a custom provider.

### Adding a custom data loader


```python
# custom_launcher.py

from pygpt_net.app import run
from plugins import CustomPlugin, OtherCustomPlugin
from llms import CustomLLM
from vector_stores import CustomVectorStore
from loaders import CustomLoader

plugins = [
    CustomPlugin(),
    OtherCustomPlugin(),
]
llms = [
    CustomLLM(),
]
vector_stores = [
    CustomVectorStore(),
]
loaders = [
    CustomLoader(),  # <---- custom data loader
]

run(
    plugins=plugins,
    llms=llms,
    vector_stores=vector_stores,  # <--- list with custom vector store providers
    loaders=loaders  # <--- list with custom data loaders
)
```

The data loader must be an instance of `pygpt_net.provider.loaders.base.BaseLoader`.
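The `BaseLoader` wrapper itself is best modeled on a built-in loader from `pygpt_net.provider.loaders`; the LlamaIndex reader it typically wraps can be sketched as follows (a hypothetical `MyTxtReader`, for illustration only):

```python
# my_reader.py - illustration of the LlamaIndex side of a data loader.
# MyTxtReader is a hypothetical example; wrap it in a BaseLoader subclass
# modeled on the built-in loaders in pygpt_net.provider.loaders.
from pathlib import Path
from typing import List, Optional

from llama_index.core.readers.base import BaseReader
from llama_index.core.schema import Document


class MyTxtReader(BaseReader):
    """Reads a .txt file into LlamaIndex Documents, one per paragraph."""

    def load_data(self, file: Path, extra_info: Optional[dict] = None) -> List[Document]:
        text = Path(file).read_text(encoding="utf-8")
        metadata = {"file_name": Path(file).name, **(extra_info or {})}
        return [
            Document(text=chunk, metadata=metadata)
            for chunk in text.split("\n\n") if chunk.strip()
        ]
```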
\n\nYou can review the code of the built-in loaders in `pygpt_net.provider.loaders` and use them as examples when creating a custom loader.\n\n\n# DISCLAIMER\n\nThis application is not officially associated with OpenAI. The author shall not be held liable for any damages \nresulting from the use of this application. It is provided \"as is,\" without any form of warranty. \nUsers are reminded to be mindful of token usage: always verify the number of tokens used by the model on \nthe provider's API website and engage with the application responsibly. Activating plugins, such as Web Search,\nmay consume additional tokens that are not displayed in the main window. \n\n**Always monitor your actual token usage on the OpenAI, Google, Anthropic, xAI, etc. websites.**\n\n---\n\n# CHANGELOG\n\n## Recent changes:\n\n**2.7.12 (2026-02-06)**\n\n- xAI SDK upgraded to v1.6.1.\n- Added video generation support to the xAI provider.\n- Added new models: grok-imagine-image and grok-imagine-video.\n- UI improvements and smoother navigation between tabs and columns.  \n- Optimized attachment handling: added an option to attach a file only once (if the API stores messages on its side), and an option to force attaching it only once.  \n- Updated libraries.\n\n**2.7.11 (2026-02-05)**\n\n- Improved focus handling between columns.\n- Fixed the models debugger update.\n\n**2.7.10 (2026-02-03)**\n\n- Fixed an issue where an avatar could be overwritten when creating a new preset.  \n- Fixed an issue where a new context was not created when opening a new tab in the second column.  \n- Added prompt history navigation to the input field (Ctrl + Up\u002FDown arrow keys).  \n- Added initial image centering when loading the Image Viewer.  \n- Added a Mark\u002FUnmark feature to the Notepad widget.  
\n- Added 18 new languages: Arabic (ar), Bulgarian (bg), Czech (cs), Danish (da), Finnish (fi), Hebrew (he), Hindi (hi), Hungarian (hu), Japanese (ja), Korean (ko), Dutch (nl), Norwegian (no), Portuguese (pt), Romanian (ro), Russian (ru), Slovak (sk), Swedish (sv), Turkish (tr).\n\n**2.7.9 (2026-01-08)**\n\n- Improved realtime audio mode.\n- Added xAI provider and Grok support in realtime audio mode.\n\n**2.7.8 (2026-01-06)**\n\n- Added the xAI Collections remote tool and integrated collections management into the Remote Vector Stores tool.\n- Unified the Remote Vector Stores tool into a single tool for all providers.\n- Added xAI Grok audio input and output provider (realtime audio support for Grok coming soon!).\n- Enhanced the Image Viewer tool with a toolbox featuring Prev\u002FNext and more options.\n\n**2.7.7 (2026-01-05)**\n\n- Added support for the Responses API in xAI.\n- Added xAI remote tools: Remote MCP, Code Execution.\n- Added Anthropic remote tools: Remote MCP, Web Fetch, Code Execution.\n\n**2.7.6 (2026-01-03)**\n\n- Fixed compatibility with the xAI SDK and resolved empty responses from Grok models.\n- Fixed missing libraries in the Snap package.\n- Added zoom and grab functionality in the Image Viewer.\n- Added a zoom menu to textarea and web widgets.\n- Added the ability to close tabs with a middle mouse button click.\n\n# Credits and links\n\n**Official website:** \u003Chttps:\u002F\u002Fpygpt.net>\n\n**Documentation:** \u003Chttps:\u002F\u002Fpygpt.readthedocs.io>\n\n**Support and donate:** \u003Chttps:\u002F\u002Fpygpt.net\u002F#donate>\n\n**GitHub:** \u003Chttps:\u002F\u002Fgithub.com\u002Fszczyglis-dev\u002Fpy-gpt>\n\n**Discord:** \u003Chttps:\u002F\u002Fpygpt.net\u002Fdiscord>\n\n**Snap Store:** \u003Chttps:\u002F\u002Fsnapcraft.io\u002Fpygpt>\n\n**Microsoft Store:** \u003Chttps:\u002F\u002Fapps.microsoft.com\u002Fdetail\u002FXP99R4MX3X65VQ>\n\n**PyPI:** \u003Chttps:\u002F\u002Fpypi.org\u002Fproject\u002Fpygpt-net>\n\n**Author:** Marcin Szczygliński (Poland, EU)\n\n**Contact:** \u003Cinfo@pygpt.net>\n\n**License:** MIT License\n\n# Special thanks\n\nGitHub community:\n\n- [@BillionShields](https:\u002F\u002Fgithub.com\u002FBillionShields)\n\n- [@gfsysa](https:\u002F\u002Fgithub.com\u002Fgfsysa)\n\n- [@glinkot](https:\u002F\u002Fgithub.com\u002Fglinkot)\n\n- [@kaneda2004](https:\u002F\u002Fgithub.com\u002Fkaneda2004)\n\n- [@KingOfTheCastle](https:\u002F\u002Fgithub.com\u002FKingOfTheCastle)\n\n- [@linnflux](https:\u002F\u002Fgithub.com\u002Flinnflux)\n\n- [@lukasz-pekala](https:\u002F\u002Fgithub.com\u002Flukasz-pekala)\n\n- [@moritz-t-w](https:\u002F\u002Fgithub.com\u002Fmoritz-t-w)\n\n- [@oleksii-honchar](https:\u002F\u002Fgithub.com\u002Foleksii-honchar)\n\n- [@yf007](https:\u002F\u002Fgithub.com\u002Fyf007)\n\n## Third-party libraries\n\nA full list of the external libraries used in this project is located in the [requirements.txt](https:\u002F\u002Fgithub.com\u002Fszczyglis-dev\u002Fpy-gpt\u002Fblob\u002Fmaster\u002Frequirements.txt) file in the main folder of the repository.\n\nAll SVG icons used are from `Material Design Icons`, provided by Google:\n\nhttps:\u002F\u002Fgithub.com\u002Fgoogle\u002Fmaterial-design-icons\n\nhttps:\u002F\u002Ffonts.google.com\u002Ficons\n\nMonaspace fonts provided by GitHub: https:\u002F\u002Fgithub.com\u002Fgithubnext\u002Fmonaspace\n\nThe code of the LlamaIndex offline loaders integrated into the app is taken from LlamaHub: https:\u002F\u002Fllamahub.ai\n\nAwesome ChatGPT Prompts (used in templates): 
https:\u002F\u002Fgithub.com\u002Ff\u002Fawesome-chatgpt-prompts\u002F\n\nCode syntax highlight powered by: https:\u002F\u002Fhighlightjs.org\n\nMarkdown parsing powered by: https:\u002F\u002Fgithub.com\u002Fmarkdown-it\u002Fmarkdown-it\n\nLaTeX support by: https:\u002F\u002Fkatex.org\n\nPlaywright: https:\u002F\u002Fplaywright.dev\u002F\n","# PyGPT - 桌面AI助手\n\n[![pygpt](https:\u002F\u002Fsnapcraft.io\u002Fpygpt\u002Fbadge.svg)](https:\u002F\u002Fsnapcraft.io\u002Fpygpt)\n\n版本：**2.7.12** | 构建日期：**2026-02-06** | Python：**>=3.10, \u003C3.14**\n\n> 官方网站：https:\u002F\u002Fpygpt.net | 文档：https:\u002F\u002Fpygpt.readthedocs.io\n> \n> Discord：https:\u002F\u002Fpygpt.net\u002Fdiscord | Snap：https:\u002F\u002Fsnapcraft.io\u002Fpygpt | PyPI：https:\u002F\u002Fpypi.org\u002Fproject\u002Fpygpt-net\n> \n> 针对Linux（`zip`）和Windows 10\u002F11（`msi`）的64位编译版：https:\u002F\u002Fpygpt.net\u002F#download\n> \n> ❤️ 捐赠：https:\u002F\u002Fwww.buymeacoffee.com\u002Fszczyglis | https:\u002F\u002Fgithub.com\u002Fsponsors\u002Fszczyglis-dev\n\n## 概述\n\n**PyGPT** 是一款 **一体化** 的桌面AI助手，通过 `OpenAI API` 直接与 OpenAI 的语言模型进行交互，包括 `GPT-5`、`GPT-4`、`o1`、`o3` 等。此外，借助其他 SDK 和 `LlamaIndex`，该应用还支持其他 LLM，例如 HuggingFace 上的模型、通过 `Ollama` 提供的本地模型（如 `gpt-oss`、`Llama 3`、`Mistral`、`DeepSeek V3\u002FR1` 或 `Bielik`），以及 `Google Gemini`、`Anthropic Claude`、`Perplexity \u002F Sonar` 和 `xAI Grok` 等。\n\n此助手提供多种操作模式，如聊天、助理、代理、文本补全，以及图像生成和分析等图像相关任务。**PyGPT** 具有文件系统功能，可进行文件读写，生成并运行Python代码，执行系统命令、自定义命令，并管理文件传输。它还允许模型通过 `DuckDuckGo`、`Google` 和 `Microsoft Bing` 进行网络搜索。\n\n在音频交互方面，**PyGPT** 包括使用 `Microsoft Azure`、`Google`、`Eleven Labs` 和 `OpenAI` 文本转语音服务的语音合成功能。此外，它还具备由 `OpenAI Whisper`、`Google` 和 `Bing` 提供的语音识别能力，使应用程序能够理解语音命令并将音频输入转录为文本。它具有上下文记忆功能，支持保存和加载，使用户可以从对话中预设的点继续交互。通过直观的预设系统，提示的创建和管理变得更加便捷。\n\n**PyGPT** 的功能可通过插件扩展，允许进行自定义增强（内置多个插件）。其多模态能力使其成为适用于多种AI辅助操作的灵活工具，例如基于文本的交互、系统自动化、日常协助、视觉应用、自然语言处理、代码生成和图像创作。\n\n包含多种操作模式，如聊天、文本补全、助理、代理、视觉、文件聊天（通过 `LlamaIndex`）、命令执行、外部API调用和图像生成，使 **PyGPT** 成为许多AI驱动任务的多功能工具。\n\n**展示视频**（mp4，版本 `2.5.65`，构建日期 `2025-07-24`）：\n\nhttps:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Fassets\u002Fd8305109-8b1b-41cb-b3ba-8c654271a95c\n\n**截图**（版本 `2.5.64`，构建日期 `2025-07-23`）：\n\n深色主题：\n![v2_main](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_af3a500512c8.png)\n\n浅色主题：\n![v2_light](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_6924a06d1216.png)\n\n您可以在以下链接下载适用于Windows和Linux的64位编译版：https:\u002F\u002Fpygpt.net\u002F#download\n\n## 特性\n\n- 桌面AI助手，适用于 `Linux`、`Windows` 和 `Mac`，使用Python编写。\n- 功能类似于 `ChatGPT`，但运行于本地桌面计算机上。\n- 11种操作模式：聊天、文件聊天、实时+音频、研究（Perplexity）、补全、图像和视频生成、助理、专家、计算机使用、代理和自主模式。\n- 支持多种模型，如 `OpenAI GPT-5`、`GPT-4`、`o1`、`o3`、`o4`、`Google Gemini`、`Anthropic Claude`、`xAI Grok`、`DeepSeek V3\u002FR1`、`Perplexity \u002F Sonar`，以及任何可通过 `LlamaIndex` 和 `Ollama` 访问的模型，如 `DeepSeek`、`gpt-oss`、`Llama 3`、`Mistral`、`Bielik` 等。\n- 与自有文件聊天：集成 `LlamaIndex` 支持，可与以下数据类型进行聊天：`txt`、`pdf`、`csv`、`html`、`md`、`docx`、`json`、`epub`、`xlsx`、`xml`、网页、`Google`、`GitHub`、视频\u002F音频、图片及其他数据类型；或使用对话历史作为提供给模型的额外上下文。\n- 内置向量数据库支持及自动化的文件和数据嵌入。\n- 通过 `DALL-E`、`gpt-image`、`Imagen`、`Gemini` 和 `Nano Banana` 等模型进行图像生成。\n- 通过 `Veo3` 和 `Sora2` 等模型进行视频生成。\n- 通过 `DuckDuckGo`、`Google` 和 `Microsoft Bing` 实现互联网访问。\n- 使用 `Microsoft Azure`、`Google`、`Eleven Labs` 和 `OpenAI` 文本转语音服务进行语音合成。\n- 使用 `OpenAI Whisper`、`Google` 和 `Microsoft 语音识别` 进行语音识别。\n- 插件支持，内置插件如 `文件I\u002FO`、`代码解释器`、`网络搜索`、`Google`、`Facebook`、`X\u002FTwitter`、`Slack`、`Telegram`、`GitHub`、`MCP` 等。\n- 支持MCP。\n- 在视觉模式下实时捕捉视频摄像头画面。\n- 
通过视觉模型进行图像分析。\n- 内置针对残障人士的支持功能：可自定义的键盘快捷键、语音控制，以及通过语音合成将屏幕上的操作转换为音频。\n- 处理并存储完整的对话上下文（短期和长期记忆）。\n- 集成日历、每日笔记，并可根据选定日期在上下文中进行搜索。\n- 执行工具和命令（通过插件：访问本地文件系统、Python代码解释器、执行系统命令等）。\n- 自定义命令的创建和执行。\n- 内置Crontab\u002F任务调度器。\n- 内置实时Python代码解释器。\n- 管理文件和附件，提供上传、下载和整理选项。\n- 对话历史具备回退到先前上下文的功能（长期记忆）。\n- 可轻松管理提示，提供便捷可编辑的预设。\n- 操作和界面直观易用。\n- 内置记事本。\n- 内置简单的绘图工具。\n- 内置基于节点的代理构建器。\n- 支持多种语言。\n- 无需事先了解如何使用AI模型。\n- 完全可配置。\n- 主题支持。\n- 实时代码语法高亮显示。\n- 内置令牌使用量计算。\n- 具备支持未来OpenAI模型的潜力。\n- **开源**；源代码可在 `GitHub` 上获取。\n- 使用用户自己的API密钥。\n- 以及更多功能。\n\n该应用程序是免费的、开源的，可在配备 `Linux`、`Windows 10`、`Windows 11` 和 `Mac` 的个人电脑上运行。\n完整的Python源代码可在 `GitHub` 上获取。\n\n**PyGPT使用用户的API密钥——要使用GPT模型，您必须拥有注册的OpenAI账户和自己的API密钥。本地模型则不需要任何API密钥。**\n\n您还可以使用内置的LlamaIndex支持连接到其他大型语言模型（LLMs），例如HuggingFace上的模型。这可能需要额外的API密钥。\n\n# 安装\n\n## 二进制文件（Linux、Windows 10 和 11）\n\n您可以下载适用于 `Linux` 和 `Windows`（10\u002F11）的编译好的二进制版本。\n\n**PyGPT** 的二进制文件需要在 Windows 10、11 或 Linux 系统上运行。只需从 https:\u002F\u002Fpygpt.net 下载页面下载适合您系统的安装程序或压缩包，解压或安装后即可运行应用程序。目前尚无适用于 Mac 的二进制版本，因此在 Mac 上您必须通过 PyPi 或源代码来运行 PyGPT。当前仅提供 64 位二进制文件。\n\nLinux 版本要求 `GLIBC` >= `2.35`。\n\n## Microsoft Store（Windows）\n\n对于 Windows 10\u002F11，您可以直接从 Microsoft Store 安装 **PyGPT**：\n\n[![从 Microsoft Store 获取](https:\u002F\u002Fget.microsoft.com\u002Fimages\u002Fen-us%20dark.svg)](https:\u002F\u002Fapps.microsoft.com\u002Fdetail\u002FXP99R4MX3X65VQ)\n\nMicrosoft Store 链接：https:\u002F\u002Fapps.microsoft.com\u002Fdetail\u002FXP99R4MX3X65VQ\n\n## AppImage（Linux）\n\n您可以从发布页面下载最新的 Linux 版 **PyGPT** `AppImage`：\n\n**发布页面：** https:\u002F\u002Fgithub.com\u002Fszczyglis-dev\u002Fpy-gpt\u002Freleases\n\n**提示：** 请确保为下载的文件赋予执行权限：\n\n```chmod +x .\u002FPyGPT-X.X.X-x86_64.AppImage```\n\n为了管理未来的更新，您可以使用 `AppImageUpdate` 工具：\n\n可以从以下链接下载：https:\u002F\u002Fgithub.com\u002FAppImage\u002FAppImageUpdate\u002Freleases\n\n下载后，在终端中运行以下命令：\n\n```appimageupdatetool .\u002FPyGPT-X.X.X-x86_64.AppImage```\n\n## Snap Store（Linux）\n\n您也可以直接从 Snap Store 安装 **PyGPT**：\n\n```commandline\nsudo snap install pygpt\n```\n\n要管理未来的更新，请使用：\n\n```commandline\nsudo snap refresh pygpt\n```\n\n[![从 Snap Store 获取](https:\u002F\u002Fsnapcraft.io\u002Fstatic\u002Fimages\u002Fbadges\u002Fen\u002Fsnap-store-black.svg)](https:\u002F\u002Fsnapcraft.io\u002Fpygpt)\n\n**使用摄像头：** 在 Snap 版本中使用摄像头时，您需要通过以下命令连接摄像头：\n\n```commandline\nsudo snap connect pygpt:camera\n```\n\n**使用麦克风：** 在 Snap 版本中使用麦克风时，您需要通过以下命令连接麦克风：\n\n```commandline\nsudo snap connect pygpt:audio-record :audio-record\nsudo snap connect pygpt:alsa\n```\n\n**使用音频输出：** 在 Snap 版本中使用音频输出时，您需要通过以下命令连接音频设备：\n\n```commandline\nsudo snap connect pygpt:audio-playback\nsudo snap connect pygpt:alsa\n```\n\n**在 Snap 版本中连接 Docker 中的 IPython：**\n\n要在 Snap 版本中使用 IPython，您需要将 PyGPT 连接到 Docker 守护进程：\n\n```commandline\nsudo snap connect pygpt:docker-executables docker:docker-executables\n```\n\n```commandline\nsudo snap connect pygpt:docker docker:docker-daemon\n```\n\n## PyPi（pip）\n\n该应用程序也可以通过 `PyPi` 使用 `pip install` 进行安装：\n\n1. 创建虚拟环境：\n\n```commandline\npython3 -m venv venv\nsource venv\u002Fbin\u002Factivate\n```\n\n2. 从 PyPi 安装：\n\n``` commandline\npip install pygpt-net\n```\n\n3. 安装完成后，运行以下命令启动应用程序：\n\n``` commandline\npygpt\n```\n\n## 从 GitHub 源代码运行\n\n另一种方法是从 `GitHub` 下载源代码，并使用 Python 解释器（>=3.10，\u003C3.14）运行应用程序。\n\n### 使用 pip 安装\n\n1. 克隆 Git 仓库或下载 .zip 文件：\n\n```commandline\ngit clone https:\u002F\u002Fgithub.com\u002Fszczyglis-dev\u002Fpy-gpt.git\ncd py-gpt\n```\n\n2. 创建一个新的虚拟环境：\n\n```commandline\npython3 -m venv venv\nsource venv\u002Fbin\u002Factivate\n```\n\n3. 
安装依赖项：\n\n```commandline\npip install -r requirements.txt\n```\n\n4. 运行应用程序：\n\n```commandline\npython3 run.py\n```\n\n### 使用 Poetry 安装\n\n1. 克隆 Git 仓库或下载 .zip 文件：\n\n```commandline\ngit clone https:\u002F\u002Fgithub.com\u002Fszczyglis-dev\u002Fpy-gpt.git\ncd py-gpt\n```\n\n2. 安装 Poetry（如果尚未安装）：\n\n```commandline\npip install poetry\n```\n\n3. 创建一个使用 Python 3.10 的新虚拟环境：\n\n```commandline\npoetry env use python3.10\npoetry shell\n```\n\n或者（Poetry ≥ 2.0）：\n\n```commandline\npoetry env use python3.10\npoetry env activate\n```\n\n4. 安装依赖项：\n\n```commandline\npoetry install\n```\n\n5. 运行应用程序：\n\n```commandline\npoetry run python3 run.py\n```\n\n**提示：** 您可以使用 `PyInstaller` 为您的系统创建编译后的应用程序版本（要求 `PyInstaller` 版本 ≥ `6.0.0`）。\n\n### 故障排除\n\n如果您在 Linux 上使用较新版本的 PySide 时遇到 `xcb` 插件问题，例如以下错误：\n\n```commandline\nqt.qpa.plugin: 无法加载 Qt 平台插件 \"xcb\"，尽管它已被找到。\n此应用程序无法启动，因为无法初始化任何 Qt 平台插件。\n重新安装应用程序可能会解决此问题。\n```\n\n此时，请安装 `libxcb`：\n\n```commandline\nsudo apt install libxcb-cursor0\n```\n\n如果您在 Linux 上遇到音频问题，请尝试安装 `portaudio19-dev` 和\u002F或 `libasound2`：\n\n```commandline\nsudo apt install portaudio19-dev\n```\n\n```commandline\nsudo apt install libasound2\nsudo apt install libasound2-data \nsudo apt install libasound2-plugins\n```\n\n**Linux 上 GLIBC 版本问题**\n\n如果您在尝试运行 Linux 编译版本时遇到以下错误：\n\n```commandline\n加载 Python 库 libpython3.10.so.1.0 出错：dlopen: \u002Flib\u002Fx86_64-linux-gnu\u002Flibm.so.6: 找不到 GLIBC_2.35 版本（libpython3.10.so.1.0 需要该版本）\n```\n\n请尝试将 GLIBC 更新至 `2.35` 版本，或使用至少包含 `2.35` 版本 GLIBC 的较新操作系统。\n\n**Snap 版本中访问摄像头：**\n\n```commandline\nsudo snap connect pygpt:camera\n```\n\n**Snap 版本中访问麦克风：**\n\n要在 Snap 版本中使用麦克风，您需要通过以下命令连接麦克风：\n\n```commandline\nsudo snap connect pygpt:audio-record :audio-record\n```\n\n如果连接后仍出现音频权限相关的错误，可尝试在 AppArmor 配置文件中添加以下行：\n\n```commandline\n# \u002Fvar\u002Flib\u002Fsnapd\u002Fapparmor\u002Fprofiles\u002Fsnap.pygpt.pygpt\n\n...\n\n\u002Fetc\u002Fhttpd\u002Fconf\u002Fmime.types r\n```\n\n并重新加载配置文件。\n\n或者，您也可以尝试移除并重新安装 snap：\n\n`sudo snap remove --purge pygpt`\n\n`sudo snap install pygpt`\n\n\n**Windows 版本中对麦克风和音频的访问权限：**\n\n如果您在 Windows 上使用非二进制 PIP\u002FPython 版本时遇到音频或麦克风问题，请检查是否已安装 FFmpeg。如果没有安装，请先安装 FFmpeg，并将其添加到系统 PATH 中。您可以在此处找到相关教程：https:\u002F\u002Fphoenixnap.com\u002Fkb\u002Fffmpeg-windows。而二进制版本已经内置了 FFmpeg。\n\n**Windows 与 VC++ 可再发行组件**\n\n在 Windows 系统上，程序正常运行需要安装 `VC++ 可再发行组件`，该组件可在 Microsoft 官网下载：\n\nhttps:\u002F\u002Flearn.microsoft.com\u002Fen-us\u002Fcpp\u002Fwindows\u002Flatest-supported-vc-redist\n\n`PySide6` 使用的就是这一环境中的库——它是 PyGPT 的基础依赖之一。如果缺少这些库，可能会导致界面显示异常，甚至使应用程序无法启动。\n\n此外，您可能还需要将路径 `C:\\path\\to\\venv\\Lib\\python3.x\\site-packages\\PySide6` 添加到系统的 `PATH` 环境变量中。\n\n**WebEngine\u002FChromium 渲染器及 OpenGL 相关问题**\n\n如果您在使用 `WebEngine \u002F Chromium` 渲染器时遇到问题，可以通过命令行参数强制启用旧版模式来解决：\n\n```commandline\npython3 run.py --legacy=1\n```\n\n同时，若要强制禁用 OpenGL 硬件加速，可以使用以下命令：\n\n```commandline\npython3 run.py --disable-gpu=1\n```\n\n您也可以手动编辑配置文件以启用旧版模式：打开 `%WORKDIR%\u002Fconfig.json` 文件，将以下选项设置为：\n\n```json\n\"render.engine\": \"legacy\",\n\"render.open_gl\": false,\n```\n\n## 其他要求\n\n运行本程序需要互联网连接（用于 API 通信）、一个已注册的 OpenAI 账户，以及有效的 API 密钥，这些信息必须输入到程序中。而本地模型，例如 `Llama3`，则无需 OpenAI 账户或任何 API 密钥。\n\n## 调试与日志记录\n\n请参阅“调试与日志记录”章节，了解如何更详细地记录日志并诊断问题的方法。\n\n\n# 快速入门\n\n## 配置 API 密钥\n\n您可以为多个不同的服务提供商配置 API 密钥，例如 OpenAI、Anthropic、Google、xAI、Perplexity、OpenRouter 等。这种灵活性使您能够根据需求选择不同的服务提供商。\n\n首次设置时，请在应用程序内配置您的 API 密钥。\n\n操作步骤如下：进入菜单：\n\n`配置 -> 设置 -> API 密钥`\n\n在这里，您可以添加或管理任何受支持的服务提供商的 API 密钥。\n\n![v2_api_keys](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_bf85d02016ea.png)\n\n**配置服务提供商：**\n\n1. **选择服务提供商：** 切换到相应提供商的标签页。\n2. 
**输入 API 密钥：** 将所选服务提供商的 API 密钥粘贴进去。\n\n**示例：**\n\n- **OpenAI：** 您可以在 OpenAI 官网上注册并获取 API 密钥：https:\u002F\u002Fplatform.openai.com，然后前往 https:\u002F\u002Fplatform.openai.com\u002Faccount\u002Fapi-keys。\n- **Anthropic、Google 等：** 请按照各自平台的指引操作。\n\n**注意：** 您能否使用特定模型或服务，取决于您在相应服务提供商处的权限级别。如果您希望使用自定义 API 端点或不需要 API 密钥的本地 API，只需在 API 密钥字段中输入任意值，即可跳过密钥为空的提示。\n\n# 工作模式\n\n## 对话模式\n\n**+ 内联视觉与图像生成**\n\n在 **PyGPT** 中，此模式模拟了 `ChatGPT`，允许您与 `GPT-5`、`GPT-4`、`o1`、`o3`、`Claude`、`Gemini`、`Grok`、`Perplexity (Sonar)`、`Deepseek` 等多种模型进行对话。它通过 OpenAI SDK 的 `Responses API` 和 `ChatCompletions API` 进行工作；如果启用了原生 SDK，则也可使用 Google GenAI、Anthropic 或 xAI 的 SDK。您可以在 `配置 -> 设置 -> API 密钥` 中设置 `ChatCompletions` 的端点。\n\n**提示：** 此模式直接使用服务提供商的 SDK。如果应用中未内置原生客户端，像 Sonar 或 Llama3 这样的模型仍可通过 LlamaIndex 或兼容 OpenAI 的 API 端点在对话模式下使用。当您使用非 OpenAI 模型时，程序会自动切换到这些端点。您可以在 `设置 -> API 密钥` 中启用或禁用每个服务提供商的原生 API SDK。如果禁用原生 SDK，程序将通过兼容的 ChatCompletions API 端点调用 OpenAI SDK。\n\n目前内置的原生客户端包括：\n\n- Anthropic SDK\n- OpenAI SDK\n- Google GenAI SDK\n- xAI SDK\n\n界面的主要部分是一个聊天窗口，显示您的对话记录。下方是用于输入消息的文本框。右侧可以设置或更改使用的模型和系统提示词。您还可以将这些设置保存为预设，以便快速在不同模型或任务之间切换。\n\n在输入消息的地方上方，界面会实时显示您当前消息将消耗的 token 数量，帮助您监控用量。此外，此处还提供附件上传功能。有关附件使用的更多信息，请参阅“文件与附件”章节。\n\n![v2_mode_chat](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_970013e68074.png)\n\n**视觉功能：** 如果您想发送磁盘上的照片或相机拍摄的图片进行分析，而所选模型不支持视觉功能，则必须在插件菜单中启用“视觉（内联）”插件。该插件允许您在任何对话模式下发送照片或相机图像进行分析。\n\n![v3_vision_plugins](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_057ad314f863.png)\n\n借助此插件，您可以使用相机拍摄一张图片，或将现有图片附加后发送，以讨论该照片的内容：\n\n![v3_vision_chat](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_aabca0bc4da3.png)\n\n**图像生成：** 如果您希望在对话中直接生成图像，必须在插件菜单中启用“图像生成（内联）”插件。该插件允许您在对话模式下生成图像：\n\n![v3_img_chat](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_b624079b3f47.png)\n\n## 文件聊天模式（LlamaIndex）\n\n此模式允许您通过对话与文档及整个上下文历史进行交互。它将 `LlamaIndex` 无缝集成到聊天界面中，使您可以立即查询已索引的文档。\n\n**提示：** 如果您不想调用工具或命令，请取消勾选 `+工具` 复选框。这将在使用本地模型时加快响应速度。您还可以在以下路径启用 ReAct 代理以进行工具调用：`设置 -> 索引 \u002F LlamaIndex -> 聊天 -> 在文件聊天模式中使用 ReAct 代理进行工具调用`。如果 ReAct 代理和 `+工具` 复选框同时启用，则流式模式将被禁用。\n\n**查询单个文件**\n\n您还可以使用 `Files I\u002FO` 插件中的 `query_file` 命令“即时”查询单个文件。只需针对该文件提出问题，即可查询任何文件。系统会为正在查询的文件在内存中创建一个临时索引，并从中返回答案。自版本 `2.1.9` 起，还提供了一个用于查询网页及外部内容的类似命令：`直接使用 LlamaIndex 查询网页内容`。\n\n**例如：**\n\n如果您有一个文件：`data\u002Fmy_cars.txt`，内容为 `我的车是红色的。`\n\n您可以询问：`查询 my_cars.txt 文件，我的车是什么颜色？`\n\n您将收到回复：`红色`。\n\n注意：此命令仅针对当前查询对文件进行索引，不会将其持久化到数据库中。若要将查询过的文件也存储到标准索引中，您必须在插件设置中启用 `自动索引读取的文件` 选项。请务必勾选 `+工具` 复选框，以允许使用插件中的工具和命令。\n\n**使用文件聊天模式**\n\n在此模式下，您查询的是存储在向量存储数据库中的整个索引。首先，您需要对希望用作额外上下文的文件进行索引（嵌入）。嵌入会将您的文本数据转换为向量。如果您不熟悉嵌入及其工作原理，请参阅本文：\n\nhttps:\u002F\u002Fstackoverflow.blog\u002F2023\u002F11\u002F09\u002Fan-intuitive-introduction-to-text-embeddings\u002F\n\n有关 OpenAI 官网上的可视化示例，请参见下图：\n\n![vectors](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_f7c7e5f55e3b.png)\n\n来源：https:\u002F\u002Fcdn.openai.com\u002Fnew-and-improved-embedding-model\u002Fdraft-20221214a\u002Fvectors-3.svg\n\n要索引您的文件，只需将它们复制或上传到 `data` 目录，并点击 `索引全部` 按钮启动索引（嵌入），或者右键单击某个文件并选择 `索引...`。此外，您还可以通过激活 `文件聊天模式（LlamaIndex，内联）` 插件，在任何聊天模式中利用已索引文件中的数据。\n\n![v2_idx1](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_4027d34ee466.png)\n\n文件完成索引（嵌入到向量存储）后，您就可以在聊天模式中使用其中的上下文：\n\n![v2_idx2](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_8d502c2a5c4e.png)\n\n内置文件加载器：\n\n**文件：**\n\n- CSV 文件（csv）\n- Epub 
文件（epub）\n- Excel .xlsx 表格（xlsx）\n- HTML 文件（html、htm）\n- IPYNB 笔记本文件（ipynb）\n- 图像（视觉）（jpg、jpeg、png、gif、bmp、tiff、webp）\n- JSON 文件（json）\n- Markdown 文件（md）\n- PDF 文档（pdf）\n- 纯文本文件（txt）\n- 视频\u002F音频（mp4、avi、mov、mkv、webm、mp3、mpeg、mpga、m4a、wav）\n- Word .docx 文档（docx）\n- XML 文件（xml）\n\n**网页\u002F外部内容：**\n\n- Bitbucket\n- ChatGPT 检索插件\n- GitHub Issues\n- GitHub 仓库\n- Google 日历\n- Google 文档\n- Google 云端硬盘\n- Google Gmail\n- Google Keep\n- Google 表格\n- Microsoft OneDrive\n- RSS\n- SQL 数据库\n- 站点地图（XML）\n- Twitter\u002FX 帖子\n- 网页（抓取任意网页内容）\n- YouTube（转录）\n\n您可以在 `设置 \u002F 索引 \u002F LlamaIndex \u002F 数据加载器` 中配置数据加载器，为指定的加载器提供关键字参数列表。您也可以开发并提供自己的自定义加载器，并在应用程序中注册。\n\nLlamaIndex 还与上下文数据库集成——您可以将数据库中的数据（您的上下文历史）作为讨论中的额外上下文。在 `设置 \u002F 索引 \u002F LlamaIndex` 部分，您可以选择索引现有上下文历史，或启用对新数据的实时索引（来自数据库）。\n\n**警告：** 请注意，索引内容时会调用嵌入模型的 API。每次索引都会消耗额外的令牌。请务必在提供商的页面上控制所使用的令牌数量。\n\n**提示：** 使用文件聊天模式时，您默认可以访问从 `\u002Fdata` 目录手动索引的文件。不过，您也可以通过附加文件来使用额外的上下文——这种来自附件的额外上下文不会进入主索引，而只存在于仅供本次对话使用的临时索引中。\n\n**令牌限制：** 当您在非查询模式下使用 `文件聊天模式` 时，LlamaIndex 会将额外的上下文添加到系统提示中。如果您同时使用插件（这也增加了系统提示中的指令），可能会超过允许的最大令牌数。如果出现“令牌使用过多”的警告，请关闭未使用的插件，或取消勾选 `+工具` 选项，以减少系统提示中使用的令牌数量。\n\n**可用的向量存储**（由 `LlamaIndex` 提供）：\n\n```\n- ChromaVectorStore\n- ElasticsearchStore\n- PinecodeVectorStore\n- QdrantVectorStore\n- RedisVectorStore\n- SimpleVectorStore\n```\n\n您可以在 `设置 -> LlamaIndex` 窗口中，通过提供如 `api_key` 等配置选项来配置选定的向量存储。请参阅“配置 \u002F 向量存储”部分以获取配置参考。\n\n\n**配置数据加载器**\n\n在 `设置 -> LlamaIndex -> 数据加载器` 部分，您可以定义要传递给数据加载器实例的附加关键字参数。请参阅“配置 \u002F 数据加载器”部分以获取配置参考。\n\n\n## 音频聊天模式\n\n此模式的工作方式与普通聊天模式相同，但原生支持使用 Realtime 和 Live API 进行音频输入和输出。在此模式下，音频输入和输出直接由模型处理，无需借助外部插件。这使得音频通信更快、效果更好。\n\n目前处于测试阶段。\n\n现阶段仅支持 OpenAI 实时模型（通过 Realtime API）和 Google Gemini 实时模型（通过 Live API）。\n\n## 研究模式\n\n此模式（使用 Sonar 和 R1 模型时）基于 Perplexity API 运行：https:\u002F\u002Fperplexity.ai。\n\n它允许进行深度网络搜索，并利用 Perplexity AI 中提供的 Sonar 模型。\n\n此模式需要一个 Perplexity API 密钥，可在 https:\u002F\u002Fperplexity.ai 上生成。\n\n自版本 `2.5.27` 起，此模式也支持 OpenAI 的深度研究模型。\n\n## 完成模式\n\n这是一种较旧的操作模式，允许用户以标准的文本补全方式进行工作。不过，它在文本处理上提供了更大的灵活性，使您可以按照自己喜欢的方式启动整个对话。\n\n与聊天模式类似，在界面右侧有便捷的预设选项。这些预设可以帮助您微调指令，并快速在不同的配置和预制提示模板之间切换。\n\n此外，该模式还提供为AI和用户添加标签的功能，从而可以模拟特定角色之间的对话——例如，您可以根据提示中预定义的内容，创建蝙蝠侠与小丑之间的对话。这一功能为设置各种有趣的对话场景提供了丰富的创意可能性，让交流更具吸引力和探索性。\n\n自版本 `2.0.107` 起，`davinci` 模型已被弃用，完成模式中已替换为 `gpt-3.5-turbo-instruct` 模型。\n\n## 图像和视频生成\n\n**PyGPT** 支持使用 `DALL-E 3`、`gpt-image-1`、`Imagen 3\u002F4` 和 `Nano Banana` 等模型快速简便地创建图像；同时也可以利用 `Veo3` 和 `Sora2` 进行视频生成。\n\n图像和视频的生成类似于一次聊天对话：用户输入提示后触发生成过程，随后可将生成的图像或视频下载、保存到本地，并在屏幕上显示。您可以在“图像生成”模式下直接向模型发送原始提示，也可以让模型为您推荐最佳提示。\n\n![v3_img](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_b54dd1d338f4.png)\n\n通过插件“图像生成（内联）”，所有模式下均可使用图像生成功能。只需在任何模式下，例如 GPT 或 Gemini，请求生成一张图像，模型就会直接在当前会话中完成，无需切换模式。\n\n若希望在聊天模式中直接生成图像，需在插件菜单中启用“图像生成（内联）”插件。\n\n该插件允许您在聊天模式中生成图像：\n\n![v3_img_chat](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_b624079b3f47.png)\n\n对于 OpenAI 模型，您还可以在“配置 -> 设置 -> 远程工具”中启用远程图像生成功能。启用后，在聊天模式下无需插件即可原生支持图像生成。\n\n要使用 `Imagen` 模型，必须在“配置 -> 设置 -> API 密钥 -> Google -> 高级选项”中启用 `VertexAI`。\n\n### 重混、编辑或扩展\n\n如果您想基于之前的图像或视频进行重混或扩展，而不是从头开始创作新内容，请在工具箱中勾选“重混\u002F扩展”选项。当前上下文中最后生成的图像或视频将作为您的提示参考，以便您请求对生成内容进行修改。如果启用了“重混\u002F扩展”选项，则上传图片附件作为参考将不会生效。\n\n### 原始模式\n\n提供了一种切换提示生成模式的选项。\n\n如果启用“原始模式”，模型将按您提供的内容原样接收提示。\n\n如果禁用“原始模式”，模型将根据您的指示自动生成最佳提示。\n\n### 图像存储\n\n生成图像后，您可以通过右键单击轻松将其保存到磁盘的任意位置。此外，您还可以选择删除图像，或在浏览器中以全尺寸查看。\n\n**提示**：请使用预设来保存您准备好的提示，这样以后可以快速再次使用它们来生成新图像。\n\n应用程序会保留所有提示的历史记录，方便您随时回顾任何会话，并重复使用之前的提示来创建新图像。\n\n图像将被存储在 **PyGPT** 用户数据文件夹中的 `img` 
目录下。\n\n## 助手模式\n\n此模式使用 OpenAI 的 **助手 API**。\n\n该模式在基础聊天功能的基础上进行了扩展，增加了诸如用于执行代码的 `代码解释器`、用于访问文件的 `检索文件` 以及用于增强交互和与其他 API 或服务集成的自定义 `函数` 等外部工具。在此模式下，您可以轻松上传和下载文件。**PyGPT** 简化了文件管理流程，使您能够快速上传文档并管理由模型生成的文件。\n\n设置新助手非常简单——只需单击一下即可，助手会立即与 OpenAI API 同步。您之前在 OpenAI 上创建的助手也可以无缝导入到 **PyGPT** 中。\n\n![v2_mode_assistant](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_d1604fab5061.png)\n\n在助手模式下，您可以将文件存储在远程向量数据库中（每个助手对应一个），并通过应用轻松管理这些文件：\n\n![v2_mode_assistant_upload](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_e0f7574874a1.png)\n\n请注意，此模式下无法计算 token 使用量。尽管如此，仍支持文件（附件）上传。只需转到“文件”选项卡，即可轻松管理文件和附件，这些文件可以发送到 OpenAI API。\n\n### 向量数据库（通过助手 API）\n\n助手模式支持使用 OpenAI API 提供的外部向量数据库。此功能允许您将文件存储在数据库中，然后通过助手 API 对其进行搜索。每个助手可以关联一个向量数据库；如果已关联数据库，所有在此模式下上传的文件都将存储在该数据库中。如果助手未关联向量数据库，则会在上传文件时自动创建一个临时数据库，该数据库仅在当前线程中可用。临时数据库中的文件将在 7 天后自动删除。\n\n要启用向量数据库功能，请在助手设置中勾选“与文件聊天”复选框，这将激活助手 API 中的“文件搜索”工具。\n\n要管理外部向量数据库，可在助手创建和编辑窗口的向量数据库选择列表旁点击 DB 图标（见下图）。在该管理窗口中，您可以创建新的向量数据库、编辑现有数据库，或从 OpenAI 服务器导入所有现有数据库的列表：\n\n![v2_assistant_stores](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_1ab57abccf71.png)\n\n您可以通过设置“过期天数”来指定文件在数据库中自动保存的时长，超过该时长后文件将被删除（因为在 OpenAI 上存储文件会产生费用）。如果将值设置为 0，则文件不会自动删除。\n\n正在使用的向量数据库名称将显示在已上传文件列表中，位于右侧字段上：如果文件存储在数据库中，此处将显示数据库名称；否则，将显示该文件仅在线程内可用的信息：\n\n![v2_assistant_stores_upload](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_95cb619fe2a1.png)\n\n## 代理（LlamaIndex）  \n\n此模式允许使用 `LlamaIndex` 提供的代理功能。\n\n**内置代理（工作流）：**  \n- FunctionAgent  \n- ReAct  \n- 结构化规划器（子任务分解）  \n- CodeAct（与代码解释器插件连接）  \n- 监督者 + 工作者  \n\n**内置代理（旧版）：**  \n- OpenAI 助手  \n\n未来，内置代理列表将进一步扩展。\n\n您可以通过位于“工具 → 代理构建器”中的内置可视化节点式编辑器，创建自定义类型（工作流\u002F模式）。\n\n此外，您还可以通过创建一个继承自 `pygpt_net.provider.agents.base` 的新提供者来开发自己的代理。  \n\n**工具与插件**  \n\n在此模式下，所有已启用插件的命令均可使用（插件命令会实时自动转换为代理可用的工具）。  \n\n**RAG — 使用索引**  \n\n如果在代理预设中选择了某个索引，则系统会自动为该代理添加一个从索引读取数据的工具，从而实现 RAG 功能。  \n\n目前暂不支持多模态，仅支持文本输入。视觉支持将在未来加入。  \n\n**循环\u002F评估模式**  \n\n您可以将代理设置为自主运行模式，在循环中持续执行，并对当前输出进行评估。当您勾选“循环\u002F评估”复选框后，代理在给出最终响应后，将由另一个代理以百分比形式（0% 至 100%）对其质量进行评分。若得分低于您设定的期望值（可通过屏幕右下角的滑块调整，默认值为 75%），则会向该代理发送提示，要求其改进和优化响应内容。  \n\n将期望得分设为 0%，意味着每次代理生成结果时都会对其进行评估，并持续促使它自我改进。这样，您就可以让代理进入一个自主循环，直到它成功满足要求为止。  \n\n评估方式有两种可供选择：  \n- 按已完成任务的百分比  \n- 按最终响应的准确度（得分）  \n\n您还可以通过以下路径设置此类循环中的步骤上限：「设置 → 代理与专家 → LlamaIndex 代理 → 最大评估步骤」。默认值为 3，即代理最多尝试三次改进或修正其答案。若将上限设为零，则无限制，代理可无限期地在此模式下运行（请注意 Token 消耗！）。  \n\n用于评估响应的提示信息可在「设置 → 提示语 → 代理：循环中的评估提示」中进行修改。您可以根据实际需求调整提示内容，例如针对不同反馈强度的需求，定制更严格或更宽松的评价标准。\n\n## 代理（OpenAI）\n\n该模式基于集成到应用程序中的 `openai-agents` 库运行：\n\nhttps:\u002F\u002Fgithub.com\u002Fopenai\u002Fopenai-agents-python\n\n它允许运行适用于 OpenAI 模型以及与 OpenAI API 兼容的模型的代理。\n\n在该模式下，您可以使用“专家模式”预设中预先配置的专家——它们将以代理的形式启动（类型为 `openai_agents_experts`，支持启动一个主代理和若干子代理，并将查询适当地路由到相应的子代理）。\n\n**代理类型（工作流\u002F模式）：**\n\n- `带专家的代理` —— 使用附加的专家作为子代理\n- `带专家和反馈的代理` —— 使用附加的专家作为子代理，并结合循环中的反馈代理\n- `带反馈的代理` —— 单一代理配合循环中的反馈代理\n- `规划者` —— 规划代理，内部包含三个子代理：规划者、基础代理和反馈代理\n- `研究机器人` —— 研究员，内部包含三个子代理：规划者、搜索者和作为基础代理的写作者\n- `简单代理` —— 单一代理。\n- `进化` —— 在每一代（循环）中，从给定的父代代理中选择最佳响应；在下一代中，循环重复进行。\n- `B2B` —— 机器人之间的通信，涉及两个机器人相互交互，同时保留人类参与其中。\n- `主管 + 工人` —— 一个代理（主管）充当用户与另一个代理（工人）之间的桥梁。用户向主管提出请求，主管随后向工人发送指令，直到工人完成任务。\n\n您可以通过内置的可视化节点式编辑器创建自己的类型（工作流\u002F模式），该编辑器位于“工具 -> 代理构建器”中。\n\n此外，还添加了一些预定义的示例预设：\n\n- `编码员`\n- `专家代理`\n- `规划者`\n- `研究员`\n- `简单代理`\n- `带反馈的写作者`\n- `两个机器人`\n- `主管 + 工人`\n\n在“代理（OpenAI）”模式下，根据“配置 -> 设置 -> 
远程工具”菜单中的配置，所有远程工具均可供基础代理使用。\n\n对于专家的远程工具，可以在预设配置中为每个专家单独选择。\n\n本地工具（来自插件）则根据已启用的插件，对代理和专家可用，与其他模式相同。\n\n在带有反馈和计划的代理中，可以为每个代理在预设配置中允许使用工具。这些代理还具有可在预设中配置的独立提示。\n\n**不同类型代理的工作方式说明：**\n\n以下是不同类型代理的工作模式。您可以根据这些模式，通过修改特定任务预设中的相应提示，为不同任务创建代理。\n\n**简单代理**\n- 代理完成其任务后即停止工作。\n\n**带反馈的代理**\n- 第一个代理回答问题。\n- 第二个代理（反馈代理）评估答案，必要时返回给第一个代理以强制纠正。\n- 循环重复，直到反馈代理对评估结果满意为止。\n\n**带专家的代理**\n- 代理自行完成分配的任务，或将其委派给最合适的专家（另一名代理）。\n\n**带专家和反馈的代理**\n- 第一个代理回答问题，或将任务委派给最合适的专家。\n- 第二个代理（反馈代理）评估答案，必要时返回给第一个代理以强制纠正。\n- 循环重复，直到反馈代理对评估结果满意为止。\n\n**研究机器人**\n- 第一个代理（规划者）准备用于搜索的短语列表。\n- 第二个代理（搜索者）根据这些短语查找信息并生成摘要。\n- 第三个代理（写作者）基于摘要撰写报告。\n\n**规划者**\n- 第一个代理（规划者）将任务分解为子任务，并将清单发送给第二个代理。\n- 第二个代理根据准备好的任务清单执行任务。\n- 第三个代理负责反馈，评估任务完成情况，必要时要求纠正，并将请求返回给第一个代理。循环重复进行。\n\n**进化**\n- 您可以选择每一代（迭代）中工作的代理数量（父代）。\n- 每个代理针对同一问题准备不同的答案。\n- 下一代代理（选择者）会从这一代中选出表现最佳的代理（产生最佳答案）。\n- 另一个代理（反馈代理）验证最佳答案并提出改进建议。\n- 改进最佳答案的请求会被发送给新的一对代理（新的父代）。\n- 在下一代中再次从这对新代理中选出最佳答案，循环重复进行。\n\n**B2B**\n- 人类提供讨论主题。\n- 机器人1生成回复并发送给机器人2。\n- 机器人2接收机器人1的回复作为输入，作出回应，并将回复再次发送回机器人1作为其输入。此循环不断重复。\n- 人类可随时中断循环并更新整个讨论内容。\n\n**主管 + 工人**\n- 人类向主管提出请求。\n- 主管为工人准备指令并发送给工人。\n- 工人完成任务并将结果返回给主管。\n- 如果任务已完成，主管将结果返回给用户；如果未完成，主管会再次向工人发送指令以完成任务，或询问用户是否有其他问题。\n- 循环重复，直到任务完成为止。\n\n**提示**：自版本 `2.5.97` 起，您可以在所有代理类型中分配和使用专家。\n\n**限制：**\n\n- 当为专家选择“计算机使用”工具，或选择“computer-use”模型时，该模型将无法使用其他任何工具。\n\n## 代理（自主）  \n\n这是代理模式的旧版本，目前仍作为遗留功能提供。不过，建议使用更新的模式：`代理（LlamaIndex）`。\n\n**警告：请谨慎使用此模式** - 自主模式与其他插件连接时，可能会产生意外结果！\n\n该模式会激活自主模式，AI 将与自身展开对话。  \n您可以设置此循环运行任意次数。在此过程中，模型将进行自我对话，回答自己的问题和评论，以寻找最佳解决方案，并对之前生成的步骤进行批判性评估。\n\n**警告：** 将运行步骤数（迭代次数）设置为 `0` 会激活无限循环，这可能导致大量请求并产生极高的 token 消耗，请谨慎使用！每次启动无限循环时，系统都会显示确认提示。\n\n此模式类似于 `Auto-GPT`，可用于生成更复杂的推理，并通过将问题分解为子任务来解决，模型会自主地逐一执行这些子任务，直到目标达成。\n\n您还可以为多个代理创建带有自定义指令的预设，整合各种工作流、指令和目标以实现特定任务。\n\n所有插件都适用于代理，因此您可以为代理启用文件访问、命令执行、网络搜索、图像生成、视觉分析等功能。将代理与插件结合使用，可以构建一个完全自主、自给自足的系统。当前启用的所有插件都会自动对代理可用。\n\n当启用 `自动停止` 选项时，代理会在达到目标后尝试停止。\n\n与 `自动停止` 相反，当启用 `始终继续...` 选项时，代理会使用“始终继续”的提示生成额外的推理，并在看似任务已完成的情况下自动进入下一步。\n\n**选项**\n\n代理本质上是一种**虚拟**模式，它会在内部按顺序执行所选的基础模式。  \n您可以在设置中选择代理应使用的内部模式：\n\n```设置 \u002F 代理（自主） \u002F 要使用的子模式```\n\n默认模式为：`聊天`。\n\n如果您希望在运行代理时使用 LlamaIndex 模式，还可以通过以下选项指定 LlamaIndex 应使用的索引：\n\n``` 设置 \u002F 代理与专家 \u002F 要使用的索引```\n\n![v2_agent_settings](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_a72474fd0f7c.png)\n\n\n## 专家（协作模式）\n\n专家模式允许您创建专家（使用预设），并在对话中咨询他们。在此模式下，会创建一个用于进行对话的主上下文。在该上下文中，模型可以向专家发出请求以执行任务，并将结果返回到主线程。当专家在后台被调用时，会为其创建一个独立的上下文及记忆空间。这意味着，在同一个主上下文生命周期内，每个专家都可以通过其独立且隔离的上下文访问自己的记忆。\n\n**简单来说——您可以将专家想象成一个在后台运行的独立模型实例，随时可以被调用来提供帮助，拥有自己的上下文和记忆，以及针对特定主题的专业指令。**\n\n专家之间不会共享上下文，它们之间的唯一联系点是主对话线程。在这个主线程中，模型充当专家的管理者，可以根据需要在专家之间交换数据。\n\n专家是根据预设中的名称来选择的；例如，将您的专家命名为：ID = python_expert，名称 = “Python 程序员”，这样就会创建一个模型在涉及 Python 编程相关事务时会尝试调用的专家。您也可以手动请求调用某个专家：\n\n```bash\n调用 Python 专家生成一些代码。\n```\n\n专家可以被启用或禁用——要启用或禁用专家，请使用右键菜单从预设列表中选择 `启用\u002F禁用` 选项。只有已启用的专家才能在对话中使用。\n\n专家也可以在 `代理（自主）` 模式中使用——只需使用预设创建一个新的代理即可。只需将相应的专家移至活动列表，即可自动使其可供代理使用。\n\n您还可以在“内联”模式下使用专家——通过启用 `专家（内联）` 插件即可。这使得专家可以在任何模式下使用，例如普通聊天模式。\n\n专家模式与代理模式一样，也是一种“虚拟”模式——您需要为其选择一个目标运行模式，这可以在设置中的 `设置 \u002F 代理（自主） \u002F 专家使用的子模式` 中完成。\n\n您也可以随时请求获取当前活跃的专家列表：\n\n```bash\n给我一份当前活跃的专家名单。\n```\n\n## 计算机使用\n\n此模式允许对计算机进行自主控制。\n\n在此模式下，模型会接管鼠标和键盘，并能够在用户的环境中进行导航。这里使用的是 `Computer use` 远程工具：https:\u002F\u002Fplatform.openai.com\u002Fdocs\u002Fguides\u002Ftools-computer-use，结合 `鼠标与键盘` 插件。\n\n**使用示例：**\n\n```点击开始菜单打开它，在列表中搜索记事本并运行。```\n\n您可以通过工具箱底部的列表更改导航模式运行的环境。\n\n**可用环境：**\n\n- 浏览器\n- Linux\n- Windows\n- Mac\n\n您还可以在沙盒环境中运行此模式（使用 `Playwright` - https:\u002F\u002Fplaywright.dev\u002F）——只需在工具箱中启用 `沙盒` 
开关即可。您的系统上必须安装 Playwright 浏览器。为此，请运行：\n\n```bash\npip install playwright\nplaywright install \u003Cchromium|firefox|webkit>\n```\n\n之后，在 `鼠标与键盘` 插件设置中，将已安装浏览器的目录路径设置到 `沙盒（Playwright）\u002F 浏览器目录` 选项中。\n\n编译好的二进制版本和 Snap 版本在其软件包中已预装了 `chromium` 浏览器。\n\n\n**提示：** 请勿在计算机使用模式下启用 `鼠标与键盘` 插件——它已经“在后台”与计算机使用模式连接在一起了。\n\n\n# 上下文与记忆\n\n## 短期与长期记忆\n\n**PyGPT** 具有持续聊天模式，能够保持长时间的对话上下文。它会保存整个对话历史，并将其自动附加到您发送给 AI 的每一条新消息（提示）中。此外，您还可以随时回顾过去的对话。应用程序会记录您的聊天历史，使您能够从上次中断的地方继续讨论。\n\n## 处理多个上下文\n\n在应用程序界面的左侧，有一个面板显示已保存的对话列表。您可以保存多个上下文，并轻松地在它们之间切换。此功能使您能够随时返回并从之前的对话中的任意位置继续。**PyGPT** 会自动为每个上下文生成摘要，类似于 `ChatGPT` 的工作方式，并且允许您自行修改这些标题。\n\n![v2_context_list](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_bcdfb2157bb4.png)\n\n您可以通过以下设置选项在设置中禁用上下文支持：\n\n``` ini\n配置 -> 设置 -> 使用上下文 \n```\n\n## 清除历史记录\n\n您可以通过选择菜单选项来清除整个内存（所有上下文）：\n\n``` ini\n文件 -> 清除历史记录...\n```\n\n## 上下文存储\n\n在应用程序端，上下文存储在工作目录下的 `SQLite` 数据库中（`db.sqlite`）。此外，所有历史记录也会保存为 `.txt` 文件，以便于阅读。\n\n一旦对话开始，聊天的标题就会被生成并在左侧列表中显示。这一过程与 `ChatGPT` 类似，即总结对话的主题，并基于该总结创建线程标题。您可以随时更改线程名称。\n\n# 文件与附件\n\n## 上传附件\n\n**在对话中使用您自己的文件作为额外上下文**\n\n您可以在任何对话中使用自己的文件（例如进行分析）。这可以通过两种方式实现：一是将您的文件索引（嵌入）到向量数据库中，这样在“带文件的聊天”会话期间它们将始终可用；二是添加文件附件（该附件仅在上传它的那次对话中可用）。\n\n**附件**\n\n**PyGPT** 让用户可以轻松上传文件并将其发送给模型以执行分析等任务，类似于在 `ChatGPT` 中附加文件。在文本输入区域旁边有一个专门用于管理文件上传的“附件”选项卡。\n\n**提示：在群组中上传的附件在该群组的所有上下文中均可用。**\n\n![v2_file_input](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_125464c12bfd.png)\n\n您可以使用附件为对话提供额外上下文。上传的文件将通过 LlamaIndex 的加载器转换为文本，然后嵌入到向量存储中。您可以通过 LlamaIndex 支持的应用程序格式上传任何文件。支持的格式包括：\n\n文本类型：\n\n- CSV 文件 (csv)\n- Epub 文件 (epub)\n- Excel .xlsx 表格 (xlsx)\n- HTML 文件 (html, htm)\n- IPYNB 笔记本文件 (ipynb)\n- JSON 文件 (json)\n- Markdown 文件 (md)\n- PDF 文档 (pdf)\n- 纯文本文件 (txt 等)\n- Word .docx 文档 (docx)\n- XML 文件 (xml)\n\n媒体类型：\n\n- 图像（使用视觉模型）(jpg, jpeg, png, gif, bmp, tiff, webp)\n- 视频\u002F音频 (mp4, avi, mov, mkv, webm, mp3, mpeg, mpga, m4a, wav)\n\n压缩文件：\n\n- zip\n- tar、tar.gz、tar.bz2\n\n上传的附件内容将在当前对话中使用，并在整个对话过程中可用（按上下文划分）。处理附件提供的额外上下文有三种模式：\n\n- `完整上下文`：提供最佳效果。此模式会将读取文件的全部内容附加到用户的提示中。此过程在后台进行，如果您上传了大量内容，可能需要消耗大量的 token。\n  \n- `RAG`：索引后的附件将仅通过 LlamaIndex 进行实时查询。此操作不需要额外的 token，但可能无法 1:1 地访问文件的全部内容。\n\n- `摘要`：在查询时，系统会在后台生成一个额外的查询，并由单独的模型执行，以总结附件内容并将所需信息返回给主模型。您可以在“文件和附件”部分的设置中更改用于摘要的模型。\n\n在 `RAG` 和 `摘要` 模式下，您可以启用一个附加设置：`设置 -> 文件和附件 -> 在 RAG 查询中使用历史记录`。这有助于更好地准备 RAG 查询。当此选项开启时，系统会考虑整个对话上下文，而不仅仅是用户最后的查询。这样可以更有效地在索引中搜索额外的上下文。在 `RAG 限制` 选项中，您可以设置应考虑多少条最近的对话记录（`0 = 无限制，默认：3`）。\n\n**重要提示**：使用 `完整上下文` 模式时，文件的全部内容都会包含在提示中，每次使用都可能导致较高的 token 消耗。如果您希望减少 token 的使用量，建议改用 `RAG` 模式，它只会查询向量数据库中的索引附件以提供额外上下文。\n\n**图像作为额外上下文**\n\nJPG、PNG 等图像文件属于特殊情况。默认情况下，图像不会用作额外上下文，而是会使用视觉模型进行实时分析。如果您希望将它们用作额外上下文，则必须在设置中启用“允许图像作为额外上下文”选项：`文件和附件 -> 允许图像作为额外上下文`。\n\n**上传大文件与自动索引**\n\n要使用 RAG 模式，文件必须先被索引到向量数据库中。如果“附件”选项卡中的“上传时自动索引”选项已启用，此过程将在上传时自动完成。然而，对于大型文件来说，索引可能需要较长时间。因此，如果您使用的是不依赖索引的 `完整上下文` 模式，可以关闭“自动索引”选项，以加快附件的上传速度。在这种情况下，只有在首次调用 RAG 模式时才会对附件进行索引，而在那之前，附件将以 `完整上下文` 和 `摘要` 的形式可用。\n\n**嵌入**\n\n在使用 RAG 查询附件时，文档会被索引到一个临时的向量存储中。由于有多个提供商和模型可供选择，您可以在“配置 -> 设置 -> 文件和附件”中选择用于查询附件的模型。您还可以在“配置 -> 设置 -> 索引 \u002F LlamaIndex -> 嵌入 -> 默认嵌入模型”列表中为指定的提供商选择嵌入模型。默认情况下，使用 RAG 查询附件时，将使用与 RAG 查询模型相对应的默认嵌入模型和提供商。如果未为特定提供商提供默认配置，则将使用全局嵌入配置。\n\n例如，如果 RAG 查询模型是 `gpt-4o-mini`，则将使用 `OpenAI` 提供商的默认模型。如果 `OpenAI` 的默认模型未在列表中指定，则将使用全局提供商和模型。\n\n## 下载文件\n\n**PyGPT** 支持自动下载并保存由模型生成的文件。此过程在后台进行，文件会被保存到用户工作目录下的 `data` 文件夹中。用户可以通过导航至“Files”（文件）选项卡来查看或管理这些文件，该选项卡提供了一个针对此特定目录的文件浏览器。在这里，用户可以方便地处理所有由 AI 发送的文件。\n\n这个 `data` 目录也是应用程序存储由 AI 
在本地生成的文件的地方，例如代码文件或其他从模型请求的数据。用户可以选择直接从存储的文件中执行代码并读取其内容，然后将结果反馈给 AI。这一无需手动干预的过程由内置插件系统和模型触发的命令来管理。此外，您还可以对该目录中的文件进行索引（使用集成的 `LlamaIndex`），并将这些内容作为额外的上下文提供给对话。\n\n`Files I\u002FO` 插件负责处理 `data` 目录中的文件操作，而 `Code Interpreter` 插件则允许执行这些文件中的代码。\n\n![v2_file_output](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_125464c12bfd.png)\n\n为了使模型能够管理文件或执行 Python 代码，必须启用 `+ Tools` 选项，并同时激活上述插件：\n\n![v2_code_execute](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_385abd0fa1a3.png)\n\n# 预设\n\n## 什么是预设？\n\n**PyGPT** 中的预设本质上是用于存储和快速应用不同配置的模板。每个预设包含您希望使用的模式设置（如聊天、完成或图像生成）、初始系统提示、分配给 AI 的名称、会话用户名，以及对话所需的“温度”参数。较高的“温度”设置会使 AI 提供更具创造性的回复，而较低的设置则会促使 AI 给出更可预测的回应。这些预设可以在各种模式下使用，并且适用于通过 `OpenAI API` 或 `LlamaIndex` 访问的模型。\n\n应用程序允许您根据需要创建任意数量的预设，并轻松地在它们之间切换。此外，您还可以克隆现有预设，这对于基于先前设置的配置创建变体并进行实验非常有用。\n\n![v2_preset](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_4b18215ffa14.png)\n\n## 示例用法\n\n应用程序包含多个示例预设，可以帮助您熟悉其使用机制。\n\n# 个人资料\n\n您可以为应用程序创建多个个人资料并在它们之间切换。每个个人资料都使用自己的配置、设置、上下文历史记录以及独立的用户文件夹。这使您能够设置不同的环境，并只需单击一下即可快速切换，从而更改整个配置。\n\n应用程序允许您创建新个人资料、编辑现有个人资料以及复制当前个人资料。\n\n要创建新个人资料，请从菜单中选择：`Config -> Profile -> New Profile...`\n\n要编辑已保存的个人资料，请从菜单中选择：`Config -> Profile -> Edit Profiles...`\n\n要切换到已创建的个人资料，请从菜单中选择：`Config -> Profile -> [个人资料名称]`\n\n每个个人资料都使用其自己的用户目录（workdir）。您可以将新创建或编辑后的个人资料与现有的 workdir 及其配置关联起来。\n\n当前活动个人资料的名称会显示在窗口标题中，格式为 (Profile Name)。\n\n# 模型\n\n## 内置模型\n\n截至2026年2月6日，PyGPT 已预配置了一系列模型：\n\n- `bielik-11b-v2.3-instruct:Q4_K_M`（Ollama）\n- `chatgpt-4o-latest`（OpenAI）\n- `claude-3-5-sonnet-20240620`（Anthropic）\n- `claude-3-7-sonnet`（Anthropic）\n- `claude-3-opus`（Anthropic）\n- `claude-opus-4-0`（Anthropic）\n- `claude-sonnet-4-0`（Anthropic）\n- `claude-opus-4-5`（Anthropic）\n- `claude-sonnet-4-5`（Anthropic）\n- `codellama`（Ollama）\n- `codex-mini`（OpenAI）\n- `dall-e-2`（OpenAI）\n- `dall-e-3`（OpenAI）\n- `deep-research-pro-preview-12-2025`（Google）\n- `deepseek-chat`（DeepSeek）\n- `deepseek-r1:1.5b`（Ollama）\n- `deepseek-r1:14b`（Ollama）\n- `deepseek-r1:32b`（Ollama）\n- `deepseek-r1:7b`（Ollama）\n- `deepseek-reasoner`（DeepSeek）\n- `gemini-2.5-computer-use-preview-10-2025`（Google）\n- `gemini-1.5-flash`（Google）\n- `gemini-1.5-pro`（Google）\n- `gemini-2.0-flash-exp`（Google）\n- `gemini-2.5-flash`（Google）\n- `gemini-2.5-flash-preview-native-audio-dialog`（Google，实时）\n- `gemini-2.5-pro`（Google）\n- `gemini-3-flash-preview`（Google）\n- `gemini-3-pro-image-preview`（Google）\n- `gemini-3-pro-preview`（Google）\n- `gpt-3.5-turbo`（OpenAI）\n- `gpt-3.5-turbo-16k`（OpenAI）\n- `gpt-3.5-turbo-instruct`（OpenAI）\n- `gpt-4`（OpenAI）\n- `gpt-4-32k`（OpenAI）\n- `gpt-4-turbo`（OpenAI）\n- `gpt-4-vision-preview`（OpenAI）\n- `gpt-4.1`（OpenAI）\n- `gpt-4.1-mini`（OpenAI）\n- `gpt-4.1-nano`（OpenAI）\n- `gpt-4o`（OpenAI）\n- `gpt-4o-realtime-preview`（OpenAI，实时）\n- `gpt-4o-mini`（OpenAI）\n- `gpt-5`（OpenAI）\n- `gpt-5-mini`（OpenAI）\n- `gpt-5-nano`（OpenAI）\n- `gpt-5.2`（OpenAI）\n- `gpt-image-1`（OpenAI）\n- `gpt-image-1.5`（OpenAI）\n- `gpt-oss:20b`（OpenAI - 通过 Ollama 和 HuggingFace Router）\n- `gpt-oss:120b`（OpenAI - 通过 Ollama 和 HuggingFace Router）\n- `gpt-realtime`（OpenAI，实时）\n- `grok-2-vision`（xAI）\n- `grok-3`（xAI）\n- `grok-3-fast`（xAI）\n- `grok-3-mini`（xAI）\n- `grok-3-mini-fast`（xAI）\n- `grok-4`（xAI）\n- `grok-imagine-image`（xAI）\n- `grok-imagine-video`（xAI）\n- `llama2-uncensored`（Ollama）\n- `llama3.1`（Ollama）\n- `llama3.1:70b`（Ollama）\n- `mistral`（Ollama）\n- `mistral-large`（Ollama）\n- `mistral-small3.1`（Ollama）\n- 
`nano-banana-pro-preview`（Google）\n- `o1`（OpenAI）\n- `o1-mini`（OpenAI）\n- `o1-pro`（OpenAI）\n- `o3`（OpenAI）\n- `o3-deep-research`（OpenAI）\n- `o3-mini`（OpenAI）\n- `o3-pro`（OpenAI）\n- `o4-mini`（OpenAI）\n- `o4-mini-deep-research`（OpenAI）\n- `qwen2:7b`（Ollama）\n- `qwen2.5-coder:7b`（Ollama）\n- `qwen3:8b`（Ollama）\n- `qwen3:30b-a3b`（Ollama）\n- `r1`（Perplexity）\n- `sonar`（Perplexity）\n- `sonar-deep-research`（Perplexity）\n- `sonar-pro`（Perplexity）\n- `sonar-reasoning`（Perplexity）\n- `sonar-reasoning-pro`（Perplexity）\n- `sora-2`（OpenAI）\n- `veo-3.0-generate-preview`（Google）\n- `veo-3.0-fast-generate-preview`（Google）\n- `veo-3.1-generate-preview`（Google）\n- `veo-3.1-fast-generate-preview`（Google）\n\n所有模型均在配置文件 `models.json` 中指定，您可以对其进行自定义。该文件位于您的工作目录中。您可以在其中添加由 `OpenAI API`（或兼容接口）、`Google Gen AI API`、`Anthropic API`、`xAI API` 直接提供的新模型，以及受 `LlamaIndex` 或 `Ollama` 支持的模型。LlamaIndex 的配置则放置在 `llama_index` 键下。\n\n您可以通过手动编辑 `models.json` 文件，或使用菜单 `Config -> Models -> Import` 中的模型导入工具来导入新模型。\n\n**提示：** 列表中的模型按提供方而非制造商排序。同一制造商的模型可能通过不同的提供方获取（例如，OpenAI 模型可通过 `OpenAI API` 或 `OpenRouter` 提供）。如果您希望通过特定提供方使用某款模型，需在 `Config -> Models -> Edit` 中配置该提供方，或直接通过 `Config -> Models -> Import` 导入。\n\n**提示：** Anthropic 和 Deepseek API 提供方使用 VoyageAI 进行嵌入（用于“聊天与文件”及附件 RAG），因此若要使用这些提供方的嵌入功能，还需配置 Voyage API 密钥。\n\n## 添加自定义模型\n\n您也可以添加自己的模型。更多信息请参阅“扩展 PyGPT \u002F 添加新模型”部分。\n\n内置支持以下 LLM 提供方：\n\n- `Anthropic`\n- `Azure OpenAI`（原生 SDK）\n- `Deepseek API`\n- `Google`（原生 SDK）\n- `HuggingFace API`\n- `HuggingFace Router`（OpenAI 兼容 ChatCompletions 封装）\n- `本地模型`（OpenAI API 兼容）\n- `Mistral AI`\n- `Ollama`\n- `OpenAI`（原生 SDK）\n- `OpenRouter`\n- `Perplexity`\n- `xAI`（原生 SDK）\n\n## 如何使用本地或非 GPT 模型\n\n### Llama 3、Mistral、DeepSeek、Qwen、gpt-oss 等本地模型\n\n如何使用本地安装的 Llama 3、DeepSeek、Mistral 等模型：\n\n1) 选择工作模式：“聊天”或“聊天与文件”。\n\n2) 在模型列表中选择、编辑或添加新模型（使用 `ollama` 提供方）。您可以通过菜单 `Config -> Models -> Edit` 编辑模型设置，并在“高级”部分配置模型参数。\n\n3) 从这里下载并安装 Ollama：https:\u002F\u002Fgithub.com\u002Follama\u002Follama\n\n例如，在 Linux 上：\n\n```curl -fsSL https:\u002F\u002Follama.com\u002Finstall.sh | sh```\n\n4) 在本地机器上运行模型（例如 Llama 3）。以 Linux 为例：\n\n```ollama run llama3.1```\n\n5) 返回 PyGPT，在模型列表中选择正确的模型，即可通过本地运行的 Ollama 与所选模型进行对话。\n\n**可用示例模型**\n\n- `llama3.1`\n- `codellama`\n- `mistral`\n- `llama2-uncensored`\n- `deepseek-r1`\n\n等等。\n\n您还可以通过编辑模型列表添加更多模型。\n\n**实时导入器**\n\n您也可以使用 `Config -> Models -> Import...` 工具，从正在运行的 Ollama 实例中实时导入模型。\n\n**自定义 Ollama 端点**\n\nOllama 的默认端点为：http:\u002F\u002Flocalhost:11434\n\n您可以通过在 `Settings -> General -> Advanced -> Application environment` 中设置环境变量 `OLLAMA_API_BASE` 来全局更改此端点。\n\n此外，您还可以在特定模型的配置中更改其“base_url”：\n\n进入 `Config -> Models -> Edit`，然后在“高级 -> [LlamaIndex] ENV Vars”部分添加如下变量：\n\n名称：`OLLAMA_API_BASE`  \n值：`http:\u002F\u002Fmy_endpoint.com:11434`\n\n**Ollama 支持的所有模型列表**\n\nhttps:\u002F\u002Follama.com\u002Flibrary\n\nhttps:\u002F\u002Fgithub.com\u002Follama\u002Follama\n\n**重要提示：** 请务必在模型设置的 **kwargs 列表中正确指定模型名称。\n\n**使用本地嵌入**\n\n参考：https:\u002F\u002Fdocs.llamaindex.ai\u002Fen\u002Fstable\u002Fexamples\u002Fembeddings\u002Follama_embedding\u002F\n\n您可以使用 Ollama 实例进行嵌入。只需在以下路径中选择 `ollama` 提供方：\n\n```Config -> Settings -> Indexes \u002F LlamaIndex -> Embeddings -> 嵌入提供方```\n\n在嵌入提供方的 **kwargs 列表中定义诸如模型名称和 Ollama 基础 URL 等参数，例如：\n\n- 名称：`model_name`，值：`llama3.1`，类型：`str`\n\n- 名称：`base_url`，值：`http:\u002F\u002Flocalhost:11434`，类型：`str`\n\n### Google Gemini、Anthropic Claude、xAI Grok 等\n\n如果您希望在“文件聊天”和“智能体（LlamaIndex）”模式中使用非 OpenAI 模型，请务必在模型配置字段中设置所需的参数，例如 API 密钥。“聊天”模式通过 OpenAI SDK（兼容 API）运行，“文件聊天”和“智能体（LlamaIndex）”模式则通过 LlamaIndex 
运行。\n\n**Google Gemini**\n\n所需环境变量：\n\n- GOOGLE_API_KEY = {api_key_google}\n\n所需关键字参数：\n\n- model\n\n**Anthropic Claude**\n\n所需环境变量：\n\n- ANTHROPIC_API_KEY = {api_key_anthropic}\n\n所需关键字参数：\n\n- model\n\n**xAI Grok**（仅限聊天模式）\n\n所需环境变量：\n\n- OPENAI_API_KEY = {api_key_xai}\n- OPENAI_API_BASE = {api_endpoint_xai}\n\n所需关键字参数：\n\n- model\n\n**Mistral AI**\n\n所需环境变量：\n\n- MISTRAL_API_KEY = {api_key_mistral}\n\n所需关键字参数：\n\n- model\n\n**Perplexity**\n\n所需环境变量：\n\n- PPLX_API_KEY = {api_key_perplexity}\n\n所需关键字参数：\n\n- model\n\n**HuggingFace API**\n\n所需环境变量：\n\n- HUGGING_FACE_TOKEN = {api_key_hugging_face}\n\n所需关键字参数：\n\n- model_name | model\n- token\n- provider = auto\n\n\n# 插件\n\n## 概述\n\n**PyGPT** 可以通过插件扩展功能，添加新特性。\n\n目前可用的插件如下，模型可立即使用：\n\n- `API 调用` - 该插件允许您通过自定义的 API 调用将模型连接到外部服务。\n  \n- `音频输入` - 提供语音识别功能。\n  \n- `音频输出` - 提供语音合成功能。\n  \n- `自主智能体（内联）` - 支持自主对话（AI 对 AI），管理循环并将输出反馈回输入。这是内联智能体模式。\n  \n- `Bitbucket` - 访问 Bitbucket API，管理代码仓库、问题和拉取请求。\n  \n- `文件聊天（LlamaIndex，内联）` - 该插件将 LlamaIndex 存储集成到任何聊天中，并从索引文件及数据库中的历史上下文中提供额外的知识。\n  \n- `代码解释器` - 负责生成并执行 Python 代码，功能类似于 ChatGPT 上的代码解释器，但为本地运行。这意味着模型可以与任何脚本、应用程序或代码进行交互。多个插件可以协同工作以完成一系列任务；例如，“文件”插件可以将生成的 Python 代码写入文件，然后由“代码解释器”执行并将其结果返回给模型。\n  \n- `上下文历史记录（日历，内联）` - 提供对上下文历史数据库的访问权限。\n  \n- `Crontab \u002F 任务调度器` - 该插件提供基于 cron 的作业调度功能——您可以使用 cron 语法安排任务\u002F提示，在指定时间发送。\n  \n- `自定义命令` - 允许您在系统上创建并执行自定义命令。\n  \n- `专家（内联）` - 在任何聊天模式下调用专家。这是内联专家模式。\n  \n- `Facebook` - 管理 Facebook 页面上的用户信息、主页、帖子和照片。\n  \n- `文件 I\u002FO` - 提供对本地文件系统的访问权限，使模型能够读写文件，以及列出和创建目录。\n  \n- `GitHub` - 访问 GitHub API，管理代码仓库、问题和拉取请求。\n  \n- `Google` - 访问 Gmail、Drive、Docs、Maps、Calendar、Contacts、Colab、YouTube、Keep 等，用于管理电子邮件、文件、日程、笔记、视频信息和联系人。\n  \n- `图像生成（内联）` - 将 DALL-E 3 图像生成功能集成到任何聊天和模式中。只需启用并在聊天模式下请求图像，使用 GPT-4 等标准模型即可。该插件无需启用“+ 工具”选项。\n  \n- `邮件客户端` - 提供发送、接收和阅读电子邮件的功能。\n  \n- `MCP` - 通过模型上下文协议（MCP）访问远程工具，包括 stdio、SSE 和可流式传输的 HTTP 传输方式，并支持按服务器的允许\u002F拒绝过滤、Authorization 头部认证以及工具缓存。\n  \n- `鼠标和键盘` - 允许模型控制鼠标和键盘。\n  \n- `OpenStreetMap` - 使用 OpenStreetMap 服务（Nominatim、OSRM、staticmap）进行搜索、地理编码、规划路线和生成静态地图。\n  \n- `实时` - 自动将当前日期和时间附加到系统提示中，告知模型当前时间。\n  \n- `串口 \u002F USB` - 该插件提供读取和向 USB 端口发送数据的命令。\n  \n- `服务器（SSH\u002FFTP）` - 使用 FTP、SFTP 和 SSH 连接到远程服务器。执行远程命令、上传、下载等操作。\n  \n- `Slack` - 处理 Slack 上的用户、对话、消息和文件。\n  \n- `系统提示补充（追加）` - 将列表中的额外系统提示自动追加到每个当前的系统提示中。您可以为每个系统提示添加额外的指令，这些指令会自动附加到系统提示中。\n  \n- `系统（操作系统）` - 允许您在系统上创建并执行自定义命令。\n  \n- `Telegram` - 发送消息、照片和文档；管理聊天和联系人。\n  \n- `Tuya（物联网）` - 通过 Tuya Cloud API 管理 Tuya 智能家居设备。\n  \n- `视觉（内联）` - 将视觉能力集成到任何聊天模式中，而不仅仅是视觉模式。当插件启用时，如果收到图片附件或进行视觉捕捉，模型会在后台临时切换到视觉模式。\n  \n- `语音控制（内联）` - 在对话中提供语音控制命令执行功能。\n  \n- `网络搜索` - 提供连接到互联网、搜索网页获取最新数据以及使用 LlamaIndex 数据加载器索引外部内容的能力。\n  \n- `维基百科` - 在维基百科中搜索信息。\n  \n- `Wolfram Alpha` - 使用 Wolfram Alpha 进行计算和求解：简短答案、完整的 JSON pod、数学运算（求解、求导、积分）、单位换算、矩阵运算和图表绘制。\n  \n- `X\u002FTwitter` - 与推文和用户互动，管理书签和媒体，点赞、转发等。\n\n\n## API 调用\n\n**PyGPT** 允许您通过自定义的 API 调用将模型连接到外部服务。\n\n要激活此功能，请在“插件”菜单中启用“API 调用”插件。\n\n在此插件中，您可以提供允许的 API 调用列表、其参数和请求类型。模型会将提供的占位符替换为所需参数，并向外部服务发起 API 调用。\n\n文档：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#api-calls\n\n## 音频输入\n\n该插件支持语音识别（默认使用 OpenAI 的 `Whisper` 模型，也可选择 `Google` 和 `Bing`）。它允许您用自己的声音向 AI 发出语音命令。Whisper 不需要额外的 API 密钥或额外配置；它使用主 OpenAI 密钥。在插件的配置选项中，您应调整插件对麦克风响应的音量级别（最小能量）。一旦插件启用，底部靠近“发送”按钮的位置会出现一个新的“说话”选项——启用后，应用程序将响应来自麦克风的语音。\n\n该插件还可以扩展其他语音识别提供商。\n\n文档：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#audio-input\n\n## 音频输出\n\n该插件允许您使用 OpenAI 的 TTS 模型，或 `Microsoft Azure`、`Google` 和 `Eleven Labs` 
等其他服务，将文本转换为语音。您还可以添加更多文本转语音提供商。`OpenAI TTS` 无需任何额外的 API 密钥或配置；它直接使用您的 OpenAI 主 API 密钥。\n\n而 `Microsoft Azure` 则需要 Azure API 密钥。在使用 `Microsoft Azure`、`Google` 或 `Eleven Labs` 进行语音合成之前，您必须先使用相应的 API 密钥、区域和语音选项（如适用）对音频插件进行配置。\n\n文档链接：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#audio-output\n\n## 自主代理（内联）\n\n**警告：请谨慎使用自主模式！** —— 当此模式与其他插件结合使用时，可能会产生意想不到的结果！\n\n该插件会在标准聊天模式中激活自主模式，此时 AI 将与自身展开对话。您可以设置循环运行任意次数。在此过程中，模型会不断进行自我对话，回答自己的问题和评论，以寻找最佳解决方案，并对先前生成的内容进行批判性评估。\n\n此模式类似于 `Auto-GPT`，可用于进行更复杂的推理，并通过将问题分解为子任务来逐步解决，直到目标达成。该插件能够与其他插件协同工作，因此可以利用诸如网络搜索、文件系统访问或使用 `DALL-E` 进行图像生成等工具。\n\n文档链接：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#autonomous-agent-inline\n\n## Bitbucket\n\nBitbucket 插件可与 Bitbucket Cloud API 无缝集成，提供管理代码库、问题和拉取请求的功能。该插件具有高度可配置的认证选项、缓存功能，并能高效地处理 HTTP 请求。\n\n- 获取已认证用户的详细信息。\n- 获取特定用户的信息。\n- 列出可用的工作区。\n- 列出工作区中的代码库。\n- 获取特定代码库的详细信息。\n- 创建新的代码库。\n- 删除现有代码库。\n- 获取代码库中某个文件的内容。\n- 向代码库上传文件。\n- 从代码库中删除文件。\n- 列出代码库中的问题。\n- 创建新问题。\n- 对现有问题发表评论。\n- 更新问题的详细信息。\n- 列出代码库中的拉取请求。\n- 创建新的拉取请求。\n- 合并现有的拉取请求。\n- 搜索代码库。\n\n文档链接：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#bitbucket\n\n## 文件聊天（LlamaIndex，内联）\n\n该插件将 `LlamaIndex` 存储集成到任何聊天中，并将额外的知识引入上下文中。\n\n文档链接：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#chat-with-files-llamaindex-inline\n\n## 代码解释器\n\n### 执行代码\n\n自版本 `2.4.13` 起内置了 `IPython`。\n\n该插件的工作方式与 `ChatGPT` 中的 `代码解释器` 类似，主要区别在于它在用户的本地系统上运行。它可以执行模型生成的任何 Python 代码。当与 `文件 I\u002FO` 插件结合使用时，它能够运行保存在 `data` 目录中的文件中的代码。您也可以准备自己的代码文件，并让模型使用它们，或者为此目的添加您自己的插件。您可以在主机上或 Docker 容器中执行命令和代码。\n\n**IPython：** 自版本 `2.4.13` 起，强烈建议采用新的选项：`IPython`，它相比之前的流程有了显著改进。IPython 提供了一个强大的内核环境来执行代码，允许您通过保留先前命令的结果来维持会话状态。这一特性对于迭代开发和数据分析尤其有用，因为它使您能够在先前计算的基础上继续工作，而不必从头开始。此外，IPython 还支持使用魔法命令，例如 `!pip install \u003Cpackage_name>`，这些命令可以直接在会话中安装新包。这种能力简化了依赖项管理过程，增强了开发环境的灵活性。总体而言，IPython 提供了更高效、更友好的代码执行和管理体验。\n\n要在沙盒模式下使用 IPython，您的系统上必须安装 Docker。\n\n安装说明请参见：https:\u002F\u002Fdocs.docker.com\u002Fengine\u002Finstall\u002F\n\n**提示：在 Snap 版本中连接 Docker 中的 IPython**：\n\n要在 Snap 版本中使用 IPython，您必须将 PyGPT 连接到 Docker 守护进程：\n\n```commandline\nsudo snap connect pygpt:docker-executables docker:docker-executables\n```\n\n```commandline\nsudo snap connect pygpt:docker docker:docker-daemon\n```\n\n**代码解释器：** 内置了实时 Python 代码解释器。单击 `\u003C>` 图标即可打开解释器窗口。解释器的输入和输出都与插件相连。执行代码产生的任何输出都会显示在解释器中。此外，您还可以要求模型获取解释器窗口中的内容。\n\n![v2_python](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_a409c46c7774.png)\n\n**信息：** 在编译版本中使用 IPython 执行 Python 代码需要启用沙盒（Docker 容器）。您可以通过 `插件 -> 设置` 来连接 Docker 容器。\n\n\n**提示：** 始终记得启用 `+ 工具` 选项，以允许插件执行命令。\n\n文档链接：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#code-interpreter\n\n## 上下文历史记录（日历，内联）\n\n提供对上下文历史数据库的访问权限。\n该插件还允许读取和创建每日笔记。\n\n使用示例，您可以询问如下内容：\n\n```给我今天的日记```\n\n```为今天保存一条新笔记```\n\n```用...更新我的今日笔记```\n\n```获取昨天的对话列表```\n\n```获取 ID 为 123 的对话内容```\n\n等等。\n\n文档链接：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#context-history-calendar-inline\n\n## Crontab \u002F 任务调度器\n\n该插件提供基于 cron 的作业调度功能——您可以使用 cron 语法安排任务\u002F提示，在指定时间发送。\n\n![v2_crontab](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_6ed3ef0ddf6c.png)\n\n文档链接：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#crontab-task-scheduler\n\n## 自定义命令\n\n借助 `Custom Commands` 插件，您可以将 **PyGPT** 与您的操作系统以及脚本或应用程序集成。您可以定义无限数量的自定义命令，并指示模型在何时以及如何执行它们。配置非常简单，**PyGPT** 
还包含一个简单的教程命令，用于测试和学习其工作原理：\n\n![v2_custom_cmd](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_b7f90f5ca0d1.png)\n\n文档：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#custom-commands\n\n## 专家（内联）\n\n该插件允许在任何聊天模式下调用专家。这就是内联专家（协作）模式。\n\n更多详情请参阅“工作模式 -> 专家”部分。\n\n## Facebook\n\n该插件通过 Facebook 的 Graph API 实现与 Facebook 的集成，支持管理页面、帖子和上传媒体等多种操作。它使用 OAuth2 进行身份验证，并支持自动的令牌交换流程。\n\n- 获取已认证用户的基本信息。\n- 列出用户有权访问的所有 Facebook 页面。\n- 将指定的 Facebook 页面设置为默认页面。\n- 获取 Facebook 页面上的帖子列表。\n- 在 Facebook 页面上创建新帖子。\n- 删除 Facebook 页面上的帖子。\n- 向 Facebook 页面上传照片。\n\n文档：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#facebook\n\n## 文件 I\u002FO\n\n该插件允许在本地文件系统中进行文件管理。它使模型能够创建、读取、写入和查询位于用户工作目录中的 `data` 目录下的文件。借助此插件，AI 还可以生成 Python 代码文件，并在用户的系统中执行这些代码。\n\n插件功能包括：\n\n- 以附件形式发送文件。\n- 读取文件。\n- 追加内容到文件。\n- 写入文件。\n- 删除文件和目录。\n- 列出文件和目录。\n- 创建目录。\n- 下载文件。\n- 复制文件和目录。\n- 移动（重命名）文件和目录。\n- 读取文件信息。\n- 使用 LlamaIndex 对文件和目录进行索引。\n- 使用 LlamaIndex 查询文件。\n- 搜索文件和目录。\n\n如果要创建的文件（同名）已存在，则会在文件名前添加包含日期和时间的前缀。\n\n文档：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#files-i-o\n\n## GitHub\n\n该插件提供了与 GitHub 的无缝集成，允许通过 GitHub 的 API 执行各种操作，如仓库管理、问题跟踪、拉取请求等。此插件需要身份验证，可以通过个人访问令牌 (PAT) 或 OAuth 设备流进行配置。\n\n- 获取您的 GitHub 个人资料详细信息。\n- 获取特定 GitHub 用户的信息。\n- 列出用户或组织的仓库。\n- 获取特定仓库的详细信息。\n- 创建新仓库。\n- 删除现有仓库。\n- 获取仓库中某个文件的内容。\n- 向仓库中上传或更新文件。\n- 删除仓库中的文件。\n- 列出仓库中的问题。\n- 在仓库中创建新问题。\n- 为现有问题添加评论。\n- 关闭现有问题。\n- 列出仓库中的拉取请求。\n- 创建新的拉取请求。\n- 合并现有的拉取请求。\n- 根据查询搜索仓库。\n- 根据查询搜索问题。\n- 根据查询搜索代码。\n\n文档：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#github\n\n## Google（Gmail、Drive、日历、联系人、YouTube、Keep、Docs、地图、Colab）\n\n该插件集成了多种 Google 服务，通过 Google API 实现电子邮件管理、日历事件、联系人处理和文档操作等功能。\n\n- **Gmail**\n  - 列出 Gmail 中的近期邮件。\n  - 列出 Gmail 中的所有邮件。\n  - 搜索 Gmail 中的邮件。\n  - 根据 ID 获取 Gmail 中的邮件详情。\n  - 通过 Gmail 发送邮件。\n\n- **Google 日历**\n  - 列出近期的日历事件。\n  - 列出今天的日历事件。\n  - 列出明天的日历事件。\n  - 列出所有的日历事件。\n  - 根据特定日期获取日历事件。\n  - 向日历中添加新事件。\n  - 从日历中删除事件。\n\n- **Google Keep**\n  - 列出 Google Keep 中的笔记。\n  - 向 Google Keep 中添加新笔记。\n\n- **Google Drive**\n  - 列出 Google Drive 中的文件。\n  - 根据路径查找 Google Drive 中的文件。\n  - 从 Google Drive 下载文件。\n  - 向 Google Drive 上传文件。\n\n- **YouTube**\n  - 获取 YouTube 视频的相关信息。\n  - 获取 YouTube 视频的字幕文本。\n\n- **Google 联系人**\n  - 列出 Google 联系人中的所有联系人。\n  - 向 Google 联系人中添加新联系人。\n\n- **Google Docs**\n  - 创建新文档。\n  - 获取现有文档。\n  - 列出文档。\n  - 向文档中追加文本。\n  - 替换文档中的文本。\n  - 向文档中插入标题。\n  - 导出文档。\n  - 从模板复制内容。\n\n- **Google 地图**\n  - 地址编码。\n  - 坐标反向地理编码。\n  - 获取地点之间的路线。\n  - 使用距离矩阵。\n  - 按文本搜索地点。\n  - 查找附近地点。\n  - 生成静态地图图像。\n\n- **Google Colab**\n  - 列出笔记本。\n  - 创建新笔记本。\n  - 添加代码单元格。\n  - 添加 Markdown 单元格。\n  - 获取笔记本的链接。\n  - 重命名笔记本。\n  - 复制笔记本。\n\n文档：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#google-gmail-drive-calendar-contacts-yt-keep-docs-maps-colab\n\n## 图像生成（内联）\n\n该插件将 `DALL-E 3` 图像生成功能与任何聊天模式集成。只需启用它，并在聊天模式下使用标准模型（如 `GPT-4`）请求生成图像即可。该插件无需启用 `+ Tools` 选项。\n\n文档：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#image-generation-inline\n\n## 邮件客户端\n\n支持发送、接收和阅读收件箱中的电子邮件。目前仅支持 SMTP 协议。更多选项即将推出。\n\n文档：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#mailer\n\n## MCP（模型上下文协议）\n\n借助 `MCP` 插件，您可以将 **PyGPT** 连接到由 `Model Context Protocol` 服务器（stdio、Streamable HTTP 或 
SSE）暴露的远程工具。该插件会发现您配置的服务器上可用的工具，并将其作为可调用命令发布给模型，同时提供正确的参数架构。您可以按服务器对工具进行白名单\u002F黑名单设置，并可选择缓存发现结果以提高速度。\n\n文档：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#mcp\n\n## 鼠标与键盘\n\n引入版本：`2.4.4`（2024-11-09）\n\n**警告：请谨慎使用此插件——启用所有选项将使模型获得对鼠标和键盘的完全控制权。**\n\n该插件允许模型控制鼠标和键盘。借助此插件，您可以向模型发送任务，例如“打开记事本，在其中输入一些内容”或“打开网页浏览器，进行搜索，找到相关内容”。\n\n插件功能包括：\n\n- 获取鼠标光标位置\n- 控制鼠标光标位置\n- 控制鼠标点击\n- 控制鼠标滚动\n- 控制键盘（按键、输入文本）\n- 截取屏幕截图\n\n文档链接：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#mouse-and-keyboard\n\n## OpenStreetMap\n\n利用 OpenStreetMap 服务提供日常地图工具：\n\n- 通过 Nominatim 进行正反地理编码\n- 支持可选的附近或边界框过滤器的搜索\n- 通过 OSRM 进行路线规划（驾车、步行、骑行）\n- 生成 openstreetmap.org URL（中心点\u002F缩放级别或边界框；可选标记）\n- 实用辅助功能：打开以某点为中心的 OSM 网站 URL；下载单个 XYZ 图块\n\n图片将保存在用户数据目录下的 `data\u002Fopenstreetmap\u002F` 文件夹中。\n\n文档链接：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#openstreetmap\n\n## 实时\n\n此插件会自动将当前日期和时间添加到您发送的每个系统提示中。您可以选择仅包含日期、仅包含时间，或同时包含两者。\n\n启用后，它会在将系统提示发送给模型之前，悄悄地将其与当前时间信息结合。\n\n文档链接：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#real-time\n\n## 串口\u002FUSB\n\n提供用于读取和向 USB 端口发送数据的命令。\n\n**提示：** 在 Snap 版本中，您必须先连接接口：https:\u002F\u002Fsnapcraft.io\u002Fdocs\u002Fserial-port-interface\n\n您可以向使用串口通信的 Arduino 或其他控制器发送命令。\n\n文档链接：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#serial-port-usb\n\n## 服务器（SSH\u002FFTP）\n\n服务器插件提供了通过 SSH、SFTP 和 FTP 协议远程管理服务器的集成。该插件允许在远程服务器上执行命令、传输文件以及管理目录。\n\n出于安全考虑，模型不会看到任何凭据，只会显示服务器名称和端口字段（详见文档）。\n\n文档链接：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#server-ssh-ftp\n\n## Slack\n\nSlack 插件与 Slack Web API 集成，使应用程序能够与 Slack 工作区交互。该插件支持 OAuth2 认证，可实现与 Slack 服务的无缝集成，从而执行诸如发布消息、获取用户信息和管理对话等操作。\n\n- 获取用户列表。\n- 列出所有对话。\n- 访问对话历史记录。\n- 获取对话回复。\n- 打开对话。\n- 在聊天中发送消息。\n- 删除聊天消息。\n- 向 Slack 上传文件。\n\n文档链接：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#slack\n\n## 系统（操作系统）\n\n该插件提供对操作系统的访问权限，并执行系统命令。\n\n文档链接：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#system-os\n\n## 系统提示扩展（附加）\n\n该插件会将额外的系统提示（附加数据）从列表中附加到每个当前的系统提示中。您可以为每个系统提示添加额外的指令，这些指令将自动附加到系统提示中。\n\n文档链接：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#system-prompt-extra-append\n\n## Telegram\n\n该插件分别通过 ``Bot API`` 和 ``Telethon`` 库，实现了与 Telegram 的机器人和用户账户的集成。它允许发送和接收消息、管理聊天以及处理更新。\n\n- 向聊天或频道发送文本消息。\n- 向聊天或频道发送带有可选标题的照片。\n- 向聊天或频道发送文档或文件。\n- 获取特定聊天或频道的信息。\n- 在机器人模式下轮询更新。\n- 使用文件标识符下载文件。\n- 在用户模式下列出联系人。\n- 在用户模式下列出最近的对话或聊天。\n- 在用户模式下从特定聊天或频道获取最近的消息。\n\n文档链接：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#telegram\n\n## Tuya（物联网）\n\nTuya 插件与 Tuya 智能家居平台集成，通过 Tuya Cloud API 实现与您的智能设备的无缝交互。该插件提供了一个友好的界面，让您可以直接从助手管理并控制设备。\n\n* 请提供您的 Tuya Cloud 凭据以启用通信。\n* 访问并列出连接到您 Tuya 应用程序账户的所有智能设备。\n* 获取每个设备的详细信息，包括其状态和支持的功能。\n* 使用缓存数据按设备名称轻松搜索设备，以便快速访问。\n* 控制设备的开关、切换状态，并设置特定的设备参数。\n* 向设备发送自定义命令以实现更高级的控制。\n* 读取传感器数值并进行归一化处理，以便于理解。\n\n文档链接：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#tuya-iot\n\n## 视觉（内联）\n\n该插件在所有聊天模式中都集成了视觉功能，而不仅限于视觉模式。一旦启用，当检测到图像附件或视觉捕捉时，模型就会在后台无缝切换到视觉处理。\n\n**提示：** 在标准模式（如“聊天”模式，而非“视觉”模式）中使用 `视觉（内联）` 插件时，聊天窗口底部会出现一个特殊的“+ 视觉”复选框。每当您提供需要分析的内容（如上传的照片）时，该复选框将自动启用。复选框启用后，将使用视觉模型。如果您希望在图像分析完成后退出视觉模式，只需取消选中该复选框即可。下次提供需要分析的图像内容时，它会再次自动启用。\n\n文档链接：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#vision-inline\n\n## 语音控制（内联）\n\n该插件在对话中提供语音控制命令执行功能。\n\n更多详情请参阅“无障碍”部分。\n\n## 网络搜索\n\n**PyGPT** 
允许您将模型连接到互联网，并在您提出查询时实时进行网络搜索。\n\n要激活此功能，请在“插件”菜单中启用 `网络搜索` 插件。\n\n网络搜索由 `Google 自定义搜索引擎` 和 `Microsoft Bing` API 提供，并可扩展至其他搜索引擎提供商。\n\n文档链接：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#web-search\n\n## 维基百科\n\n维基百科插件允许与维基百科进行全方位互动，包括语言设置、文章搜索、摘要获取以及随机文章发现等功能。该插件提供了多种选项来优化您的搜索体验。\n\n* 设置您偏好的维基百科查询语言。\n* 检索并查看当前的语言设置。\n* 浏览支持的语言列表。\n* 使用关键词搜索文章或获取查询建议。\n* 获取文章摘要和详细页面内容。\n* 按地理位置或随机方式发现文章。\n* 直接在您的网页浏览器中打开文章。\n\n文档：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#wikipedia\n\n## Wolfram Alpha\n\n通过 Wolfram Alpha 提供计算知识服务：简短答案、完整的 JSON 数据包、数值与符号数学运算（求解、求导、积分）、单位换算、矩阵运算以及以图片形式呈现的图表。生成的图片将保存在用户数据目录下的 `data\u002Fwolframalpha\u002F` 文件夹中。\n\n文档：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#wolfram-alpha\n\n## X\u002FTwitter\n\nX\u002FTwitter 插件与 X 平台集成，支持推文发布、转发、点赞、媒体上传等全方位互动功能。该插件需要 OAuth2 认证，并提供多种配置选项，以便有效管理 API 交互。\n\n- 通过用户名获取用户详情。\n- 使用用户的唯一 ID 拉取用户信息。\n- 查看特定用户的最新推文。\n- 使用特定关键词或话题标签搜索最近的推文。\n- 创建并发布一条新推文。\n- 从您的个人主页删除现有推文。\n- 对特定推文回复新评论。\n- 引用某条推文，并添加您自己的评论或想法。\n- 点赞某条推文以表达欣赏或支持。\n- 取消对之前点赞的推文的点赞。\n- 转发某条推文，与您的关注者分享。\n- 撤销转发，将其从您的主页移除。\n- 隐藏对某条推文的特定回复。\n- 列出所有书签中的推文，方便快速访问。\n- 将某条推文添加到书签中，以备日后参考。\n- 从书签中移除某条推文。\n- 上传图片或视频等媒体文件用于推文发布。\n- 为上传的媒体设置替代文本，以提升可访问性。\n\n文档：https:\u002F\u002Fpygpt.readthedocs.io\u002Fen\u002Flatest\u002Fplugins.html#x-twitter\n\n# 创建您自己的插件\n\n您可以随时为 **PyGPT** 创建自己的插件。插件可以用 Python 编写，然后在应用启动前注册到程序中。应用程序自带的所有插件都存储在 `plugin` 目录下，您可以将其作为编写自定义插件的示例。\n\nPyGPT 可以通过以下方式扩展：\n\n- 自定义模型\n- 自定义插件\n- 自定义大语言模型\n- 自定义向量存储提供商\n- 自定义数据加载器\n- 自定义音频输入提供商\n- 自定义音频输出提供商\n- 自定义网络搜索引擎提供商\n- 自定义智能体（如 LlamaIndex 或 OpenAI 智能体）\n\n\n更多详细信息，请参阅“扩展 PyGPT \u002F 添加自定义插件”章节。\n\n# 函数、命令和工具\n\n**提示** 请记住启用 `+ 工具` 复选框，以允许执行插件中的工具和命令。\n\n从版本 `2.2.20` 开始，PyGPT 默认使用原生 API 函数调用。您可以通过关闭选项 `配置 -> 设置 -> 提示词 -> 使用原生 API 函数调用` 恢复到内部语法（如下所述）。此外，您还必须在模型高级设置中启用 `工具调用` 复选框，才能使用指定模型的原生函数调用。\n\n在后台，**PyGPT** 使用一种内部语法来定义命令及其参数，这些命令随后可以由模型使用，并在应用程序端甚至直接在系统中执行。该语法如下所示（以下为示例命令）：\n\n```\u003Ctool>{\"cmd\": \"send_email\", \"params\": {\"quote\": \"为什么骷髅不会互相打架？因为他们没有勇气！\"}}\u003C\u002Ftool>```\n\n它是一个包裹在 `\u003Ctool>` 标签之间的 JSON 对象。应用程序会从这种格式化的文本中提取 JSON 对象，并根据提供的参数和命令名称执行相应的函数。许多此类命令是在插件中定义的（例如用于文件操作或互联网搜索的命令）。您也可以使用 `自定义命令` 插件，或者通过创建自己的插件并将其添加到应用程序中，来定义自己的命令。\n\n**提示：** 必须启用 `+ 工具` 选项复选框，才能允许执行来自插件的命令。如果您不想使用命令，请禁用此选项，以避免额外的令牌消耗（因为命令执行的系统提示会消耗额外的令牌，并可能降低本地模型的速度）。\n\n![v2_code_execute](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_385abd0fa1a3.png)\n\n当原生 API 函数调用被禁用时，如果 `+ 工具` 选项处于激活状态，一个负责调用命令的特殊系统提示会被添加到主系统提示中。\n\n然而，还有一种额外的可能性，即您可以定义自己的命令，并借助模型来执行它们。\n这些是函数\u002F工具——在 API 端定义，并使用 JSON 对象进行描述。有关如何定义函数的完整指南，请参阅以下链接：\n\nhttps:\u002F\u002Fplatform.openai.com\u002Fdocs\u002Fguides\u002Ffunction-calling\n\nhttps:\u002F\u002Fcookbook.openai.com\u002Fexamples\u002Fhow_to_call_functions_with_chat_models\n\nPyGPT 提供了这些函数与应用程序中使用的命令（工具）之间的兼容性。您只需使用正确的 JSON 模式定义相应的函数，其余的工作将由 PyGPT 完成，它会实时将这种语法转换为自己的内部格式。\n\n本地函数和插件中的工具在所有模式下均可使用，除了 `助手` 模式。\n\n要为 `助手` 模式启用本地函数（在此模式下默认使用远程工具），请创建一个新的助手，打开预设编辑对话框，然后从插件中导入工具，或使用 `+ 函数` 按钮添加新函数，例如具有以下内容：\n\n**名称：** `send_email`\n\n**描述：** `通过电子邮件发送一句名言`\n\n**参数（JSON）：**\n\n```json\n{\n        \"type\": \"object\",\n        \"properties\": {\n            \"quote\": {\n                \"type\": \"string\",\n                \"description\": \"一条生成的有趣名言\"\n            }\n        },\n        \"required\": [\n            \"quote\"\n        ]\n}\n```\n\n然后，在 `自定义命令` 插件中，创建一个同名且参数相同的命令：\n\n**命令名称：** `send_email`\n\n**指令\u002F提示：** `发送邮件`\n\n**参数列表：** `quote`\n\n**要执行的命令：** `echo 
\"OK. 邮件已发送：{quote}\"`\n\n接下来，启用 `+ 工具` 选项并启用该插件。\n\n向模型提问：\n\n```创建一句有趣的名言并将其通过电子邮件发送出去``` \n\n作为回应，您将收到一条准备好的命令，如下所示：\n\n```\u003Ctool>{\"cmd\": \"send_email\", \"params\": {\"quote\": \"我们为什么对演员说‘祝你好运’？因为每部戏剧都有演员阵容！\"}}\u003C\u002Ftool>```\n\n收到后，PyGPT 将执行系统 `echo` 命令，使用 `params` 字段中的参数，并将 `{quote}` 占位符替换为 `quote` 参数值。\n\n最终，模型将收到如下响应：\n\n```[{\"request\": {\"cmd\": \"send_email\"}, \"result\": \"OK. 邮件已发送：我们为什么对演员说‘祝你好运’？因为每部戏剧都有演员阵容！\"}]```\n\n通过这种方式，您可以在应用程序中同时使用两种形式——API 提供商的 JSON 模式和 PyGPT 的模式——来定义和执行命令及函数。它们将相互协作，您可以交替使用它们。\n\n# 工具\n\nPyGPT 提供了多种实用工具，包括：\n\n- 记事本\n- 绘图工具\n- 日历\n- 索引器\n- 媒体播放器\n- 图片查看器\n- 文本编辑器\n- 转录音频\u002F视频文件\n- OpenAI 向量存储\n- Google 向量存储\n- Python 代码解释器\n- HTML\u002FJS 画布（内置 HTML 渲染器）\n- 翻译工具\n- 网页浏览器（Chromium）\n- 代理构建器（测试版）\n\n![v2_tool_menu](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_fb04ffa33dc5.png)\n\n\n## 记事本\n\n应用程序内置了一个记事本，分为多个标签页。这可以帮助您以方便的方式存储信息，而无需打开外部文本编辑器。每当内容发生变化时，记事本的内容都会自动保存。\n\n![v2_notepad](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_ac81dab3bd0f.png)\n\n## 绘图工具\n\n使用 `绘图工具`，您可以快速绘制草图，并将其提交给模型进行分析。您还可以编辑从磁盘打开或从相机捕获的图像，例如添加箭头或为物体勾勒轮廓等。此外，您还可以截取系统屏幕截图——捕获的图像会放入绘图工具中，并附加到正在发送的查询中。\n\n![v2_draw](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_826b6a0ca3aa.png)\n\n要截取屏幕截图，只需在托盘图标下拉菜单中点击 `带截图提问` 选项：\n\n![v2_screenshot](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_d2a13ee6d709.png)\n\n## 日历\n\n使用日历，您可以回溯到特定日期的对话，并添加每日笔记。添加笔记后，它会在列表上标记出来，您可以通过右键单击并选择 `设置标签颜色` 来更改其标签颜色。单击某一天，即可显示当天的对话。\n\n![v2_calendar](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_a4c953c9e46d.png)\n\n\n## 索引器\n\n\n此工具可用于将本地文件或目录以及外部网页内容索引到向量数据库中，随后可与 `文件聊天` 模式一起使用。借助此工具，您可以管理本地索引，并通过内置的 `LlamaIndex` 集成添加新数据。\n\n![v2_tool_indexer](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_40cfb3d01d15.png)\n\n## 媒体播放器\n\n\n一个简单的视频\u002F音频播放器，允许您直接在应用内播放视频文件。\n\n## 图片查看器\n\n\n一款简单的图片浏览器，允许您直接在应用内预览图片。\n\n\n## 文本编辑器\n\n\n一款简单的文本编辑器，使您能够直接在应用内编辑文本文件。\n\n\n## 音频\u002F视频文件转录\n\n\n一个音频转录工具，您可以使用它从视频或音频文件中生成文字稿。该工具将利用语音识别插件从文件中提取文本内容。\n\n\n## OpenAI \u002F Google 向量存储\n\n\n远程向量存储管理。\n\n\n## Python 代码解释器\n\n\n此工具允许您直接在应用内运行 Python 代码。它与 `Code Interpreter` 插件集成，确保模型生成的代码可自动在解释器中执行。在插件设置中，您可以启用在 Docker 环境中执行代码。\n\n**提示：** 在编译版本中使用 IPython 执行 Python 代码时，需要启用沙盒（Docker 容器）。您可以通过 `插件 -> 设置` 连接 Docker 容器。\n\n## HTML\u002FJS Canvas\n\n允许在 HTML Canvas 中渲染 HTML\u002FJS 代码（基于 Chromium 的内置渲染器）。要使用它，只需让模型在内置浏览器（HTML Canvas）中渲染 HTML\u002FJS 代码即可。该工具与 `Code Interpreter` 插件集成。\n\n## 翻译器\n\n通过 AI 模型实现多语言之间的翻译功能。\n\n## 网页浏览器\n\n基于 Chromium 的内置网页浏览器，允许您直接在应用内打开网页。**安全提示：** 为保护您的安全，请勿使用内置浏览器进行敏感或关键任务。它仅适用于基本用途。\n\n# 代理构建器（测试版）\n\n要启动代理编辑器，请前往：\n\n`工具 -> 代理构建器`\n\n![节点](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_47ad606dbc3c.png)\n\n此工具允许您使用节点编辑器创建代理工作流，无需编写任何代码。您可以添加新的代理类型，它将出现在预设列表中。\n\n要添加新元素，右键单击编辑网格并选择“添加”以插入新节点。\n\n**节点类型：**\n\n- **开始**：代理的起点（用户输入）。\n- **代理**：具有可自定义默认参数的单个代理，例如系统指令和工具使用。这些设置可以在预设中被覆盖。\n- **记忆**：代理之间的共享内存（共享上下文）。\n- **结束**：终点，将控制权交还给用户。\n\n连接了共享内存的代理会彼此共享该内存。未连接共享内存的代理仅接收前一代理的最新输出。\n\n序列中的第一个代理始终接收用户传递的完整上下文。\n\n通过槽位连接节点来连接代理和记忆。要连接槽位，只需从输入端口拖动到输出端口即可（按住 Ctrl 键并点击鼠标可移除连接）。\n\n**节点编辑器导航：**\n\n- **右键单击**：添加节点、撤销、重做、清除\n- **中键 + 拖动**：平移视图\n- **Ctrl + 鼠标滚轮**：缩放\n- **左键单击端口**：创建连接\n- **Ctrl + 左键单击端口**：重新布线或断开连接\n- **右键单击或 DELETE 节点\u002F连接**：移除节点\u002F连接\n\n**提示：** 在 `设置 -> 调试 -> 将代理使用情况记录到控制台` 中启用代理调试，以便将完整的工作流记录到控制台。\n\n使用此工具构建的代理与 OpenAI Agents 和 
LlamaIndex 兼容。\n\n**注释：**\n\n路由与系统指令：对于每个拥有多个通往下一代理连接的代理，在您的系统提示之前会自动注入一条路由指令：\n\n```\n您是多代理流程中的具备路由能力的代理。\n您的 ID 是：\u003Ccurrent_id>, 名称：\u003Cagent_name>。\n您必须仅以一个 JSON 对象回应，不得包含其他内容。\n模式：\n{\n  \"route\": \"\u003C下一个代理的 ID，来自允许的路由列表，或字符串 'end'>\",\n  \"content\": \"\u003C最终面向用户的响应文本（或工具结果）>\"\n}\n规则：\n- allowed_routes: [\u003C允许的 >]\n- 如果您想结束流程，请将 route 设置为 \"end\"。\n- content 必须包含面向用户的答案（您可以在 content 中包含结构化数据，如 JSON 或 Markdown）。\n- 不得在 JSON 外添加任何评论。不得有前后缀文本。\n- 如果使用工具，仍需返回包含工具结果摘要的最终 JSON。\n- 人性化的路由名称：\u003Cnames>\n- 人性化的路由角色（可选）：\u003Croles>\n\n\u003C此处开始您的系统指令>\n```\n\n**提示：** 代理构建器目前处于测试阶段。\n\n\n# 令牌用量计算\n\n## 输入令牌\n\n应用程序内置了令牌计算器。它会尝试预测特定查询将消耗的令牌数量，并实时显示这一估算值。这有助于您更好地控制令牌使用情况。应用会提供关于用户提示、系统提示、任何附加数据以及上下文中使用的令牌（即先前交互的记忆）的详细信息。\n\n**请注意，这些仅为近似计算，不包括某些插件所消耗的令牌数量。确切的令牌用量可在服务提供商的网站上查询。**\n\n![v2_tokens1](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_690f4136eda7.png)\n\n## 总令牌数\n\n在收到模型响应后，应用程序会显示此次查询实际使用的总令牌数量（由 API 返回）。\n\n![v2_tokens2](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_26d89ee90f82.png)\n\n# 无障碍功能\n\n自版本 `2.2.8` 起，PyGPT 增加了对残障人士和语音控制的测试版支持。这对于视障人士来说可能非常有用。\n\n在 `设置 \u002F 无障碍` 菜单中，您可以开启以下无障碍功能：\n\n\n- 启用语音控制\n\n- 将屏幕上的操作和事件通过语音播报出来\n\n- 为各项操作设置键盘快捷键。\n\n\n**使用语音控制**\n\n语音控制可以通过两种方式开启：全局方式，即通过 `设置 -> 无障碍` 中的设置；以及通过 `语音控制（内联）` 插件。这两种方式可以使用相同的语音命令，但工作方式略有不同——全局方式允许您在任何地方、不局限于对话中执行命令，而插件方式则让您在对话过程中直接执行命令，从而可以在与模型交互的同时发出指令。\n\n在插件（内联）模式下，您还可以启用一个特殊的触发词，只有说出该词后，内容才会被识别为语音命令。您可以通过进入 `插件 -> 设置 -> 语音控制（内联）` 来进行设置：\n\n```bash\n语音命令的魔法前缀\n```\n\n**提示：** 当通过插件启用语音控制时，只需在正常输入对话内容的同时，使用标准的 `麦克风` 按钮来发出命令即可。\n\n\n**全局启用语音控制**\n\n\n在 `设置 \u002F 无障碍` 中开启语音控制选项：\n\n\n```bash\n启用语音控制（使用麦克风）\n```\n\n启用此选项后，窗口右下角会出现一个 `语音控制` 按钮。点击该按钮后，麦克风将开始监听；再次点击则停止监听并开始识别您所说的语音命令。您也可以随时按下 `ESC` 键取消录音。此外，您还可以设置一个键盘快捷键来开关语音录制功能。\n\n\n语音命令的识别基于模型，因此您无需担心发音是否完全准确。\n\n\n**以下是您可以使用语音发出的命令列表：**\n\n- 获取当前应用状态\n- 退出应用\n- 开启音频输出\n- 关闭音频输出\n- 开启音频输入\n- 关闭音频输入\n- 向日历添加备忘录\n- 清除日历中的备忘录\n- 阅读日历备忘录\n- 开启摄像头\n- 关闭摄像头\n- 使用摄像头拍摄图片\n- 创建新上下文\n- 切换到上一个上下文\n- 切换到下一个上下文\n- 切换到最新上下文\n- 聚焦输入框\n- 发送输入内容\n- 清空输入内容\n- 获取当前对话信息\n- 获取可用命令列表\n- 停止执行当前操作\n- 清除附件\n- 阅读最近一次对话记录\n- 阅读整个对话\n- 重命名当前上下文\n- 搜索对话\n- 清除搜索结果\n- 将消息发送到输入框\n- 将消息追加到当前输入而不发送\n- 切换到聊天模式\n- 切换到文件聊天（llama-index）模式\n- 切换到下一模式\n- 切换到上一模式\n- 切换到下一模型\n- 切换到上一模型\n- 向记事本添加笔记\n- 清空记事本内容\n- 阅读当前记事本内容\n- 切换到下一预设\n- 切换到上一预设\n- 切换到聊天标签页\n- 切换到日历标签页\n- 切换到绘图（画板）标签页\n- 切换到文件标签页\n- 切换到记事本标签页\n- 切换到下一标签页\n- 切换到上一标签页\n- 开始监听语音输入\n- 停止监听语音输入\n- 切换监听语音输入状态\n\n更多命令即将推出。\n\n只需说出与上述描述之一相符的动作即可。这些描述已被模型所知，并为其分配了相应的命令。当您说出符合这些模式的命令时，模型就会触发相应的操作。\n\n\n为了方便起见，您可以在语音录制开始和结束时播放一段简短的声音提示。为此，请开启以下选项：\n\n\n```bash\n麦克风监听开始\u002F结束时播放音频提示\n```\n\n若要启用语音命令被识别并开始执行时的音效通知，请开启以下选项：\n\n\n```bash\n语音命令执行时播放音频提示\n```\n\n如需通过语音合成将屏幕上的事件及已完成命令的信息进行语音播报，您可以开启以下选项：\n\n```bash\n使用语音合成描述屏幕上的事件。\n```\n![v2_access](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_433d14c9a2a5.png)\n\n# 配置\n\n## 设置\n\n以下基本选项可以直接在应用程序中修改：\n\n``` ini\n设置 -> 设置...\n```\n\n![v2_settings](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_readme_ca405d7f7897.png)\n\n**常规**\n\n- `退出时最小化到托盘`：退出时最小化到系统托盘图标。此选项需要启用系统托盘图标才能生效。默认值：否。\n\n- `渲染引擎`：聊天输出的渲染引擎：`WebEngine \u002F Chromium`——用于完整的 HTML\u002FCSS；`Legacy (markdown)`——用于传统的简单 Markdown 格式。默认值：WebEngine \u002F Chromium。\n\n- `OpenGL 硬件加速`：在 `WebEngine \u002F Chromium` 渲染器中启用硬件加速。默认值：否。\n\n- `使用代理`：启用此选项以通过代理连接 API。默认值：否。\n\n- `代理地址`：用于 API SDK 连接的代理地址；支持 HTTP\u002FSOCKS 协议，例如 http:\u002F\u002Fproxy.example.com 或 socks5:\u002F\u002Fuser:pass@host:port。\n\n- 
`应用环境变量 (os.environ)`：在应用启动时设置的额外环境变量。\n\n- `内存限制`：渲染器的内存限制；设置为 0 表示禁用。如果大于 0，应用将在达到限制后尝试释放内存。接受的格式：3.5GB、2GB、2048MB、1_000_000。最低值：2GB。\n\n**API 密钥**\n\n- `OpenAI API KEY`：使用 OpenAI API 所需。如果您希望使用自定义端点或本地 API，则此处可填写任意值。\n\n- `OpenAI ORGANIZATION KEY`：组织的 API 密钥，在应用中使用时为可选。\n\n- `API 端点`：OpenAI API 的端点 URL，默认值：https:\u002F\u002Fapi.openai.com\u002Fv1。\n\n- `Anthropic API KEY`：使用 Anthropic API 和 Claude 模型所需的密钥。\n\n- `Deepseek API KEY`：使用 Deepseek API 所需。\n\n- `Google API KEY`：使用 Google API 和 Gemini 模型所需的密钥。\n\n- `HuggingFace API KEY`：使用 HuggingFace API 所需。\n\n- `Mistral AI API KEY`：使用 Mistral AI API 所需。\n\n- `Perplexity API KEY`：使用 Perplexity API 和 Sonar 模型所需的密钥。\n\n- `xAI API KEY`：使用 xAI API 和 Grok 模型所需的密钥。\n\n- `OpenAI API 版本`：Azure OpenAI API 的版本，例如 2023-07-01-preview。\n\n- `Azure OpenAI API 端点`：Azure OpenAI API 的端点，例如 https:\u002F\u002F\u003Cyour-resource-name>.openai.azure.com\u002F。\n\n**布局**\n\n- `样式（聊天）`：聊天界面的样式（区块式、类似 ChatGPT 的样式，或宽版类似 ChatGPT 的样式）。仅适用于 `WebEngine \u002F Chromium` 渲染模式。\n\n- `缩放`：调整聊天窗口（网页渲染视图）的缩放比例。仅适用于 `WebEngine \u002F Chromium` 渲染模式。\n\n- `字体大小（聊天窗口）`：调整聊天窗口（纯文本）和记事本中的字体大小。\n\n- `字体大小（输入）`：调整输入窗口中的字体大小。\n\n- `字体大小（上下文列表）`：调整上下文列表中的字体大小。\n\n- `字体大小（工具箱）`：调整右侧工具箱中的字体大小。\n\n- `布局密度`：调整布局元素的密度。默认值：-1。\n\n- `DPI 缩放`：启用或禁用 DPI 缩放。此选项生效需要重启应用程序。默认值：True。\n\n- `DPI 因子`：DPI 因子。此选项生效需要重启应用程序。默认值：1.0。\n\n- `自动折叠用户消息（px）`：在用户消息高度达到 N 像素后自动折叠，设置为 0 则禁用自动折叠。\n\n- `显示提示（帮助说明）`：显示帮助提示，默认值：True。\n\n- `存储对话框位置`：启用或禁用对话框位置的存储\u002F恢复功能，默认值：True。\n\n**代码语法**\n\n- `代码语法高亮`：代码块中的语法高亮主题。仅适用于 WebEngine \u002F Chromium 渲染模式。\n\n- `禁用语法高亮`：在代码块中禁用语法高亮的选项。仅适用于 WebEngine \u002F Chromium 渲染模式。\n\n- `静态内容最大高亮字符数`：设置静态内容中最多可高亮的字符数。设置为 0 则禁用。仅适用于 WebEngine \u002F Chromium 渲染模式。\n\n- `静态内容最大高亮行数`：设置静态内容中最多可高亮的行数。设置为 0 则禁用。仅适用于 WebEngine \u002F Chromium 渲染模式。\n\n- `实时流模式最大高亮行数`：设置实时流模式中最多可高亮的行数。设置为 0 则禁用。仅适用于 WebEngine \u002F Chromium 渲染模式。\n\n- `实时流模式每 N 字符高亮一次`：设置实时流模式中每隔 N 个字符进行高亮的间隔。仅适用于 WebEngine \u002F Chromium 渲染模式。\n\n- `实时流模式每 N 行高亮一次`：设置实时流模式中每隔 N 行进行高亮的间隔。仅适用于 WebEngine \u002F Chromium 渲染模式。\n\n**文件和附件**\n\n- `将附件存储在工作目录上传文件夹中`：启用后，会存储已上传附件的本地副本以供将来使用。默认值：True。\n\n- `将图片、截图及上传文件存储在数据目录中`：启用后，所有内容将存储在一个单独的数据目录中。默认值：False。\n\n- `允许将图片作为附加上下文`：若启用，图片可用作附加上下文。默认值：False。\n\n- `仅附加一次附件（模式：始终）`：若启用，发送的附件将仅附加到发送的消息中一次，而不是每次都作为附加上下文附加到输入提示中。强制模式——影响所有模型。默认值：False。\n\n- `仅附加一次附件（模式：仅当可用时，自动检测）`：若启用，如果所选模型和 API 在服务器端处理已发送消息的存储，则发送的附件将仅附加到发送的消息中一次。这可以优化令牌使用，只需发送一次附件。默认值：True。\n\n- `用于查询索引的模型`：在选择 RAG 选项时，用于准备查询并查询索引的模型。\n\n- `用于附件内容摘要的模型`：在选择“摘要”选项时，用于生成文件内容摘要的模型。\n\n- `RAG 查询中使用历史记录`：若启用，在 RAG 或摘要模式下准备查询时，将使用整个对话的内容。\n\n- `RAG 限制`：仅在启用“RAG 查询中使用历史记录”选项时有效。指定在生成 RAG 查询时将使用多少条最近的对话条目。0 = 无限制。\n\n- `文件下载目录`：下载文件的子目录，例如在助手模式下位于“data”目录内。默认值：“download”。\n\n**上下文**\n\n- `上下文阈值`：设置为模型响应下一个提示预留的令牌数量。\n\n- `上下文列表中显示的最后上下文数量限制（0 = 无限制）`：上下文列表中显示的最后上下文数量限制，默认值：0（无限制）。\n\n- `在上下文列表顶部显示上下文分组`：在顶部显示分组，默认值：False。\n\n- `在上下文列表中显示日期分隔线`：显示日期区间，默认值：True。\n\n- `在上下文列表的分组中显示日期分隔线`：在分组中显示日期区间，默认值：True。\n\n- `在上下文列表的置顶项中显示日期分隔线`：在置顶项中显示日期区间，默认值：False。\n\n- `使用上下文`：切换是否使用对话上下文（对先前输入的记忆）。\n\n- `存储历史记录`：切换是否存储对话历史记录。\n\n- `在历史记录中存储时间`：选择是否在 .txt 文件中添加时间戳。\n\n- `上下文自动摘要`：启用上下文标题的自动生成，默认值：True。\n\n- `锁定不兼容模式`：若启用，当在现有上下文中切换到不兼容模式时，应用将创建一个新的上下文。\n\n- `不仅在标题中，也在对话内容中搜索`：若启用，上下文搜索将同时考虑对话内容，而不仅仅是对话标题。\n\n- `显示 LlamaIndex 来源`：若启用，将在响应中显示所使用的来源（如果可用；流式聊天中可能无法使用）。\n\n- `显示代码解释器输出`：若启用，助理 API 中的代码解释器输出将以实时方式显示（流式模式），默认值：True。\n\n- `使用额外的上下文输出`：若启用，来自命令结果的纯文本输出（如果可用）将与 JSON 输出一起显示，默认值：True。\n\n- `在内置浏览器中打开 URL`：启用此选项后，所有 URL 将在内置浏览器（Chromium）中打开，而非外部浏览器。默认值：False。\n\n- 
`用于自动摘要的模型`：用于上下文自动摘要（在上下文列表中生成标题）的模型（默认：*gpt-4o-mini*）。**提示**：如果您更倾向于使用本地模型，也应在此处更改模型。\n\n**远程工具**\n\n启用或禁用远程工具，如网络搜索、MCP 或图像生成。\n\n远程工具适用于以下提供商，并且仅可通过其原生 SDK 使用：\n\n- Anthropic\n- Google\n- OpenAI\n- xAI\n\n**模型**\n\n- `最大输出令牌数`：设置模型单次响应所能生成的最大令牌数。\n\n- `最大总令牌数`：设置应用程序可发送给模型的最大令牌总数，包括对话上下文。\n\n- `RPM 限制`：设置每分钟最大请求次数（RPM）的限制，0 = 无限制。\n\n- `温度`：设置对话的随机性。较低的值会使模型的回答更具确定性，而较高的值则会增加创造性和抽象性。\n\n- `Top-p`：一个影响模型回答多样性的参数，类似于温度。更多信息请参阅 OpenAI 文档。\n\n- `频率惩罚`：降低模型回答中重复出现的可能性。\n\n- `存在惩罚`：阻止模型提及已在对话中讨论过的主题。\n\n**提示**\n\n- `使用原生 API 函数调用`：使用 API 函数调用来运行插件中的命令，而不是使用命令提示——自主模式和专家模式中禁用，默认值：True。\n\n- `命令执行：指令`: 用于追加命令执行指令的提示。占位符：{schema}, {extra}\n\n- `命令执行：额外页脚（非助理模式）`: 在命令 JSON 模式后追加的额外页脚。\n\n- `命令执行：额外页脚（仅助理模式）`: 额外的说明，用于将本地命令与已在助理中配置的远程环境区分开来。\n\n- `上下文：自动摘要（系统提示）`: 用于上下文自动摘要的系统提示。\n\n- `上下文：自动摘要（用户消息）`: 用于上下文自动摘要的用户消息。占位符：{input}, {output}\n\n- `代理：循环中的评估提示（LlamaIndex） - 完成百分比`: 用于在代理（LlamaIndex\u002FOpenAI）模式下按完成百分比评估响应的提示。\n\n- `代理：循环中的评估提示（LlamaIndex） - 分数百分比`: 用于在代理（LlamaIndex\u002FOpenAI）模式下按分数百分比评估响应的提示。\n\n- `代理：系统指令（旧版）`: 用于指示如何处理自主模式的提示。\n\n- `代理：继续（旧版）`: 用于自动继续对话的提示。\n\n- `代理：继续（始终，更多步骤）（旧版）`: 用于始终自动继续对话的提示（更多推理——“始终继续…”选项）。\n\n- `代理：目标更新（旧版）`: 用于指示如何更新当前目标状态的提示。\n\n- `专家：主提示`: 用于指示如何处理专家的提示。\n\n- `图像生成`: 用于生成图像生成提示的提示（如果禁用了原始模式）。\n\n**图片和视频**\n\n**图片**\n\n- `图片尺寸`: 生成图片的分辨率（DALL-E）。默认值：1024x1024。\n\n- `图片质量`: 生成图片的质量（DALL-E）。默认值：标准。\n\n- `提示生成模型`: 用于生成图像生成提示的模型（如果禁用了原始模式）。\n\n**视频**\n\n- `宽高比`: 指定帧的宽高比（例如 16:9、9:16、1:1）。可用性取决于所选模型。\n\n- `视频时长`: 设置片段长度，单位为秒；不同模型可能有不同的限制。\n\n- `帧率`: 确定每秒帧数（例如 24、25、30）。实际值可能会被模型四舍五入或忽略。\n\n- `生成音频`: 如果模型支持，可以选择包含合成背景音频。\n\n- `负面提示`: 指定输出中应避免的词语或短语（用逗号分隔）。\n\n- `提示增强模型`: 定义在视频生成前用于优化您的提示的 LLM。这并非视频生成模型。\n\n- `视频分辨率`: 设置目标输出分辨率（例如 720p、1080p）。可用性取决于模型。\n\n- `种子`: 提供可选的随机种子以实现结果的可重复性；留空则使用随机种子。\n\n**视觉与摄像头**\n\n- `摄像头输入设备`: 视频采集摄像头索引（摄像头索引，默认为 0）。\n\n- `摄像头采集宽度（像素）`: 视频采集分辨率（宽度）。\n\n- `摄像头采集高度（像素）`: 视频采集分辨率（高度）。\n\n- `图像采集质量`: 视频采集图像 JPEG 质量（%）。\n\n**音频**\n\n- `音频输入后端`: 选择音频输入的后端（Native\u002FQtMultimedia、PyAudio、PyGame）。\n\n- `音频输入设备`: 选择麦克风输入的音频设备。\n\n- `音频输出后端`: 选择音频输出的后端（Native\u002FQtMultimedia、PyAudio）。\n\n- `音频输出设备`: 选择音频输出的音频设备。\n\n- `声道`: 输入声道，默认为 1。\n\n- `采样率`: 采样率，默认为 44100。\n\n- `使用缓存`: 使用缓存生成音频文件。\n\n- `最大存储文件数`: 音频缓存中最多可存储的文件数量。\n\n- `音频通知麦克风监听开始\u002F结束`: 启用麦克风监听开始\u002F结束时的音频提示音。\n\n- `连续音频录制（分段）`: 启用记事本中长时间音频录制（语音笔记）的分段录制功能。\n\n- `VAD 前缀填充（毫秒）`: VAD 前缀填充时间，默认为 300 毫秒（实时音频模式）。\n\n- `VAD 结束静音（毫秒）`: VAD 结束静音时间，默认为 2000 毫秒（实时音频模式）。\n\n**索引 \u002F LlamaIndex**\n\n**通用**\n\n- `索引`: 已创建索引的列表。\n\n**向量存储**\n\n- `向量存储`: 要使用的向量存储（由 LlamaIndex 提供的向量数据库）。\n\n- `向量存储 (**kwargs)`: 向量存储提供商的关键字参数（api_key、index_name 等）。\n\n**聊天**\n\n- `聊天模式`: LlamaIndex 查询引擎中使用的聊天模式，默认为上下文模式。\n\n- `在带文件的聊天模式中使用 ReAct 代理进行工具调用`: 在带文件的聊天模式中启用 ReAct 代理进行工具调用。\n\n- `自动检索附加上下文`: 在每次查询时启用从向量存储中自动检索附加上下文的功能。\n\n**嵌入**\n\n- `嵌入提供者`: 全局嵌入提供者（用于索引和带文件的聊天）。\n\n- `嵌入提供者（ENV）`: 全局嵌入提供者的环境变量（API 密钥等）。\n\n- `嵌入提供者 (**kwargs)`: 全局嵌入提供者的关键字参数（model_name 等）。\n\n- `附件的默认嵌入提供者`: 定义附件中使用的嵌入模型提供者。\n\n- `嵌入 API 调用的 RPM 限制`: 指定每分钟的最大请求数（RPM），0 表示无限制。\n\n**索引**\n\n- `递归目录索引`: 启用递归目录索引，默认为关闭。\n\n- `重新索引时替换索引中的旧文档版本`: 如果启用，当最新版本的文档被索引时，索引中之前的版本将被删除，默认为开启。\n\n- `排除的文件扩展名`: 如果没有针对该扩展名的数据加载器，则排除的文件扩展名，用逗号分隔。\n\n- `强制排除文件`: 如果启用，即使该扩展名的数据加载器处于活动状态，排除列表也会生效。默认：关闭。\n\n- `发生错误时停止索引`: 如果启用，每当发生错误时索引就会停止。默认：开启。\n\n- `要追加或替换到已索引文档（文件）中的自定义元数据`: 为指定的文件扩展名定义自定义元数据键=>值字段，不同扩展名之间用逗号分隔。\\n允许的占位符：{path}、{relative_path}、{filename}、{dirname}、{relative_dir}、{ext}、{size}、{mtime}、{date}、{date_time}、{time}、{timestamp}。如果您希望将字段应用于所有文件，请使用 *（星号）作为扩展名。将值设置为空以从元数据中移除指定键的字段。\n\n- `要追加或替换到已索引文档（网页）中的自定义元数据`: 
为指定的外部数据加载器定义自定义元数据键=>值字段。\\n允许的占位符：{date}、{date_time}、{time}、{timestamp} + {数据加载器参数}。\n\n**数据加载器**\n\n- `数据加载器的附加关键字参数 (**kwargs)`: 数据加载器的附加关键字参数，如设置、API 密钥等。这些参数将传递给加载器；请参阅 LlamaIndex 或 LlamaHub 加载器参考，以获取指定数据加载器允许的参数列表。\n\n- `在视频\u002F音频和图像（视觉）加载器中使用本地模型`: 启用在视频\u002F音频和图像（视觉）加载器中使用本地模型。如果禁用，则将使用 API 模型（GPT-4 Vision 和 Whisper）。注意：本地模型仅在 Python 版本中可用（非编译版\u002FSnap）。默认值：False。\n\n**更新**\n\n- `实时自动索引数据库`: 在指定模式下启用对话上下文的自动索引。\n\n- `用于自动索引的索引 ID`: 如果启用了对话上下文的自动索引，则使用的索引。\n\n- `在哪些模式下启用自动索引`: 启用上下文自动索引的模式列表，以逗号分隔。\n\n- `DB (ALL), DB (UPDATE), FILES (ALL)`: 对数据进行索引——此处提供批量索引功能。\n\n**代理与专家**\n\n**通用设置**\n\n- `从 RAG 自动检索额外上下文`: 如果提供了索引，则在开始时自动从 RAG 检索额外上下文。\n\n- `目标达成时显示托盘通知`: 如果启用，目标达成或运行结束后将显示通知。\n\n- `在聊天窗口中显示完整的代理输出`: 如果启用，代理推理的实时输出将与回复一起显示。\n\n**代理（LlamaIndex \u002F OpenAI）**\n\n- `最大步骤数（每轮）`: 目标达成前每一轮的最大步骤数。\n\n- `循环中的最大评估步骤`: 达成最终结果的最大评估步骤；设置为 0 表示无限。\n\n- `用于评估的模型`: 使用评分\u002F百分比进行评估的模型（循环）。未选择时，将使用当前活动模型。\n\n- `在下一次评估中附加并比较之前的评估提示`: 如果启用，将在循环中的下一次评估中检查之前的改进提示，默认值：False。\n\n- `拆分响应消息`: 在 OpenAI 代理模式下，将响应消息拆分为独立的上下文条目。\n\n**自主代理（旧版代理）**\n\n- `代理子模式`: 在代理（自主）模式下使用的子模式（聊天、LlamaIndex 等）。默认值：聊天。\n\n- `要使用的索引`: 仅当子模式为 LlamaIndex（带文件的聊天）时，在代理和专家模式中选择要使用的索引。\n\n- `使用原生 API 函数调用`: 使用 API 函数调用来运行插件中的工具，而不是使用命令提示符——仅限自主代理模式，默认值：False。\n\n- `在代理模式中使用 Responses API`: 在代理（自主）模式中使用 Responses API 代替 ChatCompletions API。仅适用于 OpenAI 模型。默认值：False。\n\n**专家**\n\n- `专家子模式`: 在专家模式下使用的子模式（聊天、LlamaIndex 等）。默认值：聊天。\n\n- `使用代理进行专家推理`: 如果启用，ReAct 代理将用于专家调用和专家推理。默认值：True。\n\n- `使用原生 API 函数调用`: 使用 API 函数调用来运行插件中的工具，而不是使用命令提示符——仅限专家，默认值：False。\n\n- `在专家模式（主模型）中使用 Responses API`: 在专家（主模型）中使用 Responses API 代替 ChatCompletions API。仅适用于 OpenAI 模型。默认值：False。\n\n- `在专家（从属模型）中使用 Responses API`: 在专家实例（从属模型）中使用 Responses API 代替 ChatCompletions API。仅适用于 OpenAI 模型。默认值：False。\n\n**辅助功能**\n\n- `启用语音控制（使用麦克风）`: 启用语音控制（使用麦克风和预定义命令）。\n\n- `模型`: 用于语音命令识别的模型。\n\n- `使用语音合成描述屏幕上的事件`: 启用对屏幕事件的音频描述。\n\n- `使用音频输出缓存`: 如果启用，所有静态音频输出将被缓存在磁盘上，而不是每次都重新生成。默认值：True。\n\n- `语音命令执行时发出音频提示`: 启用语音命令执行时的“滴”声提示。\n\n- `控制快捷键`: 为指定操作配置键盘快捷键。\n\n- `语音合成事件描述黑名单（忽略事件）`: “使用语音合成描述事件”选项中静音事件的列表。\n\n- `语音控制操作黑名单`: 禁用语音控制中的某些操作；将操作添加到黑名单以防止通过语音命令执行。\n\n**个性化**\n\n- `关于您`: 提供您的个人信息，例如“我叫……，30 岁，我对……感兴趣”。这些信息将包含在模型的系统提示中。**警告：** 请勿将 AI 视为“朋友”。现实生活中的友谊远胜于用 AI 取代友谊。切勿在与 AI 的互动中产生情感依恋。\n\n- `在哪些模式下启用`: 选择将使用个性化“关于您”提示的模式。\n\n**更新**\n\n- `启动时检查更新`: 启动时检查更新。默认值：True。\n\n- `后台检查更新`: 在后台检查更新（每 5 分钟检查一次）。默认值：True。\n\n**调试**\n\n- `显示调试菜单`: 启用调试（开发者）菜单。\n\n- `日志级别`: 切换日志级别（ERROR|WARNING|INFO|DEBUG）。\n\n- `记录和调试上下文`: 启用上下文输入\u002F输出的日志记录。\n\n- `记录和调试事件`: 启用事件分发的日志记录。\n\n- `将插件使用情况记录到控制台`: 启用将插件使用情况记录到控制台。\n\n- `将 DALL-E 使用情况记录到控制台`: 启用将 DALL-E 使用情况记录到控制台。\n\n- `将附件使用情况记录到控制台`: 启用将附件使用情况记录到控制台。\n\n- `将 Agents 使用情况记录到控制台`: 启用将 Agents 使用情况记录到控制台。\n\n- `将 LlamaIndex 使用情况记录到控制台`: 启用将 LlamaIndex 使用情况记录到控制台。\n\n- `将 Assistants 使用情况记录到控制台`: 启用将 Assistants API 使用情况记录到控制台。\n\n\n\n\n## JSON 文件\n\n配置存储在 JSON 文件中，便于在应用程序外部手动修改。\n这些配置文件位于用户的工作目录下的以下子目录中：\n\n``` ini\n{HOME_DIR}\u002F.config\u002Fpygpt-net\u002F\n```\n\n## 手动配置\n\n您可以在此目录中手动编辑配置文件（这是您的工作目录）：\n\n``` ini\n{HOME_DIR}\u002F.config\u002Fpygpt-net\u002F\n```\n\n- `assistants.json` - 存储助手列表。\n- `attachments.json` - 存储当前附件列表。\n- `config.json` - 存储主要配置设置。\n- `models.json` - 存储模型配置。\n- `cache` - 音频缓存目录。\n- `capture` - 用于存储相机拍摄的图片和截图的目录。\n- `css` - CSS 样式表目录（用户覆盖）。\n- `history` - 上下文历史记录目录，格式为 `.txt`。\n- `idx` - `LlamaIndex` 索引。\n- `img` - 存储由 `DALL-E 3` 和 `DALL-E 2` 生成的图片的目录，保存为 `.png` 文件。\n- `locale` - 区域设置目录（用户覆盖）。\n- `data` - 数据文件以及由模型下载或生成的文件的目录。\n- `presets` - 存储预设的 `.json` 文件的目录。\n- `upload` - 存储来自工作目录之外的附件本地副本的目录。\n- `db.sqlite` 
- 包含上下文、记事本和索引数据记录的数据库。\n- `app.log` - 错误和调试日志文件。\n\n---\n\n## 使用命令行参数设置工作目录\n\n要使用命令行参数设置当前工作目录，请执行以下命令：\n\n```\npython3 .\u002Frun.py --workdir=\"\u002Fpath\u002Fto\u002Fworkdir\"\n```\n或者，对于二进制版本：\n\n```\npygpt.exe --workdir=\"\u002Fpath\u002Fto\u002Fworkdir\"\n```\n\n\n## 翻译 \u002F 区域设置\n\n区域设置 `.ini` 文件位于应用程序目录中：\n\n``` ini\n.\u002Fdata\u002Flocale\n```\n\n应用程序启动时会自动扫描该目录。要添加新的翻译，请创建并保存相应名称的文件，例如：\n\n``` ini\nlocale.es.ini\n```\n\n这将在应用程序的语言菜单中添加西班牙语作为可选语言。\n\n**用您自己的文件覆盖 CSS 和区域设置：**\n\n您也可以在用户目录中放置自己的文件，以覆盖应用程序 `locale` 和 `css` 目录中的文件。这样，只需在您的工作目录中创建文件，即可非常简单地覆盖语言文件或 CSS 样式。\n\n\n``` ini\n{HOME_DIR}\u002F.config\u002Fpygpt-net\u002F\n```\n\n- `locale` - 用于存放 `.ini` 格式的区域设置文件的目录。\n- `css` - 用于存放 `.css` 格式的 CSS 样式文件的目录。\n\n**添加您自己的字体**\n\n您可以添加自己的字体，并在 CSS 文件中使用它们。要加载您自己的字体，应将其放置在 `%workdir%\u002Ffonts` 目录中。支持的字体类型包括：`otf`、`ttf`。已加载字体的列表可在“调试\u002F配置”中查看。\n\n**示例：**\n\n```\n%workdir%\n|_css\n|_data\n|_fonts\n   |_MyFont\n     |_MyFont-Regular.ttf\n     |_MyFont-Bold.ttf\n     |...\n```\n\n```css\npre {\n    font-family: 'MyFont';\n}\n```\n\n## 数据加载器\n\n**配置数据加载器**\n\n在 `设置 -> LlamaIndex -> 数据加载器` 部分，您可以定义要传递给数据加载器实例的附加关键字参数。\n\n在大多数情况下，内部会使用 LlamaIndex 的内置加载器。您可以在以下位置查看这些基础加载器：\n\n文件：https:\u002F\u002Fgithub.com\u002Frun-llama\u002Fllama_index\u002Ftree\u002Fmain\u002Fllama-index-integrations\u002Freaders\u002Fllama-index-readers-file\u002Fllama_index\u002Freaders\u002Ffile\n\n网页：https:\u002F\u002Fgithub.com\u002Frun-llama\u002Fllama_index\u002Ftree\u002Fmain\u002Fllama-index-integrations\u002Freaders\u002Fllama-index-readers-web\n\n**提示：** 若要索引外部数据或网络上的数据，只需通过 `Web Search` 插件请求即可，例如，您可以对模型说“请索引这个 YouTube 视频：视频 URL”等。系统会自动选择适合指定内容的数据加载器。\n\n内置数据加载器（文件）允许的附加关键字参数：\n\n**CSV 文件** (file_csv)\n\n- `concat_rows` - 布尔值，默认为 `True`\n- `encoding` - 字符串，默认为 `utf-8`\n\n**HTML 文件** (file_html)\n\n- `tag` - 字符串，默认为 `section`\n- `ignore_no_id` - 布尔值，默认为 `False`\n\n**图像（视觉）** (file_image_vision)\n\n此加载器有两种模式：本地模型模式和 API 模式。如果启用本地模式，则将使用本地模型。本地模式需要应用程序的 Python\u002FPyPi 版本，在编译版或 Snap 版中不可用。如果选择 API 模式（默认），则将使用 OpenAI API 和标准的视觉模型。\n\n**注意：** 使用 API 模式会消耗 OpenAI API 中的额外 token（针对 `GPT-4 Vision` 模型）！\n\n本地模式需要安装 `torch`、`transformers`、`sentencepiece` 和 `Pillow`，并使用 `Salesforce\u002Fblip2-opt-2.7b` 模型来描述图像。\n\n- `keep_image` - 布尔值，默认为 `False`\n- `local_prompt` - 字符串，默认为 `问题：请描述你在这张图片中看到的内容。答案：`\n- `api_prompt` - 字符串，默认为 `描述你在这张图片中看到的内容` - 用于 API 的提示\n- `api_model` - 字符串，默认为 `gpt-4-vision-preview` - 在 API 中使用的模型\n- `api_tokens` - 整数，默认为 `1000` - API 中的最大输出 token 数量\n\n**IPYNB 笔记本文件** (file_ipynb)\n\n- `parser_config` - 字典，默认为 `None`\n- `concatenate` - 布尔值，默认为 `False`\n\n**Markdown 文件** (file_md)\n\n- `remove_hyperlinks` - 布尔值，默认为 `True`\n- `remove_images` - 布尔值，默认为 `True`\n\n**PDF 文档** (file_pdf)\n\n- `return_full_document` - 布尔值，默认为 `False`\n\n**视频\u002F音频** (file_video_audio)\n\n此加载器有两种模式：本地模型模式和 API 模式。如果启用本地模式，则将使用本地的 `Whisper` 模型。本地模式需要应用程序的 Python\u002FPyPi 版本，在编译版或 Snap 版中不可用。如果选择 API 模式（默认），则将使用 `Audio Input` 插件中当前选定的服务提供商。如果选择了 `OpenAI Whisper`，则将使用 OpenAI API 和 API 版的 Whisper 模型。\n\n**注意：** 通过 API 使用 Whisper 会消耗 OpenAI API 中的额外 token（针对 `Whisper` 模型）！\n\n本地模式需要安装 `torch` 和 `openai-whisper`，并在本地使用 `Whisper` 模型来转录视频和音频。\n\n- `model_version` - 字符串，默认为 `base` - 要使用的 Whisper 模型，可用模型列表：https:\u002F\u002Fgithub.com\u002Fopenai\u002Fwhisper\n\n**XML 文件** (file_xml)\n\n- `tree_level_split` - 整数，默认为 `0`\n\n内置数据加载器（Web 和外部内容）允许的附加关键字参数：\n\n**Bitbucket** (web_bitbucket)\n\n- `username` - 字符串，默认为 `None`\n- `api_key` - 字符串，默认为 `None`\n- `extensions_to_skip` - 列表，默认为 `[]`\n\n**ChatGPT 检索** 
(web_chatgpt_retrieval)\n\n- `endpoint_url` - 字符串，默认为 `None`\n- `bearer_token` - 字符串，默认为 `None`\n- `retries` - 整数，默认为 `None`\n- `batch_size` - 整数，默认为 `100`\n\n**Google 日历** (web_google_calendar)\n\n- `credentials_path` - 字符串，默认为 `credentials.json`\n- `token_path` - 字符串，默认为 `token.json`\n\n**Google 文档** (web_google_docs)\n\n- `credentials_path` - 字符串，默认为 `credentials.json`\n- `token_path` - 字符串，默认为 `token.json`\n\n**Google 云端硬盘** (web_google_drive)\n\n- `credentials_path` - 字符串，默认为 `credentials.json`\n- `token_path` - 字符串，默认为 `token.json`\n- `pydrive_creds_path` - 字符串，默认为 `creds.txt`\n- `client_config` - 字典，默认为 `{}`\n\n**Google Gmail** (web_google_gmail)\n\n- `credentials_path` - 字符串，默认为 `credentials.json`\n- `token_path` - 字符串，默认为 `token.json`\n- `use_iterative_parser` - 布尔值，默认为 `False`\n- `max_results` - 整数，默认为 `10`\n- `results_per_page` - 整数，默认为 `None`\n\n**Google Keep** (web_google_keep)\n\n- `credentials_path` - 字符串，默认为 `keep_credentials.json`\n\n**Google 表格** (web_google_sheets)\n\n- `credentials_path` - 字符串，默认为 `credentials.json`\n- `token_path` - 字符串，默认为 `token.json`\n\n**GitHub 问题** (web_github_issues)\n\n- `token` - 字符串，默认为 `None`\n- `verbose` - 布尔值，默认为 `False`\n\n**GitHub 仓库** (web_github_repository)\n\n- `token` - 字符串，默认为 `None`\n- `verbose` - 布尔值，默认为 `False`\n- `concurrent_requests` - 整数，默认为 `5`\n- `timeout` - 整数，默认为 `5`\n- `retries` - 整数，默认为 `0`\n- `filter_dirs_include` - 列表，默认为 `None`\n- `filter_dirs_exclude` - 列表，默认为 `None`\n- `filter_file_ext_include` - 列表，默认为 `None`\n- `filter_file_ext_exclude` - 列表，默认为 `None`\n\n**Microsoft OneDrive** (web_microsoft_onedrive)\n\n- `client_id` - 字符串，默认为 `None`\n- `client_secret` - 字符串，默认为 `None`\n- `tenant_id` - 字符串，默认为 `consumers`\n\n**站点地图（XML）** (web_sitemap)\n\n- `html_to_text` - 布尔值，默认为 `False`\n- `limit` - 整数，默认为 `10`\n\n**SQL 数据库** (web_database)\n\n- `uri` - 字符串，默认为 `None`\n\n您可以提供一个格式为 `{scheme}:\u002F\u002F{user}:{password}@{host}:{port}\u002F{dbname}` 的 URI，也可以手动填写每个字段：\n\n- `scheme` - 字符串，默认为 `None`\n- `host` - 字符串，默认为 `None`\n- `port` - 字符串，默认为 `None`\n- `user` - 字符串，默认为 `None`\n- `password` - 字符串，默认为 `None`\n- `dbname` - 字符串，默认为 `None`\n\n**Twitter\u002FX 帖子** (web_twitter)\n\n- `bearer_token` - 字符串，默认为 `None`\n- `num_tweets` - 整数，默认为 `100`\n\n## 向量存储\n\n**可用的向量存储**（由 `LlamaIndex` 提供）：\n\n```\n- ChromaVectorStore\n- ElasticsearchStore\n- PinecodeVectorStore\n- QdrantVectorStore\n- RedisVectorStore\n- SimpleVectorStore\n```\n\n您可以通过在“设置 -> LlamaIndex”窗口中提供如 `api_key` 等配置选项来配置所选的向量存储。\n\n在此处提供的参数（在“高级设置”中的列表：`Vector Store (**kwargs)`）将被传递给所选的向量存储提供商。您可以查看 LlamaIndex API 参考页面上所选提供商所需的关键词参数：\n\nhttps:\u002F\u002Fdocs.llamaindex.ai\u002Fen\u002Fstable\u002Fapi_reference\u002Fstorage\u002Fvector_store.html\n\n那么，哪些关键词参数会被传递给各个提供商呢？\n\n对于 `ChromaVectorStore` 和 `SimpleVectorStore`，所有参数均由 PyGPT 设置并内部传递（您无需进行任何配置）。\n\n对于其他提供商，您可以提供以下参数：\n\n**ElasticsearchStore**\n\nElasticsearchStore 的关键词参数 (`**kwargs`)：\n\n- `index_name`（默认：当前索引 ID，已设置，无需指定）\n- 列表中提供的任何其他关键词参数\n\n**PinecodeVectorStore**\n\nPinecone 的关键词参数 (`**kwargs`)：\n\n- `api_key`\n- `index_name`（默认：当前索引 ID，已设置，无需指定）\n\n**QdrantVectorStore**\n\nQdrantVectorStore 的关键词参数 (`**kwargs`)：\n\n- `url` - 字符串，默认值为 `http:\u002F\u002Flocalhost:6333`\n- `api_key` - 字符串，默认值为 `None`（用于 Qdrant Cloud）\n- `collection_name`（默认：当前索引 ID，已设置，无需指定）\n- 列表中提供的任何其他关键词参数\n\n**RedisVectorStore**\n\nRedisVectorStore 的关键词参数 (`**kwargs`)：\n\n- `index_name`（默认：当前索引 ID，已设置，无需指定）\n- 列表中提供的任何其他关键词参数\n\n您可以通过创建自定义提供商并在应用启动时注册它来扩展可用提供商列表。\n\n默认情况下，在使用“与文件聊天”功能时，您处于基于聊天的模式。如果您只想查询索引（而不进行聊天），可以启用“仅查询索引（无聊天）”选项。\n\n### 
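配置示例（Qdrant）\n\n作为补充说明，下面的草图展示了上述关键词参数（如 `url`、`api_key`）最终大致如何构造出 LlamaIndex 的向量存储对象（仅为示意：实际对象由 PyGPT 的提供商在内部创建，您只需在设置中填写参数即可；示例中的集合名称为假设值，默认会使用当前索引 ID）：\n\n```python\n# 示意：这里的 url 与 api_key 对应 “Vector Store (**kwargs)” 列表中的条目\nimport qdrant_client\nfrom llama_index.vector_stores.qdrant import QdrantVectorStore\n\nclient = qdrant_client.QdrantClient(url=\"http:\u002F\u002Flocalhost:6333\", api_key=None)\nstore = QdrantVectorStore(client=client, collection_name=\"base\")  # \"base\" 为假设的索引 ID\n```\n\n### 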
添加自定义向量存储和数据加载器\n\n您可以为您的数据创建自定义向量存储提供商或数据加载器，并为应用程序开发自定义启动程序。\n\n有关更多详细信息，请参阅“扩展 PyGPT \u002F 添加自定义向量存储提供商”部分。\n\n# 更新\n\n### 更新 PyGPT\n\n**PyGPT** 自带一个集成的更新通知系统。当发布包含新功能的新版本时，您将在应用内收到提醒。\n\n要获取新版本，只需下载并用它替换旧版本即可。您所有的自定义设置，如配置、预设、索引和过往对话，都将保留，并可在新版本中立即使用。\n\n# 调试与日志记录\n\n在“设置 -> 开发者”对话框中，您可以启用“显示调试菜单”选项以打开调试菜单。该菜单允许您检查应用程序各组件的状态。在调试菜单中，有一个“Logger”选项，可打开日志窗口。在该窗口中，程序的运行情况会实时显示。\n\n**日志级别**：\n\n默认情况下，所有错误和异常都会被记录到以下文件中：\n\n```ini\n{HOME_DIR}\u002F.config\u002Fpygpt-net\u002Fapp.log\n```\n\n要提高日志级别（默认为 `ERROR` 级别），请使用 `--debug` 参数运行应用程序：\n\n``` ini\npython3 run.py --debug=1\n```\n\n或者\n\n```ini\npython3 run.py --debug=2\n```\n\n值 `1` 会启用 `INFO` 日志级别。\n\n值 `2` 会启用 `DEBUG` 日志级别（记录最多的信息）。\n\n**兼容性（旧版）模式**\n\n如果您遇到 `WebEngine \u002F Chromium` 渲染器的问题，可以通过命令行参数强制启用旧版模式：\n\n``` ini\npython3 run.py --legacy=1\n```\n\n要强制禁用 OpenGL 硬件加速：\n\n``` ini\npython3 run.py --disable-gpu=1\n```\n\n您也可以手动启用旧版模式，方法是编辑配置文件：在编辑器中打开 `%WORKDIR%\u002Fconfig.json` 配置文件，并设置以下选项：\n\n``` json\n\"render.engine\": \"legacy\",\n\"render.open_gl\": false,\n```\n\n# 扩展 PyGPT\n\n## 快速入门\n\n您可以随时为 **PyGPT** 创建自己的扩展。\n\nPyGPT 可以通过以下方式扩展：\n\n- 自定义模型\n- 自定义插件\n- 自定义 LLM 包装器\n- 自定义向量存储提供商\n- 自定义数据加载器\n- 自定义音频输入提供商\n- 自定义音频输出提供商\n- 自定义网页搜索引擎提供商\n- 自定义智能体（LlamaIndex 或 OpenAI 智能体）\n\n**示例（教程文件）**\n\n请参阅本仓库中的 `examples` 目录，其中包含自定义启动程序、插件、向量存储、LLM（LlamaIndex）提供商和数据加载器的示例：\n\n- `examples\u002Fcustom_launcher.py`\n- `examples\u002Fexample_audio_input.py`\n- `examples\u002Fexample_audio_output.py`\n- `examples\u002Fexample_data_loader.py`\n- `examples\u002Fexample_llm.py`\n- `examples\u002Fexample_plugin.py`\n- `examples\u002Fexample_vector_store.py`\n- `examples\u002Fexample_web_search.py`\n\n这些示例文件可以用作您为 **PyGPT** 创建自定义扩展的起点。\n\n通过自定义插件、LLM 包装器和向量存储扩展 PyGPT：\n\n- 您可以将自定义插件实例、LLM 包装器和向量存储提供商传递给启动程序。\n- 这在您希望用自有的插件、向量存储和 LLM 扩展 PyGPT 时非常有用。\n  \n要注册自定义插件：\n\n- 将插件实例列表作为 `plugins` 关键词参数传递。\n\n要注册自定义 LLM 包装器：\n\n- 将 LLM 包装器实例列表作为 `llms` 关键词参数传递。\n\n要注册自定义向量存储提供商：\n\n- 将向量存储提供商实例列表作为 `vector_stores` 关键词参数传递。\n\n要注册自定义数据加载器：\n\n- 将数据加载器实例列表作为 `loaders` 关键词参数传递。\n\n要注册自定义音频输入提供商：\n\n- 将音频输入提供商实例列表作为 `audio_input` 关键词参数传递。\n\n要注册自定义音频输出提供商：\n\n- 将音频输出提供商实例列表作为 `audio_output` 关键词参数传递。\n\n要注册自定义网页提供商：\n\n- 将网页提供商实例列表作为 `web` 关键词参数传递。\n\n## 添加自定义模型\n\n要使用 OpenAI API 或 LlamaIndex 封装添加新模型，请在 `配置 -> 模型` 中使用编辑器，或手动编辑 `models.json` 文件，插入该模型的配置详情。如果您通过 LlamaIndex 添加模型，务必包含模型名称、其支持的模式（`chat`、`completion` 或两者兼备）、LLM 提供商（如 `OpenAI` 或 `HuggingFace`），以及对于基于外部 API 的模型，可选的 `API KEY` 和其他必要的环境设置。\n\n模型配置示例 - `%WORKDIR%\u002Fmodels.json`：\n\n```\n\"gpt-3.5-turbo\": {\n    \"id\": \"gpt-3.5-turbo\",\n    \"name\": \"gpt-3.5-turbo\",\n    \"mode\": [\n        \"chat\",\n        \"assistant\",\n        \"langchain\",\n        \"llama_index\"\n    ],\n    \"provider\": \"openai\",\n    \"llama_index\": {\n        \"args\": [\n            {\n                \"name\": \"model\",\n                \"value\": \"gpt-3.5-turbo\",\n                \"type\": \"str\"\n            }\n        ],\n        \"env\": [\n            {\n                \"name\": \"OPENAI_API_KEY\",\n                \"value\": \"{api_key}\"\n            }\n        ]\n    },\n    \"ctx\": 4096,\n    \"tokens\": 4096,\n    \"default\": false\n},\n```\n\n内置支持以下 LLM 提供商：\n\n- Anthropic\n- Azure OpenAI\n- Deepseek API\n- Google\n- HuggingFace\n- 本地模型（兼容 OpenAI API）\n- Ollama\n- OpenAI\n- OpenRouter\n- Perplexity\n- xAI\n\n**提示**：`models.json` 中的 `{api_key}` 是设置中主 OpenAI API 密钥的占位符，它将被配置的实际密钥值替换。\n\n## 添加自定义插件\n\n### 创建您自己的插件\n\n您可以为 **PyGPT** 创建自己的插件。插件可以用 Python 编写，然后在应用程序启动前注册到应用中。所有随应用附带的插件都存储在 `plugin` 
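目录中。\n\n在动手之前，下面先给出一个高度简化的插件骨架（仅为示意：类名与字段均为假设，真实插件需继承应用提供的插件基类，具体以下文的示例文件为准）：\n\n```python\n# 示意骨架：演示插件的基本形态；实际基类与注册方式见 examples\u002Fexample_plugin.py\nfrom pygpt_net.core.events import Event\n\nclass MyPlugin:  # 真实插件应继承 PyGPT 提供的插件基类\n    def __init__(self):\n        self.id = \"my_plugin\"  # 插件唯一 ID（假设值）\n        self.name = \"My Plugin\"  # 插件列表中显示的名称\n        self.description = \"演示插件\"\n\n    def handle(self, event: Event, *args, **kwargs):\n        # 在输入发送给模型之前为其添加前缀（事件说明见下文“事件列表”）\n        if event.name == Event.INPUT_BEFORE:\n            event.data['value'] = \"[demo] \" + event.data['value']\n```\n\n如需更完整的参考，内置插件就位于 `plugin` 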
目录中，您可以将其用作编写您自己的插件的代码示例。\n\n**示例（教程文件）**\n\n请参阅此 `examples` 目录中的示例插件：\n\n- `examples\u002Fexample_plugin.py`\n\n这些示例文件可用作创建您自己的 **PyGPT** 插件的起点。\n\n要注册自定义插件：\n\n- 创建一个应用程序的自定义启动脚本。\n- 将自定义插件实例列表作为 `plugins` 关键字参数传递。\n\n**自定义启动脚本示例：**\n\n```python\n# custom_launcher.py\n\nfrom pygpt_net.app import run\nfrom plugins import CustomPlugin, OtherCustomPlugin\nfrom llms import CustomLLM\nfrom vector_stores import CustomVectorStore\n\nplugins = [\n    CustomPlugin(),\n    OtherCustomPlugin(),\n]\nllms = [\n    CustomLLM(),\n]\nvector_stores = [\n    CustomVectorStore(),\n]\n\nrun(\n    plugins=plugins,\n    llms=llms,\n    vector_stores=vector_stores,\n)\n```\n\n### 处理事件\n\n在插件中，您可以接收并修改分发的事件。为此，创建一个名为 `handle(self, event, *args, **kwargs)` 的方法，并按如下方式处理接收到的事件：\n\n```python\n# custom_plugin.py\n\nfrom pygpt_net.core.events import Event\n\n\ndef handle(self, event: Event, *args, **kwargs):\n    \"\"\"\n    处理分发的事件\n\n    :param event: 事件对象\n    \"\"\"\n    name = event.name\n    data = event.data\n    ctx = event.ctx\n\n    if name == Event.INPUT_BEFORE:\n        self.some_method(data['value'])\n    elif name == Event.CTX_BEGIN:\n        self.some_other_method(ctx)\n    else:\n        pass  # ...\n```\n\n### 事件列表\n\n事件名称在 `pygpt_net.core.events` 模块的 `Event` 类中定义。\n\n语法：`事件名称` - 触发时机，`事件数据` *(数据类型)*：\n\n- `AI_NAME` - 在准备 AI 名称时触发，`data['value']` *(字符串，AI 助手的名称)*\n\n- `AGENT_PROMPT` - 在评估模式下生成代理提示时触发，`data['value']` *(字符串，提示文本)*\n\n- `AUDIO_INPUT_RECORD_START` - 开始录音输入\n\n- `AUDIO_INPUT_RECORD_STOP` - 停止录音输入\n\n- `AUDIO_INPUT_RECORD_TOGGLE` - 切换录音输入状态\n\n- `AUDIO_INPUT_TRANSCRIBE` - 在转录音频文件时触发，`data['path']` *(字符串，音频文件路径)*\n\n- `AUDIO_INPUT_STOP` - 强制停止音频输入\n\n- `AUDIO_INPUT_TOGGLE` - 当语音输入被启用或禁用时触发，`data['value']` *(布尔值，True\u002FFalse)*\n\n- `AUDIO_OUTPUT_STOP` - 强制停止音频输出\n\n- `AUDIO_OUTPUT_TOGGLE` - 当语音输出被启用或禁用时触发，`data['value']` *(布尔值，True\u002FFalse)*\n\n- `AUDIO_READ_TEXT` - 使用语音合成朗读文本时触发，`data['text']` *(字符串，待朗读的文本)*\n\n- `CMD_EXECUTE` - 当执行命令时触发，`data['commands']` *(列表，包含命令及其参数)*\n\n- `CMD_INLINE` - 当执行内联命令时触发，`data['commands']` *(列表，包含命令及其参数)*\n\n- `CMD_SYNTAX` - 在添加命令语法时触发，`data['prompt'], data['syntax']` *(字符串、列表，提示语和命令使用语法的列表)*\n\n- `CMD_SYNTAX_INLINE` - 在内联模式下添加命令语法时触发，`data['prompt'], data['syntax']` *(字符串、列表，提示语和命令使用语法的列表)*\n\n- `CTX_AFTER` - 上下文项发送之后触发，`ctx`\n\n- `CTX_BEFORE` - 上下文项发送之前触发，`ctx`\n\n- `CTX_BEGIN` - 创建上下文项时触发，`ctx`\n\n- `CTX_END` - 处理上下文项结束时触发，`ctx`\n\n- `CTX_SELECT` - 在从列表中选择上下文时触发，`data['value']` *(整数，上下文元 ID)*\n\n- `DISABLE` - 当插件被禁用时触发，`data['value']` *(字符串，插件 ID)*\n\n- `ENABLE` - 当插件被启用时触发，`data['value']` *(字符串，插件 ID)*\n\n- `FORCE_STOP` - 当强制停止插件时触发\n\n- `INPUT_BEFORE` - 从文本框接收到输入时触发，`data['value']` *(字符串，待发送的文本)*\n\n- `MODE_BEFORE` - 在选择模式之前触发，`data['value'], data['prompt']` *(字符串，模式 ID 和提示语)*\n\n- `MODE_SELECT` - 选择模式时触发，`data['value']` *(字符串，模式 ID)*\n\n- `MODEL_BEFORE` - 在选择模型之前触发，`data['value']` *(字符串，模型 ID)*\n\n- `MODEL_SELECT` - 选择模型时触发，`data['value']` *(字符串，模型 ID)*\n\n- `PLUGIN_SETTINGS_CHANGED` - 当插件设置更新时触发（保存设置）\n\n- `PLUGIN_OPTION_GET` - 请求插件选项值时触发，`data['name'], data['value']` *(字符串，任意，请求的选项名称及当前值)*\n\n- `POST_PROMPT` - 准备系统提示之后触发，`data['value']` *(字符串，系统提示)*\n\n- `POST_PROMPT_ASYNC` - 准备系统提示之后，在异步线程发起请求之前触发，`data['value']` *(字符串，系统提示)*\n\n- `POST_PROMPT_END` - 准备系统提示之后，在异步线程发起请求之前，在最后阶段触发，`data['value']` *(字符串，系统提示)*\n\n- `PRE_PROMPT` - 准备系统提示之前触发，`data['value']` *(字符串，系统提示)*\n\n- `SYSTEM_PROMPT` - 准备系统提示时触发，`data['value']` *(字符串，系统提示)*\n\n- `TOOL_OUTPUT_RENDER` - 当渲染来自插件工具的额外内容时触发，`data['content']` *(字符串，内容)*\n\n- `UI_ATTACHMENTS` - 
当附件上传组件渲染时触发，`data['value']` *(布尔值，显示 True\u002FFalse)*\n\n- `UI_VISION` - 当视觉相关组件渲染时触发，`data['value']` *(布尔值，显示 True\u002FFalse)*\n\n- `USER_NAME` - 准备用户名称时触发，`data['value']` *(字符串，用户名)*\n\n- `USER_SEND` - 在输入文本发送之前触发，`data['value']` *(字符串，输入文本)*\n\n\n您可以通过将 `stop` 设置为 `True` 来随时阻止接收到的事件继续传播：\n\n```\nevent.stop = True\n```\n\n可以通过启用“配置 -> 设置 -> 开发者 -> 记录并调试事件”选项来调试事件流。\n\n## 添加自定义 LLM 提供商\n\n通过 LlamaIndex 处理 LLM 的方式是通过独立的封装器实现。这使得可以轻松添加对任何可通过 LlamaIndex 使用的提供商和模型的支持。所有内置的模型及其提供商的封装器都位于 `pygpt_net.provider.llms` 中。\n\n这些封装器会在应用启动时通过 `launcher.add_llm()` 方法加载到应用中：\n\n```python\n# app.py\n\nfrom pygpt_net.provider.api.openai import OpenAILLM\nfrom pygpt_net.provider.llms.azure_openai import AzureOpenAILLM\nfrom pygpt_net.provider.llms.anthropic import AnthropicLLM\nfrom pygpt_net.provider.llms.hugging_face import HuggingFaceLLM\nfrom pygpt_net.provider.llms.ollama import OllamaLLM\nfrom pygpt_net.provider.llms.google import GoogleLLM\n\n\ndef run(**kwargs):\n    \"\"\"运行应用程序\"\"\"\n    # 初始化应用\n    launcher = Launcher()\n    launcher.init()\n\n    # 注册插件\n    ...\n\n    # 注册 Langchain 和 LlamaIndex 的 LLM 封装器\n    launcher.add_llm(OpenAILLM())\n    launcher.add_llm(AzureOpenAILLM())\n    launcher.add_llm(AnthropicLLM())\n    launcher.add_llm(HuggingFaceLLM())\n    launcher.add_llm(OllamaLLM())\n    launcher.add_llm(GoogleLLM())\n\n    # 启动应用\n    launcher.run()\n```\n\n要添加默认未包含的提供商支持，您可以创建自己的封装器，向应用返回自定义模型，然后将此自定义封装器传递给启动器。\n\n使用自定义插件和 LLM 封装器扩展 **PyGPT** 非常简单：\n\n- 直接将自定义插件和 LLM 封装器的实例传递给启动器。\n\n注册自定义 LLM 封装器的方法：\n\n- 将 LLM 封装器实例的列表作为 `llms` 关键字参数提供。\n\n**示例：**\n\n\n```python\n\n# launcher.py\n\nfrom pygpt_net.app import run\nfrom plugins import CustomPlugin, OtherCustomPlugin\nfrom llms import CustomLLM\n\nplugins = [\n    CustomPlugin(),\n    OtherCustomPlugin(),\n]\nllms = [\n    CustomLLM(),  # \u003C--- 自定义大模型提供商（封装器）\n]\nvector_stores = []\n\nrun(\n    plugins=plugins, \n    llms=llms, \n    vector_stores=vector_stores,\n)\n```\n\n**示例（教程文件）**\n\n请参阅本仓库中的 `examples` 目录，其中包含自定义启动器、插件、向量存储、大模型提供商和数据加载器的示例：\n\n- `examples\u002Fcustom_launcher.py`\n\n- `examples\u002Fexample_audio_input.py`\n\n- `examples\u002Fexample_audio_output.py`\n\n- `examples\u002Fexample_data_loader.py`\n\n- `examples\u002Fexample_llm.py`  \u003C-- 可用作示例\n\n- `examples\u002Fexample_plugin.py`\n\n- `examples\u002Fexample_vector_store.py`\n\n- `examples\u002Fexample_web_search.py`\n\n这些示例文件可以作为您为 **PyGPT** 创建自定义扩展的起点。\n\n要将您自己的模型或提供商集成到 **PyGPT** 中，您还可以参考位于 `pygpt_net.provider.llms` 中的类。这些示例可作为您自定义类的更复杂示例。请确保您的自定义封装类包含两个必要方法：`chat` 和 `completion`。这两个方法应返回模型在“聊天”和“完成”模式下运行所需的相应对象。\n\n每个大模型提供商（封装器）都继承自 `BaseLLM` 类，并且可以提供两个组件：用于 LlamaIndex 的提供商，以及用于嵌入的提供商。\n\n\n## 添加自定义向量存储提供商\n\n您可以为自己的数据创建自定义向量存储提供商或数据加载器，并为应用程序开发自定义启动器。要注册您的自定义向量存储提供商或数据加载器，只需将其实例传递给 `vector_stores` 关键字参数，或将加载器实例传递给 `loaders` 关键字参数即可：\n\n\n```python\n# app.py\n\n# 向量存储\nfrom pygpt_net.provider.vector_stores.chroma import ChromaProvider\nfrom pygpt_net.provider.vector_stores.elasticsearch import ElasticsearchProvider\nfrom pygpt_net.provider.vector_stores.pinecode import PinecodeProvider\nfrom pygpt_net.provider.vector_stores.qdrant import QdrantProvider\nfrom pygpt_net.provider.vector_stores.redis import RedisProvider\nfrom pygpt_net.provider.vector_stores.simple import SimpleProvider\n\ndef run(**kwargs):\n    # ...\n    # 注册基础向量存储提供商（LlamaIndex）\n    launcher.add_vector_store(ChromaProvider())\n    launcher.add_vector_store(ElasticsearchProvider())\n    launcher.add_vector_store(PinecodeProvider())\n    launcher.add_vector_store(QdrantProvider())\n  
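  # 继续注册其余内置提供商（Redis、Simple）\n  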
  launcher.add_vector_store(RedisProvider())\n    launcher.add_vector_store(SimpleProvider())\n\n    # 注册自定义向量存储提供商（LlamaIndex）\n    vector_stores = kwargs.get('vector_stores', None)\n    if isinstance(vector_stores, list):\n        for store in vector_stores:\n            launcher.add_vector_store(store)\n\n    # ...\n```\n\n要注册您的自定义向量存储提供商，只需将其实例传递给 `vector_stores` 关键字参数即可：\n\n```python\n\n# custom_launcher.py\n\nfrom pygpt_net.app import run\nfrom plugins import CustomPlugin, OtherCustomPlugin\nfrom llms import CustomLLM\nfrom vector_stores import CustomVectorStore\n\nplugins = [\n    CustomPlugin(),\n    OtherCustomPlugin(),\n]\nllms = [\n    CustomLLM(),\n]\nvector_stores = [\n    CustomVectorStore(),  # \u003C--- 自定义向量存储提供商\n]\n\nrun(\n    plugins=plugins,\n    llms=llms,\n    vector_stores=vector_stores,\n)\n```\n\n向量存储提供商必须是 `pygpt_net.provider.vector_stores.base.BaseStore` 的实例。\n您可以查看 `pygpt_net.provider.vector_stores` 中内置提供商的代码，并在创建自定义提供商时将其作为示例。\n\n### 添加自定义数据加载器\n\n\n```python\n\n# custom_launcher.py\n\nfrom pygpt_net.app import run\nfrom plugins import CustomPlugin, OtherCustomPlugin\nfrom llms import CustomLLM\nfrom vector_stores import CustomVectorStore\nfrom loaders import CustomLoader\n\nplugins = [\n    CustomPlugin(),\n    OtherCustomPlugin(),\n]\nllms = [\n    CustomLLM(),\n]\nvector_stores = [\n    CustomVectorStore(),\n]\nloaders = [\n    CustomLoader(),  # \u003C---- 自定义数据加载器\n]\n\nrun(\n    plugins=plugins,\n    llms=llms,\n    vector_stores=vector_stores,  # \u003C--- 包含自定义向量存储提供商的列表\n    loaders=loaders  # \u003C--- 包含自定义数据加载器的列表\n)\n```\n\n数据加载器必须是 `pygpt_net.provider.loaders.base.BaseLoader` 的实例。\n您可以查看 `pygpt_net.provider.loaders` 中内置加载器的代码，并在创建自定义加载器时将其用作示例。\n\n\n# 免责声明\n\n本应用与 OpenAI 并无官方关联。作者对因使用本应用而产生的任何损害不承担任何责任。本应用按“原样”提供，不附带任何形式的保证。\n提醒用户注意令牌使用情况——务必在 API 官网核实模型所使用的令牌数量，并以负责任的方式使用本应用。启用诸如网络搜索之类的插件可能会消耗额外的令牌，而这些令牌不会显示在主界面上。\n\n**请始终在 OpenAI、Google、Anthropic、xAI 等网站上监控您的实际令牌使用情况。**\n\n---\n\n# 更改日志\n\n## 最近的更新：\n\n**2.7.12（2026-02-06）**\n\n- xAI SDK 升级至 v1.6.1。\n- 在 xAI 提供商中新增视频生成支持。\n- 新增模型：grok-imagine-image 和 grok-imagine-video。\n- UI 改进，标签页和列之间的导航更加流畅。\n- 优化了附件添加功能。新增仅在 API 端存储消息时才附加一次附件的选项，以及强制仅附加一次附件的选项。\n- 更新了库文件。\n\n**2.7.11（2026-02-05）**\n\n- 改进了列之间的焦点处理。\n- 修复了模型调试器更新问题。\n\n**2.7.10（2026-02-03）**\n\n- 修复了在创建新预设时头像可能被覆盖的问题。\n- 修复了在第二列打开新标签页时未创建新上下文的问题。\n- 在输入框中新增提示历史导航功能（Ctrl + 上\u002F下箭头键）。\n- 在加载图片查看器时，新增初始图像居中显示功能。\n- 在记事本小部件中新增标记\u002F取消标记功能。\n- 新增 18 种语言：阿拉伯语 (ar)、保加利亚语 (bg)、捷克语 (cs)、丹麦语 (da)、芬兰语 (fi)、希伯来语 (he)、印地语 (hi)、匈牙利语 (hu)、日语 (ja)、韩语 (ko)、荷兰语 (nl)、挪威语 (no)、葡萄牙语 (pt)、罗马尼亚语 (ro)、俄语 (ru)、斯洛伐克语 (sk)、瑞典语 (sv)、土耳其语 (tr)。\n\n**2.7.9（2026-01-08）**\n\n- 改进了实时音频模式。\n- 在实时音频模式中新增 xAI 提供商及 Grok 支持。\n\n**2.7.8（2026-01-06）**\n\n- 新增 xAI Collections 远程工具，并将集合管理集成到远程向量存储工具中。\n- 将远程向量存储工具统一为适用于所有提供商的单一工具。\n- 新增 xAI Grok 音频输入输出提供商（Grok 的实时音频支持即将推出！）。\n- 增强了图片查看器工具，增加了包含上一张\u002F下一张等更多选项的工具箱。\n\n**2.7.7（2026-01-05）**\n\n- 在 xAI 中新增 Responses API 支持。\n- 新增 xAI 远程工具：远程 MCP、代码执行。\n- 新增 Anthropic 远程工具：远程 MCP、网页抓取、代码执行。\n\n**2.7.6（2026-01-03）**\n\n- 修复了与 xAI SDK 的兼容性问题，并解决了 Grok 模型返回空响应的问题。\n- 修复了 Snap 软件包中缺失的库文件。\n- 在图片查看器中新增缩放和拖动功能。\n- 在文本区域和网页小部件中新增缩放菜单。\n- 新增使用鼠标中键点击关闭标签页的功能。\n\n# 致谢与链接\n\n**官方网站：** \u003Chttps:\u002F\u002Fpygpt.net>\n\n**文档：** \u003Chttps:\u002F\u002Fpygpt.readthedocs.io>\n\n**支持与捐赠：** \u003Chttps:\u002F\u002Fpygpt.net\u002F#donate>\n\n**GitHub：** \u003Chttps:\u002F\u002Fgithub.com\u002Fszczyglis-dev\u002Fpy-gpt>\n\n**Discord：** \u003Chttps:\u002F\u002Fpygpt.net\u002Fdiscord>\n\n**Snap 商店：** 
\u003Chttps:\u002F\u002Fsnapcraft.io\u002Fpygpt>\n\n**微软商店：** \u003Chttps:\u002F\u002Fapps.microsoft.com\u002Fdetail\u002FXP99R4MX3X65VQ>\n\n**PyPI：** \u003Chttps:\u002F\u002Fpypi.org\u002Fproject\u002Fpygpt-net>\n\n**作者：** Marcin Szczygliński（波兰，欧盟）\n\n**联系方式：** \u003Cinfo@pygpt.net>\n\n**许可证：** MIT 许可证\n\n# 特别感谢\n\nGitHub 社区：\n\n- [@BillionShields](https:\u002F\u002Fgithub.com\u002FBillionShields)\n\n- [@gfsysa](https:\u002F\u002Fgithub.com\u002Fgfsysa)\n\n- [@glinkot](https:\u002F\u002Fgithub.com\u002Fglinkot)\n\n- [@kaneda2004](https:\u002F\u002Fgithub.com\u002Fkaneda2004)\n\n- [@KingOfTheCastle](https:\u002F\u002Fgithub.com\u002FKingOfTheCastle)\n\n- [@linnflux](https:\u002F\u002Fgithub.com\u002Flinnflux)\n\n- [@lukasz-pekala](https:\u002F\u002Fgithub.com\u002Flukasz-pekala)\n\n- [@moritz-t-w](https:\u002F\u002Fgithub.com\u002Fmoritz-t-w)\n\n- [@oleksii-honchar](https:\u002F\u002Fgithub.com\u002Foleksii-honchar)\n\n- [@yf007](https:\u002F\u002Fgithub.com\u002Fyf007)\n\n## 第三方库\n\n本项目所使用的外部库完整列表位于仓库主目录下的 [requirements.txt](https:\u002F\u002Fgithub.com\u002Fszczyglis-dev\u002Fpy-gpt\u002Fblob\u002Fmaster\u002Frequirements.txt) 文件中。\n\n所有使用的 SVG 图标均来自 Google 提供的 `Material Design Icons`：\n\nhttps:\u002F\u002Fgithub.com\u002Fgoogle\u002Fmaterial-design-icons\n\nhttps:\u002F\u002Ffonts.google.com\u002Ficons\n\nMonaspace 字体由 GitHub 提供：https:\u002F\u002Fgithub.com\u002Fgithubnext\u002Fmonaspace\n\n集成到应用中的 LlamaIndex 离线加载器代码来源于 LlamaHub：https:\u002F\u002Fllamahub.ai\n\nAwesome ChatGPT Prompts（用于模板中）：https:\u002F\u002Fgithub.com\u002Ff\u002Fawesome-chatgpt-prompts\u002F\n\n代码语法高亮由：https:\u002F\u002Fhighlightjs.org 提供\n\nMarkdown 解析由：https:\u002F\u002Fgithub.com\u002Fmarkdown-it\u002Fmarkdown-it 提供\n\nLaTeX 支持由：https:\u002F\u002Fkatex.org 提供\n\nPlaywright：https:\u002F\u002Fplaywright.dev\u002F","# PyGPT 快速上手指南\n\nPyGPT 是一款功能强大的开源桌面 AI 助手，支持本地运行。它不仅能连接 OpenAI (GPT-4\u002F5, o1 等)、Google Gemini、Anthropic Claude 等云端大模型，还能通过 Ollama 或 LlamaIndex 调用本地模型（如 Llama 3, DeepSeek, Mistral）。具备文件对话、代码执行、图像生成、语音交互及插件扩展等全功能模式。\n\n## 环境准备\n\n### 系统要求\n*   **操作系统**: Linux (GLIBC >= 2.35), Windows 10\u002F11, macOS。\n*   **Python 版本**: `>=3.10` 且 `\u003C3.14` (源码安装必需)。\n*   **硬件架构**: 64-bit。\n\n### 前置依赖\n*   **API Key**: 使用云端模型需准备对应的 API Key (如 OpenAI, Google 等)；使用本地模型 (Ollama) 无需 Key。\n*   **Linux 额外依赖** (若遇到图形界面或音频问题):\n    ```bash\n    sudo apt install libxcb-cursor0 portaudio19-dev libasound2-plugins\n    ```\n\n## 安装步骤\n\n您可以根据系统选择以下任意一种方式安装：\n\n### 方式一：直接下载二进制包 (推荐 Windows\u002FLinux 用户)\n访问官网下载页面获取编译好的版本：\n*   下载地址：https:\u002F\u002Fpygpt.net\u002F#download\n*   **Windows**: 下载 `.msi` 安装包直接安装。\n*   **Linux**: 下载 `.zip` 或 `AppImage`。\n    *   若使用 AppImage，需赋予执行权限：\n        ```bash\n        chmod +x .\u002FPyGPT-X.X.X-x86_64.AppImage\n        .\u002FPyGPT-X.X.X-x86_64.AppImage\n        ```\n\n### 方式二：通过 Snap 安装 (Linux)\n```bash\nsudo snap install pygpt\n```\n*注意：如需使用摄像头、麦克风或 Docker 功能，需额外授权：*\n```bash\nsudo snap connect pygpt:camera\nsudo snap connect pygpt:audio-record :audio-record\nsudo snap connect pygpt:audio-playback\nsudo snap connect pygpt:docker docker:docker-daemon\n```\n\n### 方式三：通过 Pip 安装 (跨平台)\n建议使用虚拟环境以避免依赖冲突。\n\n1. 创建并激活虚拟环境：\n   ```bash\n   python3 -m venv venv\n   source venv\u002Fbin\u002Factivate  # Windows 用户使用: venv\\Scripts\\activate\n   ```\n\n2. 安装 PyGPT：\n   ```bash\n   pip install pygpt-net\n   # 国内用户可使用清华源加速：\n   # pip install pygpt-net -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n   ```\n\n3. 
启动应用：\n   ```bash\n   pygpt\n   ```\n\n### 方式四：从源码运行 (开发者模式)\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Fszczyglis-dev\u002Fpy-gpt.git\ncd py-gpt\npython3 -m venv venv\nsource venv\u002Fbin\u002Factivate\npip install -r requirements.txt\npython3 run.py\n```\n\n## 基本使用\n\n1.  **启动应用**\n    安装完成后，在终端输入 `pygpt` (或通过桌面快捷方式) 启动图形界面。\n\n2.  **配置 API Key**\n    *   首次启动后，进入设置界面 (Settings)。\n    *   在 **Models** 或 **API Keys** 选项卡中，填入您的 OpenAI API Key 或其他服务商 Key。\n    *   *提示：若使用本地模型，请确保本地服务 (如 Ollama) 已启动，并在设置中选择对应的 Local 模型提供商。*\n\n3.  **开始对话 (Chat 模式)**\n    *   在主界面选择 **Chat** 模式。\n    *   在下方的输入框中输入指令，例如：\n        ```text\n        请用 Python 写一个计算斐波那契数列的函数，并解释其原理。\n        ```\n    *   点击发送，AI 将生成回复。\n\n4.  **高级功能示例**\n    *   **文件对话**: 切换到 **Chat with Files** 模式，上传 PDF\u002FTXT 文档，即可针对文档内容进行提问。\n    *   **代码执行**: 启用 **Code Interpreter** 插件，让 AI 直接编写并运行 Python 代码处理数据或绘图。\n    *   **图像生成**: 切换到 **Image** 模式，输入描述词即可调用 DALL-E 或其他模型生成图片。\n\n5.  **管理上下文**\n    应用自动保存对话历史。您可以随时在左侧栏加载之前的会话，或利用“日历”功能查找特定日期的对话记录。","数据分析师小林需要快速从本地数百页的行业报告 PDF 中提取关键数据，编写 Python 脚本进行可视化分析，并生成带语音解说的工作汇报视频。\n\n### 没有 py-gpt 时\n- **工具切换繁琐**：需要在浏览器聊天窗口、本地代码编辑器和独立的绘图软件之间反复复制粘贴，上下文极易断裂。\n- **文件处理受限**：网页版 AI 无法直接读取本地大量私有 PDF 文档，手动整理数据耗时耗力且容易出错。\n- **自动化能力弱**：生成的代码只能手动运行，无法让 AI 直接调用本地系统命令完成文件整理或环境配置。\n- **多模态缺失**：生成图表后，还需单独使用其他工具制作语音解说和视频，工作流被割裂成多个孤立环节。\n\n### 使用 py-gpt 后\n- **一站式桌面交互**：py-gpt 作为本地桌面助手，让小林在同一个界面内完成对话、编码、运行和文件管理，无需切换应用。\n- **本地文件深度对话**：利用内置的 LlamaIndex 功能，py-gpt 直接索引并“阅读”本地数百页 PDF，精准提取数据供分析使用。\n- **自主执行与代理**：通过 Agents 模式，py-gpt 不仅能写出分析代码，还能直接在本地环境中运行脚本、安装依赖库并生成图表文件。\n- **全流程多模态输出**：py-gpt 串联了图像生成、TTS 语音合成及视频制作插件，一键将分析结果转化为带解说的汇报视频。\n\npy-gpt 将割裂的 AI 工具链整合为本地化的自主智能工作流，极大提升了复杂任务的端到端交付效率。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fszczyglis-dev_py-gpt_af3a5005.png","szczyglis-dev","Marcin Szczygliński","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fszczyglis-dev_b94ca89e.png","Python\u002FPHP\u002FJS Developer",null,"Poland","https:\u002F\u002Fszczyglis.dev","https:\u002F\u002Fgithub.com\u002Fszczyglis-dev",[83,87,91,95,99],{"name":84,"color":85,"percentage":86},"Python","#3572A5",74.9,{"name":88,"color":89,"percentage":90},"JavaScript","#f1e05a",24.1,{"name":92,"color":93,"percentage":94},"CSS","#663399",0.9,{"name":96,"color":97,"percentage":98},"Shell","#89e051",0,{"name":100,"color":101,"percentage":98},"Batchfile","#C1F12E",1734,320,"2026-04-18T23:31:32","NOASSERTION","Linux, Windows, macOS","未说明（支持本地模型如 Ollama，具体 GPU 需求取决于所选本地模型；云端 API 模式无本地 GPU 要求）","未说明",{"notes":110,"python":111,"dependencies":112},"1. Linux 二进制版本要求 GLIBC >= 2.35。2. macOS 用户无法使用预编译二进制包，必须通过 PyPi 或源代码运行。3. 若使用 Snap 安装版，需手动授权摄像头、麦克风及 Docker 权限才能使用相应功能。4. 主要作为桌面客户端连接外部 API（如 OpenAI, Google, Anthropic 等）或本地服务（如 Ollama），自身不捆绑大型模型权重。5. 
遇到 Linux 图形界面问题时需安装 libxcb 相关库，音频问题需安装 portaudio 或 alsa 相关库。",">=3.10, \u003C3.14",[113,114,115,116,117,118,119,120],"PySide6 (隐含于 Qt 界面)","LlamaIndex","OpenAI SDK","PyInstaller (可选，用于编译)","Poetry (可选，用于依赖管理)","libxcb-cursor0 (Linux 图形依赖)","portaudio19-dev (Linux 音频依赖)","libasound2 (Linux 音频依赖)",[14,15,13,35],[123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142],"artificial-intelligence","chatbot","desktop-app","openai","ai","ai-assistant","llm","autonomous-agent","llama-index","gpt-4","claude","gemini","o1","ollama","deepseek","perplexity","grok","gpt-5","mcp","sora2","2026-03-27T02:49:30.150509","2026-04-19T15:38:04.329550",[146,151,156,161,166,171],{"id":147,"question_zh":148,"answer_zh":149,"source_url":150},42662,"如何在 Mac M1 (Apple Silicon) 上解决安装依赖缺失和模块找不到的问题？","在 Mac M1 上运行该项目时，可能会遇到 `ModuleNotFoundError`。建议尝试以下步骤：\n1. 完全卸载现有版本：`pip uninstall pygpt-net`\n2. 创建一个特定 Python 版本（如 3.8.11 或 3.11）的虚拟环境。\n3. 强制安装特定版本：`pip install pygpt-net==2.0.146`\n4. 如果仍然缺少模块（如 llama-index 相关），可能需要手动安装缺失的包，例如：`pip install llama-index-indices-managed-llama-cloud`。\n注意：这通常是由于当前操作系统环境下的依赖项兼容性问题导致的。","https:\u002F\u002Fgithub.com\u002Fszczyglis-dev\u002Fpy-gpt\u002Fissues\u002F16",{"id":152,"question_zh":153,"answer_zh":154,"source_url":155},42663,"是否支持代码高亮显示以及流式输出时的实时格式化？","是的，从版本 2.1.61 开始，已添加流模式（stream mode）下文本块的实时 HTML 格式化功能，包括语法高亮。\n- 版本 2.1.61 (2024-04-21)：添加了流模式下文本块的实时 HTML 格式化。\n- 版本 2.1.62 (2024-04-22)：如果未提供语言类型，添加了默认的代码语法高亮解析器（lexer）。\n现在代码不仅会在代码框中显示，还支持实时高亮和一键复制功能。","https:\u002F\u002Fgithub.com\u002Fszczyglis-dev\u002Fpy-gpt\u002Fissues\u002F43",{"id":157,"question_zh":158,"answer_zh":159,"source_url":160},42664,"如何使用 PyGPT 连接本地的 PrivateGPT 或其他兼容 OpenAI API 的本地模型而不需要 OpenAI 密钥？","默认情况下，插件（如 DALL-E）可能会向提示词中添加指令，导致即使使用本地模型也需要验证。解决方法如下：\n1. 默认情况下，现已关闭会自动添加指令的插件。\n2. 如果需要使用插件，请明确激活它们；否则保持禁用状态以避免混淆。\n3. 注意：插件设计主要针对 GPT-4，GPT-3.5 可能无法很好地处理多个系统提示符。只有一个系统提示符会被发送，额外的部分是由插件添加的。\n通过禁用不必要的插件，可以在没有 OpenAI API Key 的情况下使用自定义端点。","https:\u002F\u002Fgithub.com\u002Fszczyglis-dev\u002Fpy-gpt\u002Fissues\u002F52",{"id":162,"question_zh":163,"answer_zh":164,"source_url":165},42665,"程序标榜“免费”，为什么使用时感觉必须付费才能使用核心功能？","该程序本身是免费开源的（Free and Open Source），您可以自由下载和使用其核心功能而无需支付任何费用给开发者。\n用户产生的费用通常来自于调用的后端 AI 服务（如 OpenAI API），而非软件本身。如果您希望完全免费使用，可以配置软件连接到本地部署的开源模型（如通过 Ollama、LocalAI 或 PrivateGPT 等兼容 OpenAI 接口的本地服务），这样就不需要向任何公司支付代币费用。软件只是提供了一个图形化界面来管理这些连接。","https:\u002F\u002Fgithub.com\u002Fszczyglis-dev\u002Fpy-gpt\u002Fissues\u002F71",{"id":167,"question_zh":168,"answer_zh":169,"source_url":170},42666,"在安装或使用 LlamaIndex loaders 时遇到 LangChain 弃用警告或缺少依赖怎么办？","如果遇到 `LangChainDeprecationWarning` 或缺少特定文件类型（如 xlsx, pptx）的加载依赖，可以尝试以下操作：\n1. 激活您的 Python 虚拟环境。\n2. 安装必要的依赖包，例如：`pip install redis[hiredis]` 以及 `pip install torch transformers python-pptx Pillow`（后者用于解决 xlsx 等文件的依赖问题并使其在 UI 中加载）。\n3. 
如果涉及 Redis 连接被拒绝的问题，可能需要编写一个自定义启动脚本，在启动主程序前先检查并启动 Redis 服务。","https:\u002F\u002Fgithub.com\u002Fszczyglis-dev\u002Fpy-gpt\u002Fissues\u002F17",{"id":172,"question_zh":173,"answer_zh":174,"source_url":160},42667,"为什么在使用 GPT-3.5-turbo 模型时会出现插件导致的系统提示符冲突？","这是因为某些插件会自动向系统提示符（system prompt）中添加额外的指令。虽然只有一个系统提示符会被发送，但插件激活后会追加内容。\nGPT-3.5 模型在处理复杂的或多部分的系统提示符时表现不佳，容易导致错误或混乱。相比之下，插件功能更多是为 GPT-4 设计的。\n解决方案：如果您使用的是 GPT-3.5-turbo，建议不要激活那些会修改系统提示符的插件，或者在设置中确保插件处于禁用状态，以防止它们干扰模型的正常运行。",[176,181,186,191,196,201,206,211,216,221,226,231,236,241,246,251,256,261,266,271],{"id":177,"version":178,"summary_zh":179,"released_at":180},342354,"v2.7.12","- xAI SDK 升级至 v1.6.1。 - 在 xAI 提供商中新增视频生成支持。 - 新增模型：grok-imagine-image 和 grok-imagine-video。 - 优化了用户界面，使标签页和列之间的导航更加流畅。 - 优化了附件添加功能，新增仅在 API 端存储消息时才允许上传一次附件的选项，以及强制仅上传一次附件的选项。 - 更新了相关库。","2026-02-06T03:08:43",{"id":182,"version":183,"summary_zh":184,"released_at":185},342355,"v2.7.11","- 改进了列之间的焦点处理。- 修复了模型调试器的更新问题。","2026-02-05T02:50:49",{"id":187,"version":188,"summary_zh":189,"released_at":190},342356,"v2.7.10","- 修复了在创建新预设时头像可能被覆盖的问题。  \n- 修复了在第二列中打开新标签页时未创建新上下文的问题。  \n- 在输入框中新增了提示历史导航功能（Ctrl + 上\u002F下方向键）。  \n- 在加载图片查看器时，新增了图片初始居中显示功能。  \n- 为记事本小部件新增了标记\u002F取消标记功能。  \n- 新增18种语言：阿拉伯语（ar）、保加利亚语（bg）、捷克语（cs）、丹麦语（da）、芬兰语（fi）、希伯来语（he）、印地语（hi）、匈牙利语（hu）、日语（ja）、韩语（ko）、荷兰语（nl）、挪威语（no）、葡萄牙语（pt）、罗马尼亚语（ro）、俄语（ru）、斯洛伐克语（sk）、瑞典语（sv）、土耳其语（tr）。","2026-02-03T20:18:39",{"id":192,"version":193,"summary_zh":194,"released_at":195},342357,"v2.7.9","- 改进了实时音频模式。 - 在实时音频模式中新增了 xAI 服务提供商和 Grok 支持。","2026-01-08T00:18:36",{"id":197,"version":198,"summary_zh":199,"released_at":200},342358,"v2.7.8","- 新增 xAI Collections 远程工具，并将集合管理功能整合至 Remote Vector Stores 工具中。\n- 将 Remote Vector Stores 工具统一为适用于所有提供商的单一工具。\n- 新增 xAI Grok 音频输入输出提供商（Grok 的实时音频支持即将上线！）。\n- 优化了图像查看器工具，新增包含“上一张\u002F下一张”等功能的工具箱。","2026-01-06T21:51:24",{"id":202,"version":203,"summary_zh":204,"released_at":205},342359,"v2.7.7","- 在 xAI 中新增了对 Responses API 的支持。\n- 新增了 xAI 远程工具：远程 MCP、代码执行。\n- 新增了 Anthropic 远程工具：远程 MCP、网页抓取、代码执行。","2026-01-05T01:11:00",{"id":207,"version":208,"summary_zh":209,"released_at":210},342360,"v2.7.6","- 修复了与 xAI SDK 的兼容性问题，并解决了 Grok 模型返回空响应的问题。\n- 修复了 Snap 软件包中缺失的库文件。\n- 在图像查看器中新增了缩放和拖动功能。\n- 为文本输入框和 Web 小组件添加了缩放菜单。\n- 新增了使用鼠标中键点击关闭标签页的功能。","2026-01-03T21:40:22",{"id":212,"version":213,"summary_zh":214,"released_at":215},342361,"v2.7.5","- 在“计算机使用”模式中新增了 Sandbox\u002FPlaywright 选项。- 在“计算机使用”模式中增加了对 Google 模型的支持，并推出了一款新模型：gemini-2.5-computer-use-preview-10-2025。- 在“研究”模式中增加了对 Google 模型的支持，并推出了一款新模型：deep-research-pro-preview-12-2025。- 新增了 Google 向量存储工具。","2026-01-03T02:46:01",{"id":217,"version":218,"summary_zh":219,"released_at":220},342362,"v2.7.4","- 添加了启动画面。- 为图片和视频输出添加了预览和下载链接。- 在图片和视频模式中增加了负面提示输入框。- 改进了焦点处理。- 用户界面优化。","2025-12-31T21:17:19",{"id":222,"version":223,"summary_zh":224,"released_at":225},342363,"v2.7.3","- 在图像和视频生成模式中新增了“混音\u002F扩展”选项。此功能允许将先前生成的图像或视频用作参考素材，从而在不从头开始创建新内容的情况下，对已有图像或视频中的元素进行添加或更改。详情请参阅文档：`模式 -> 图像和视频生成 -> 混音、编辑或扩展`。","2025-12-30T23:24:04",{"id":227,"version":228,"summary_zh":229,"released_at":230},342364,"v2.7.2","- Fixed: non-searchable combobox width.\r\n- Improved updater.\r\n- Added .AppImage build.","2025-12-29T23:31:31",{"id":232,"version":233,"summary_zh":234,"released_at":235},342365,"v2.7.1","- Improved UI elements.\r\n- Optimized Painter rendering and redraw functions.\r\n- Added Pack\u002FUnpack feature to File Explorer.\r\n- Fixed: image restoration in Painter.\r\n- Fixed: tab title updating upon context 
deletion.","2025-12-28T19:14:59",{"id":237,"version":238,"summary_zh":239,"released_at":240},342366,"v2.5.97","- Fix: attribute error in prev ctx.","2025-08-11T04:07:23",{"id":242,"version":243,"summary_zh":244,"released_at":245},342367,"v2.7.0","- Added multi-select functionality using CTRL or SHIFT and batch actions to the context list, preset list, attachments list, and other list-based widgets.\r\n- Added a search field to comboboxes, such as the model selector.\r\n- Added a Duplicate option to the models editor.\r\n- Added drag-and-drop to context list.\r\n- Added multi-select, drag-and-drop, Cut, Copy, and Paste features to the File Explorer.\r\n- Fix: scroll restoration after actions in the context list.\r\n- Fix: 'Use as image' option in the File Explorer.\r\n- Fix: current preset system prompt disappearing on profile change.\r\n- Other UI fixes\u002Fimprovements.","2025-12-28T07:06:10",{"id":247,"version":248,"summary_zh":249,"released_at":250},342368,"v2.6.67","- Added a provider filter to the models editor.\r\n- Added video options (resolution, duration) to the toolbox.\r\n- Updated the models configuration.","2025-12-26T14:39:09",{"id":252,"version":253,"summary_zh":254,"released_at":255},342369,"v2.6.66","- Added Sora 2 support - #155.\r\n- Added Nano Banana support.\r\n- Added Qdrant Vector Store - merged PR #147 by @Anush008.\r\n- Added models: gpt-5.2, gpt-image-1.5, gemini-3, nano-banana-pro, sora-2, claude-sonnet-4.5, claude-opus-4.5, veo-3.1.\r\n- Added Select\u002Funselect All option in checkbox lists.\r\n- OpenAI SDK upgraded to 2.14.0, Anthropic SDK upgraded to 0.75.0, xAI SDK upgraded to 1.5.0, Google GenAI upgraded to 1.56.0, LlamaIndex upgraded to 0.14.10.\r\n- Fix: charset-normalizer 3.2.0 circular import - #152.\r\n- Fix: Google client closed state.","2025-12-26T03:21:48",{"id":257,"version":258,"summary_zh":259,"released_at":260},342370,"v2.6.65","- Added drag and drop functionality for files and directories from the filesystem in attachments and file explorer.\r\n- Added automatic thumbnail generation when uploading avatars.\r\n- Added a last status timer.\r\n- Added a fade effect to collapsed user messages.\r\n- Added a scroll area to the agent options in the presets editor.\r\n- Added a hover effect to lists.\r\n- Improved UI\u002FUX.","2025-09-28T09:19:42",{"id":262,"version":263,"summary_zh":264,"released_at":265},342371,"v2.6.64","- Added translations to agent headers.\r\n- Improved presets tabs.\r\n- Added support for music (Lyria) in both image and video modes (beta).","2025-09-27T17:32:52",{"id":267,"version":268,"summary_zh":269,"released_at":270},342372,"v2.6.63","- Improved agents' workflows.\r\n- Enhanced the display of agents' steps in the UI.","2025-09-27T10:01:23",{"id":272,"version":273,"summary_zh":274,"released_at":275},342373,"v2.6.62","- Enhanced agent workflow execution.\r\n- Improved preset list handling by adding a drop field indicator and fixing auto-scroll.\r\n- Added middle-mouse button panning to Painter.\r\n- Added an input character counter.","2025-09-26T20:00:16"]