[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-livekit--agents":3,"tool-livekit--agents":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 
代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",144730,2,"2026-04-07T23:26:32",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",107888,"2026-04-06T11:32:50",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},4721,"markitdown","microsoft\u002Fmarkitdown","MarkItDown 是一款由微软 AutoGen 团队打造的轻量级 Python 工具，专为将各类文件高效转换为 Markdown 格式而设计。它支持 PDF、Word、Excel、PPT、图片（含 OCR）、音频（含语音转录）、HTML 乃至 YouTube 链接等多种格式的解析，能够精准提取文档中的标题、列表、表格和链接等关键结构信息。\n\n在人工智能应用日益普及的今天，大语言模型（LLM）虽擅长处理文本，却难以直接读取复杂的二进制办公文档。MarkItDown 恰好解决了这一痛点，它将非结构化或半结构化的文件转化为模型“原生理解”且 Token 效率极高的 Markdown 格式，成为连接本地文件与 AI 分析 pipeline 的理想桥梁。此外，它还提供了 MCP（模型上下文协议）服务器，可无缝集成到 Claude Desktop 等 LLM 应用中。\n\n这款工具特别适合开发者、数据科学家及 AI 研究人员使用，尤其是那些需要构建文档检索增强生成（RAG）系统、进行批量文本分析或希望让 AI 
助手直接“阅读”本地文件的用户。虽然生成的内容也具备一定可读性，但其核心优势在于为机器",93400,"2026-04-06T19:52:38",[52,14],"插件",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":10,"last_commit_at":59,"category_tags":60,"status":17},4487,"LLMs-from-scratch","rasbt\u002FLLMs-from-scratch","LLMs-from-scratch 是一个基于 PyTorch 的开源教育项目，旨在引导用户从零开始一步步构建一个类似 ChatGPT 的大型语言模型（LLM）。它不仅是同名技术著作的官方代码库，更提供了一套完整的实践方案，涵盖模型开发、预训练及微调的全过程。\n\n该项目主要解决了大模型领域“黑盒化”的学习痛点。许多开发者虽能调用现成模型，却难以深入理解其内部架构与训练机制。通过亲手编写每一行核心代码，用户能够透彻掌握 Transformer 架构、注意力机制等关键原理，从而真正理解大模型是如何“思考”的。此外，项目还包含了加载大型预训练权重进行微调的代码，帮助用户将理论知识延伸至实际应用。\n\nLLMs-from-scratch 特别适合希望深入底层原理的 AI 开发者、研究人员以及计算机专业的学生。对于不满足于仅使用 API，而是渴望探究模型构建细节的技术人员而言，这是极佳的学习资源。其独特的技术亮点在于“循序渐进”的教学设计：将复杂的系统工程拆解为清晰的步骤，配合详细的图表与示例，让构建一个虽小但功能完备的大模型变得触手可及。无论你是想夯实理论基础，还是为未来研发更大规模的模型做准备",90106,"2026-04-06T11:19:32",[35,15,13,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":66,"readme_en":67,"readme_zh":68,"quickstart_zh":69,"use_case_zh":70,"hero_image_url":71,"owner_login":72,"owner_name":73,"owner_avatar_url":74,"owner_bio":75,"owner_company":76,"owner_location":76,"owner_email":76,"owner_twitter":72,"owner_website":77,"owner_url":78,"languages":79,"stars":99,"forks":100,"last_commit_at":101,"license":102,"difficulty_score":10,"env_os":103,"env_gpu":104,"env_ram":105,"env_deps":106,"category_tags":116,"github_topics":119,"view_count":32,"oss_zip_url":76,"oss_zip_packed_at":76,"status":17,"created_at":125,"updated_at":126,"faqs":127,"releases":156},5467,"livekit\u002Fagents","agents","A framework for building realtime voice AI agents 🤖🎙️📹 ","agents 是一个专为构建实时语音 AI 助手而设计的开源框架，让开发者能够轻松创建能“看、听、理解”的多模态智能体。它主要解决了在服务器端开发低延迟、高互动性语音应用时的技术复杂性，帮助开发者快速整合语音识别、大语言模型和语音合成等能力，无需从零搭建底层架构。\n\n这款工具非常适合希望开发实时对话系统的软件工程师、AI 研究者以及需要构建智能客服或虚拟助理的企业技术团队。无论是打造能进行自然对话的虚拟人，还是集成电话通信功能的智能系统，agents 都能提供强大支持。\n\n其独特亮点在于灵活的生态集成能力，允许用户自由组合不同的技术组件；内置的任务调度系统可高效连接终端用户与智能体；同时支持语义化发言检测，显著减少对话中的打断现象。此外，它还原生支持 MCP 协议，方便集成各类外部工具，并配备完整的测试框架以确保应用稳定性。作为完全开源的项目，agents 
允许用户在自有服务器上部署整套系统，包括广泛使用的 LiveKit WebRTC 媒体服务器，为注重数据隐私和定制化的团队提供了理想选择。","\u003C!--BEGIN_BANNER_IMAGE-->\n\n\u003Cpicture>\n  \u003Csource media=\"(prefers-color-scheme: dark)\" srcset=\"\u002F.github\u002Fbanner_dark.png\">\n  \u003Csource media=\"(prefers-color-scheme: light)\" srcset=\"\u002F.github\u002Fbanner_light.png\">\n  \u003Cimg style=\"width:100%;\" alt=\"The LiveKit icon, the name of the repository and some sample code in the background.\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Flivekit_agents_readme_28ff49a40f32.png\">\n\u003C\u002Fpicture>\n\n\u003C!--END_BANNER_IMAGE-->\n\u003Cbr \u002F>\n\n![PyPI - Version](https:\u002F\u002Fimg.shields.io\u002Fpypi\u002Fv\u002Flivekit-agents)\n[![PyPI Downloads](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Flivekit_agents_readme_08d74b83d284.png)](https:\u002F\u002Fpepy.tech\u002Fprojects\u002Flivekit-agents)\n[![Slack community](https:\u002F\u002Fimg.shields.io\u002Fendpoint?url=https%3A%2F%2Flivekit.io%2Fbadges%2Fslack)](https:\u002F\u002Flivekit.io\u002Fjoin-slack)\n[![Twitter Follow](https:\u002F\u002Fimg.shields.io\u002Ftwitter\u002Ffollow\u002Flivekit)](https:\u002F\u002Ftwitter.com\u002Flivekit)\n[![Ask DeepWiki for understanding the codebase](https:\u002F\u002Fdeepwiki.com\u002Fbadge.svg)](https:\u002F\u002Fdeepwiki.com\u002Flivekit\u002Fagents)\n[![License](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Flicense\u002Flivekit\u002Flivekit)](https:\u002F\u002Fgithub.com\u002Flivekit\u002Flivekit\u002Fblob\u002Fmaster\u002FLICENSE)\n\n\u003Cbr \u002F>\n\nLooking for the JS\u002FTS library? Check out [AgentsJS](https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents-js)\n\n## What is Agents?\n\n\u003C!--BEGIN_DESCRIPTION-->\n\nThe Agent Framework is designed for building realtime, programmable participants\nthat run on servers. 
Use it to create conversational, multi-modal voice\nagents that can see, hear, and understand.\n\n\u003C!--END_DESCRIPTION-->\n\n## Features\n\n- **Flexible integrations**: A comprehensive ecosystem to mix and match the right STT, LLM, TTS, and Realtime API to suit your use case.\n- **Integrated job scheduling**: Built-in task scheduling and distribution with [dispatch APIs](https:\u002F\u002Fdocs.livekit.io\u002Fagents\u002Fbuild\u002Fdispatch\u002F) to connect end users to agents.\n- **Extensive WebRTC clients**: Build client applications using LiveKit's open-source SDK ecosystem, supporting all major platforms.\n- **Telephony integration**: Works seamlessly with LiveKit's [telephony stack](https:\u002F\u002Fdocs.livekit.io\u002Fsip\u002F), allowing your agent to make calls to or receive calls from phones.\n- **Exchange data with clients**: Use [RPCs](https:\u002F\u002Fdocs.livekit.io\u002Fhome\u002Fclient\u002Fdata\u002Frpc\u002F) and other [Data APIs](https:\u002F\u002Fdocs.livekit.io\u002Fhome\u002Fclient\u002Fdata\u002F) to seamlessly exchange data with clients.\n- **Semantic turn detection**: Uses a transformer model to detect when a user is done with their turn, helps to reduce interruptions.\n- **MCP support**: Native support for MCP. 
Integrate tools provided by MCP servers with one loc.\n- **Builtin test framework**: Write tests and use judges to ensure your agent is performing as expected.\n- **Open-source**: Fully open-source, allowing you to run the entire stack on your own servers, including [LiveKit server](https:\u002F\u002Fgithub.com\u002Flivekit\u002Flivekit), one of the most widely used WebRTC media servers.\n\n## Installation\n\nTo install the core Agents library, along with plugins for popular model providers:\n\n```bash\npip install \"livekit-agents[openai,silero,deepgram,cartesia,turn-detector]~=1.4\"\n```\n\n## Docs and guides\n\nDocumentation on the framework and how to use it can be found [here](https:\u002F\u002Fdocs.livekit.io\u002Fagents\u002F)\n\n### Building with AI coding agents\n\nIf you're using an AI coding assistant to build with LiveKit Agents, we recommend the following setup for the best results:\n\n1. **Install the [LiveKit Docs MCP server](https:\u002F\u002Fdocs.livekit.io\u002Fmcp)** — Gives your coding agent access to up-to-date LiveKit documentation, code search across LiveKit repositories, and working examples.\n\n2. 
**Install the [LiveKit Agent Skill](https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagent-skills)** — Provides your coding agent with architectural guidance and best practices for building voice AI applications, including workflow design, handoffs, tasks, and testing patterns.\n\n   ```shell\n   npx skills add livekit\u002Fagent-skills --skill livekit-agents\n   ```\n\nThe Agent Skill works best alongside the MCP server: the skill teaches your agent *how to approach* building with LiveKit, while the MCP server provides the *current API details* to implement it correctly.\n\n## Core concepts\n\n- Agent: An LLM-based application with defined instructions.\n- AgentSession: A container for agents that manages interactions with end users.\n- entrypoint: The starting point for an interactive session, similar to a request handler in a web server.\n- AgentServer: The main process that coordinates job scheduling and launches agents for user sessions.\n\n## Usage\n\n### Simple voice agent\n\n---\n\n```python\nfrom livekit.agents import (\n    Agent,\n    AgentServer,\n    AgentSession,\n    JobContext,\n    RunContext,\n    cli,\n    function_tool,\n    inference,\n)\nfrom livekit.plugins import silero\n\n\n@function_tool\nasync def lookup_weather(\n    context: RunContext,\n    location: str,\n):\n    \"\"\"Used to look up weather information.\"\"\"\n\n    return {\"weather\": \"sunny\", \"temperature\": 70}\n\n\nserver = AgentServer()\n\n\n@server.rtc_session()\nasync def entrypoint(ctx: JobContext):\n    session = AgentSession(\n        vad=silero.VAD.load(),\n        # any combination of STT, LLM, TTS, or realtime API can be used\n        # this example shows LiveKit Inference, a unified API to access different models via LiveKit Cloud\n        # to use model provider keys directly, replace with the following:\n        # from livekit.plugins import deepgram, openai, cartesia\n        # stt=deepgram.STT(model=\"nova-3\"),\n        # 
llm=openai.LLM(model=\"gpt-4.1-mini\"),\n        # tts=cartesia.TTS(model=\"sonic-3\", voice=\"9626c31c-bec5-4cca-baa8-f8ba9e84c8bc\"),\n        stt=inference.STT(\"deepgram\u002Fnova-3\", language=\"multi\"),\n        llm=inference.LLM(\"openai\u002Fgpt-4.1-mini\"),\n        tts=inference.TTS(\"cartesia\u002Fsonic-3\", voice=\"9626c31c-bec5-4cca-baa8-f8ba9e84c8bc\"),\n    )\n\n    agent = Agent(\n        instructions=\"You are a friendly voice assistant built by LiveKit.\",\n        tools=[lookup_weather],\n    )\n\n    await session.start(agent=agent, room=ctx.room)\n    await session.generate_reply(instructions=\"greet the user and ask about their day\")\n\n\nif __name__ == \"__main__\":\n    cli.run_app(server)\n```\n\nYou'll need the following environment variables for this example:\n\n- LIVEKIT_URL\n- LIVEKIT_API_KEY\n- LIVEKIT_API_SECRET\n\n### Multi-agent handoff\n\n---\n\nThis code snippet is abbreviated. For the full example, see [multi_agent.py](examples\u002Fvoice_agents\u002Fmulti_agent.py)\n\n```python\n...\nclass IntroAgent(Agent):\n    def __init__(self) -> None:\n        super().__init__(\n            instructions=f\"You are a story teller. 
Your goal is to gather a few pieces of information from the user to make the story personalized and engaging.\"\n            \"Ask the user for their name and where they are from\"\n        )\n\n    async def on_enter(self):\n        self.session.generate_reply(instructions=\"greet the user and gather information\")\n\n    @function_tool\n    async def information_gathered(\n        self,\n        context: RunContext,\n        name: str,\n        location: str,\n    ):\n        \"\"\"Called when the user has provided the information needed to make the story personalized and engaging.\n\n        Args:\n            name: The name of the user\n            location: The location of the user\n        \"\"\"\n\n        context.userdata.name = name\n        context.userdata.location = location\n\n        story_agent = StoryAgent(name, location)\n        return story_agent, \"Let's start the story!\"\n\n\nclass StoryAgent(Agent):\n    def __init__(self, name: str, location: str) -> None:\n        super().__init__(\n            instructions=f\"You are a storyteller. 
Use the user's information in order to make the story personalized.\"\n            f\"The user's name is {name}, from {location}\"\n            # override the default model, switching to Realtime API from standard LLMs\n            llm=openai.realtime.RealtimeModel(voice=\"echo\"),\n            chat_ctx=chat_ctx,\n        )\n\n    async def on_enter(self):\n        self.session.generate_reply()\n\n\n@server.rtc_session()\nasync def entrypoint(ctx: JobContext):\n    userdata = StoryData()\n    session = AgentSession[StoryData](\n        vad=silero.VAD.load(),\n        stt=\"deepgram\u002Fnova-3\",\n        llm=\"openai\u002Fgpt-4.1-mini\",\n        tts=\"cartesia\u002Fsonic-3:9626c31c-bec5-4cca-baa8-f8ba9e84c8bc\",\n        userdata=userdata,\n    )\n\n    await session.start(\n        agent=IntroAgent(),\n        room=ctx.room,\n    )\n...\n```\n\n### Testing\n\nAutomated tests are essential for building reliable agents, especially with the non-deterministic behavior of LLMs. LiveKit Agents include native test integration to help you create dependable agents.\n\n```python\n@pytest.mark.asyncio\nasync def test_no_availability() -> None:\n    llm = google.LLM()\n    async AgentSession(llm=llm) as sess:\n        await sess.start(MyAgent())\n        result = await sess.run(\n            user_input=\"Hello, I need to place an order.\"\n        )\n        result.expect.skip_next_event_if(type=\"message\", role=\"assistant\")\n        result.expect.next_event().is_function_call(name=\"start_order\")\n        result.expect.next_event().is_function_call_output()\n        await (\n            result.expect.next_event()\n            .is_message(role=\"assistant\")\n            .judge(llm, intent=\"assistant should be asking the user what they would like\")\n        )\n\n```\n\n## Examples\n\nFor more examples and detailed setup instructions, see the [examples directory](examples\u002F). 
For even more examples, see the [python-agents-examples](https:\u002F\u002Fgithub.com\u002Flivekit-examples\u002Fpython-agents-examples) repository.\n\n\u003Ctable>\n\u003Ctr>\n\u003Ctd width=\"50%\">\n\u003Ch3>🎙️ Starter Agent\u003C\u002Fh3>\n\u003Cp>A starter agent optimized for voice conversations.\u003C\u002Fp>\n\u003Cp>\n\u003Ca href=\"examples\u002Fvoice_agents\u002Fbasic_agent.py\">Code\u003C\u002Fa>\n\u003C\u002Fp>\n\u003C\u002Ftd>\n\u003Ctd width=\"50%\">\n\u003Ch3>🔄 Multi-user push to talk\u003C\u002Fh3>\n\u003Cp>Responds to multiple users in the room via push-to-talk.\u003C\u002Fp>\n\u003Cp>\n\u003Ca href=\"examples\u002Fvoice_agents\u002Fpush_to_talk.py\">Code\u003C\u002Fa>\n\u003C\u002Fp>\n\u003C\u002Ftd>\n\u003C\u002Ftr>\n\n\u003Ctr>\n\u003Ctd width=\"50%\">\n\u003Ch3>🎵 Background audio\u003C\u002Fh3>\n\u003Cp>Background ambient and thinking audio to improve realism.\u003C\u002Fp>\n\u003Cp>\n\u003Ca href=\"examples\u002Fvoice_agents\u002Fbackground_audio.py\">Code\u003C\u002Fa>\n\u003C\u002Fp>\n\u003C\u002Ftd>\n\u003Ctd width=\"50%\">\n\u003Ch3>🛠️ Dynamic tool creation\u003C\u002Fh3>\n\u003Cp>Creating function tools dynamically.\u003C\u002Fp>\n\u003Cp>\n\u003Ca href=\"examples\u002Fvoice_agents\u002Fdynamic_tool_creation.py\">Code\u003C\u002Fa>\n\u003C\u002Fp>\n\u003C\u002Ftd>\n\u003C\u002Ftr>\n\n\u003Ctr>\n\u003Ctd width=\"50%\">\n\u003Ch3>☎️ Outbound caller\u003C\u002Fh3>\n\u003Cp>Agent that makes outbound phone calls\u003C\u002Fp>\n\u003Cp>\n\u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit-examples\u002Foutbound-caller-python\">Code\u003C\u002Fa>\n\u003C\u002Fp>\n\u003C\u002Ftd>\n\u003Ctd width=\"50%\">\n\u003Ch3>📋 Structured output\u003C\u002Fh3>\n\u003Cp>Using structured output from LLM to guide TTS tone.\u003C\u002Fp>\n\u003Cp>\n\u003Ca href=\"examples\u002Fvoice_agents\u002Fstructured_output.py\">Code\u003C\u002Fa>\n\u003C\u002Fp>\n\u003C\u002Ftd>\n\u003C\u002Ftr>\n\n\u003Ctr>\n\u003Ctd width=\"50%\">\n\u003Ch3>🔌 MCP 
support\u003C\u002Fh3>\n\u003Cp>Use tools from MCP servers\u003C\u002Fp>\n\u003Cp>\n\u003Ca href=\"examples\u002Fvoice_agents\u002Fmcp\">Code\u003C\u002Fa>\n\u003C\u002Fp>\n\u003C\u002Ftd>\n\u003Ctd width=\"50%\">\n\u003Ch3>💬 Text-only agent\u003C\u002Fh3>\n\u003Cp>Skip voice altogether and use the same code for text-only integrations\u003C\u002Fp>\n\u003Cp>\n\u003Ca href=\"examples\u002Fother\u002Ftext_only.py\">Code\u003C\u002Fa>\n\u003C\u002Fp>\n\u003C\u002Ftd>\n\u003C\u002Ftr>\n\n\u003Ctr>\n\u003Ctd width=\"50%\">\n\u003Ch3>📝 Multi-user transcriber\u003C\u002Fh3>\n\u003Cp>Produce transcriptions from all users in the room\u003C\u002Fp>\n\u003Cp>\n\u003Ca href=\"examples\u002Fother\u002Ftranscription\u002Fmulti-user-transcriber.py\">Code\u003C\u002Fa>\n\u003C\u002Fp>\n\u003C\u002Ftd>\n\u003Ctd width=\"50%\">\n\u003Ch3>🎥 Video avatars\u003C\u002Fh3>\n\u003Cp>Add an AI avatar with Tavus, Hedra, Bithuman, LemonSlice, and more\u003C\u002Fp>\n\u003Cp>\n\u003Ca href=\"examples\u002Favatar_agents\u002F\">Code\u003C\u002Fa>\n\u003C\u002Fp>\n\u003C\u002Ftd>\n\u003C\u002Ftr>\n\n\u003Ctr>\n\u003Ctd width=\"50%\">\n\u003Ch3>🍽️ Restaurant ordering and reservations\u003C\u002Fh3>\n\u003Cp>Full example of an agent that handles calls for a restaurant.\u003C\u002Fp>\n\u003Cp>\n\u003Ca href=\"examples\u002Fvoice_agents\u002Frestaurant_agent.py\">Code\u003C\u002Fa>\n\u003C\u002Fp>\n\u003C\u002Ftd>\n\u003Ctd width=\"50%\">\n\u003Ch3>👁️ Gemini Live vision\u003C\u002Fh3>\n\u003Cp>Full example (including iOS app) of Gemini Live agent that can see.\u003C\u002Fp>\n\u003Cp>\n\u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit-examples\u002Fvision-demo\">Code\u003C\u002Fa>\n\u003C\u002Fp>\n\u003C\u002Ftd>\n\u003C\u002Ftr>\n\n\u003C\u002Ftable>\n\n## Running your agent\n\n### Testing in terminal\n\n```shell\npython myagent.py console\n```\n\nRuns your agent in terminal mode, enabling local audio input and output for testing.\nThis mode doesn't require external servers or dependencies 
and is useful for quickly validating behavior.\n\n### Developing with LiveKit clients\n\n```shell\npython myagent.py dev\n```\n\nStarts the agent server and enables hot reloading when files change. This mode allows each process to host multiple concurrent agents efficiently.\n\nThe agent connects to LiveKit Cloud or your self-hosted server. Set the following environment variables:\n- LIVEKIT_URL\n- LIVEKIT_API_KEY\n- LIVEKIT_API_SECRET\n\nYou can connect using any LiveKit client SDK or telephony integration.\nTo get started quickly, try the [Agents Playground](https:\u002F\u002Fagents-playground.livekit.io\u002F).\n\n### Running for production\n\n```shell\npython myagent.py start\n```\n\nRuns the agent with production-ready optimizations.\n\n## Contributing\n\nThe Agents framework is under active development in a rapidly evolving field. We welcome and appreciate contributions of any kind, be it feedback, bugfixes, features, new plugins and tools, or better documentation. You can file issues under this repo, open a PR, or chat with us in the [LiveKit community](https:\u002F\u002Fdocs.livekit.io\u002Fintro\u002Fcommunity\u002F).\n\n### Development setup\n\nThis project uses [uv](https:\u002F\u002Fdocs.astral.sh\u002Fuv\u002F) for package management. To install dependencies for development:\n\n```shell\nuv sync --all-extras --dev\n```\n\n### Examples\n\nThis project includes many examples in the [`examples`](examples\u002F) directory. 
To run them, create the file `examples\u002F.env` with credentials for LiveKit Server and any necessary model providers (see `examples\u002F.env.example`), then run:\n\n```shell\nuv run examples\u002Fvoice_agents\u002Fbasic_agent.py dev\n```\n\nFor more information, see the [examples README](examples\u002FREADME.md).\n\n### Tests\n\nUnit tests are in the `tests` directory and can be run with:\n\n```shell\nuv run pytest tests\u002Ftest_tools.py\n```\n\nIntegration tests for each plugin require various API credentials and run automatically in GitHub CI for PRs submitted by project maintainers. See the [tests workflow](.github\u002Fworkflows\u002Ftests.yml) for details.\n\n### Formatting\n\nThis project uses [ruff](https:\u002F\u002Fgithub.com\u002Fastral-sh\u002Fruff) for formatting and linting:\n\n```shell\nuv run ruff format\nuv run ruff check --fix\n```\n\n### Documentation\n\nTo generate docs locally with [pdoc](https:\u002F\u002Fgithub.com\u002Fpdoc3\u002Fpdoc):\n\n```shell\nuv sync --all-extras --group docs\nuv run --active pdoc --skip-errors --html --output-dir=docs livekit\n```\n\n\u003C!--BEGIN_REPO_NAV-->\n\u003Cbr\u002F>\u003Ctable>\n\u003Cthead>\u003Ctr>\u003Cth colspan=\"2\">LiveKit Ecosystem\u003C\u002Fth>\u003C\u002Ftr>\u003C\u002Fthead>\n\u003Ctbody>\n\u003Ctr>\u003Ctd>Agents SDKs\u003C\u002Ftd>\u003Ctd>\u003Cb>Python\u003C\u002Fb> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents-js\">Node.js\u003C\u002Fa>\u003C\u002Ftd>\u003C\u002Ftr>\u003Ctr>\u003C\u002Ftr>\n\u003Ctr>\u003Ctd>LiveKit SDKs\u003C\u002Ftd>\u003Ctd>\u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fclient-sdk-js\">Browser\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fclient-sdk-swift\">Swift\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fclient-sdk-android\">Android\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fclient-sdk-flutter\">Flutter\u003C\u002Fa> · 
\u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fclient-sdk-react-native\">React Native\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Frust-sdks\">Rust\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fnode-sdks\">Node.js\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fpython-sdks\">Python\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fclient-sdk-unity\">Unity\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fclient-sdk-unity-web\">Unity (WebGL)\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fclient-sdk-esp32\">ESP32\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fclient-sdk-cpp\">C++\u003C\u002Fa>\u003C\u002Ftd>\u003C\u002Ftr>\u003Ctr>\u003C\u002Ftr>\n\u003Ctr>\u003Ctd>Starter Apps\u003C\u002Ftd>\u003Ctd>\u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit-examples\u002Fagent-starter-python\">Python Agent\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit-examples\u002Fagent-starter-node\">TypeScript Agent\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit-examples\u002Fagent-starter-react\">React App\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit-examples\u002Fagent-starter-swift\">SwiftUI App\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit-examples\u002Fagent-starter-android\">Android App\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit-examples\u002Fagent-starter-flutter\">Flutter App\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit-examples\u002Fagent-starter-react-native\">React Native App\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit-examples\u002Fagent-starter-embed\">Web Embed\u003C\u002Fa>\u003C\u002Ftd>\u003C\u002Ftr>\u003Ctr>\u003C\u002Ftr>\n\u003Ctr>\u003Ctd>UI 
Components\u003C\u002Ftd>\u003Ctd>\u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fcomponents-js\">React\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fcomponents-android\">Android Compose\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fcomponents-swift\">SwiftUI\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fcomponents-flutter\">Flutter\u003C\u002Fa>\u003C\u002Ftd>\u003C\u002Ftr>\u003Ctr>\u003C\u002Ftr>\n\u003Ctr>\u003Ctd>Server APIs\u003C\u002Ftd>\u003Ctd>\u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fnode-sdks\">Node.js\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fserver-sdk-go\">Golang\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fserver-sdk-ruby\">Ruby\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fserver-sdk-kotlin\">Java\u002FKotlin\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fpython-sdks\">Python\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Frust-sdks\">Rust\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fagence104\u002Flivekit-server-sdk-php\">PHP (community)\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FpabloFuente\u002Flivekit-server-sdk-dotnet\">.NET (community)\u003C\u002Fa>\u003C\u002Ftd>\u003C\u002Ftr>\u003Ctr>\u003C\u002Ftr>\n\u003Ctr>\u003Ctd>Resources\u003C\u002Ftd>\u003Ctd>\u003Ca href=\"https:\u002F\u002Fdocs.livekit.io\">Docs\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fdocs.livekit.io\u002Fmcp\">Docs MCP Server\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Flivekit-cli\">CLI\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fcloud.livekit.io\">LiveKit Cloud\u003C\u002Fa>\u003C\u002Ftd>\u003C\u002Ftr>\u003Ctr>\u003C\u002Ftr>\n\u003Ctr>\u003Ctd>LiveKit Server OSS\u003C\u002Ftd>\u003Ctd>\u003Ca 
href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Flivekit\">LiveKit server\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fegress\">Egress\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fingress\">Ingress\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fsip\">SIP\u003C\u002Fa>\u003C\u002Ftd>\u003C\u002Ftr>\u003Ctr>\u003C\u002Ftr>\n\u003Ctr>\u003Ctd>Community\u003C\u002Ftd>\u003Ctd>\u003Ca href=\"https:\u002F\u002Fcommunity.livekit.io\">Developer Community\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Flivekit.io\u002Fjoin-slack\">Slack\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fx.com\u002Flivekit\">X\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fwww.youtube.com\u002F@livekit_io\">YouTube\u003C\u002Fa>\u003C\u002Ftd>\u003C\u002Ftr>\n\u003C\u002Ftbody>\n\u003C\u002Ftable>\n\u003C!--END_REPO_NAV-->\n","\u003C!--BEGIN_BANNER_IMAGE-->\n\n\u003Cpicture>\n  \u003Csource media=\"(prefers-color-scheme: dark)\" srcset=\"\u002F.github\u002Fbanner_dark.png\">\n  \u003Csource media=\"(prefers-color-scheme: light)\" srcset=\"\u002F.github\u002Fbanner_light.png\">\n  \u003Cimg style=\"width:100%;\" alt=\"LiveKit 的图标、仓库名称以及背景中的示例代码。\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Flivekit_agents_readme_28ff49a40f32.png\">\n\u003C\u002Fpicture>\n\n\u003C!--END_BANNER_IMAGE-->\n\u003Cbr \u002F>\n\n![PyPI - 版本](https:\u002F\u002Fimg.shields.io\u002Fpypi\u002Fv\u002Flivekit-agents)\n[![PyPI 下载量](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Flivekit_agents_readme_08d74b83d284.png)](https:\u002F\u002Fpepy.tech\u002Fprojects\u002Flivekit-agents)\n[![Slack 社区](https:\u002F\u002Fimg.shields.io\u002Fendpoint?url=https%3A%2F%2Flivekit.io%2Fbadges%2Fslack)](https:\u002F\u002Flivekit.io\u002Fjoin-slack)\n[![Twitter 关注](https:\u002F\u002Fimg.shields.io\u002Ftwitter\u002Ffollow\u002Flivekit)](https:\u002F\u002Ftwitter.com\u002Flivekit)\n[![使用 DeepWiki 
理解代码库](https:\u002F\u002Fdeepwiki.com\u002Fbadge.svg)](https:\u002F\u002Fdeepwiki.com\u002Flivekit\u002Fagents)\n[![许可证](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Flicense\u002Flivekit\u002Flivekit)](https:\u002F\u002Fgithub.com\u002Flivekit\u002Flivekit\u002Fblob\u002Fmaster\u002FLICENSE)\n\n\u003Cbr \u002F>\n\n正在寻找 JS\u002FTS 库？请查看 [AgentsJS](https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents-js)。\n\n## 什么是 Agents？\n\n\u003C!--BEGIN_DESCRIPTION-->\n\nAgent 框架旨在构建运行在服务器上的实时可编程参与者。利用它，您可以创建能够看、听并理解的多模态对话式语音代理。\n\n\u003C!--END_DESCRIPTION-->\n\n## 特性\n\n- **灵活的集成**：全面的生态系统，允许您根据具体用例自由组合合适的 STT、LLM、TTS 和实时 API。\n- **集成的任务调度**：内置任务调度与分发功能，通过 [dispatch APIs](https:\u002F\u002Fdocs.livekit.io\u002Fagents\u002Fbuild\u002Fdispatch\u002F) 将终端用户连接到代理。\n- **丰富的 WebRTC 客户端**：使用 LiveKit 的开源 SDK 生态系统构建客户端应用，支持所有主流平台。\n- **电话集成**：与 LiveKit 的 [电话堆栈](https:\u002F\u002Fdocs.livekit.io\u002Fsip\u002F) 无缝协作，使您的代理能够拨打或接听电话。\n- **与客户端交换数据**：使用 [RPCs](https:\u002F\u002Fdocs.livekit.io\u002Fhome\u002Fclient\u002Fdata\u002Frpc\u002F) 和其他 [Data APIs](https:\u002F\u002Fdocs.livekit.io\u002Fhome\u002Fclient\u002Fdata\u002F) 与客户端顺畅地交换数据。\n- **语义轮次检测**：采用 Transformer 模型检测用户何时结束发言，有助于减少打断。\n- **MCP 支持**：原生支持 MCP。只需一处即可集成 MCP 服务器提供的工具。\n- **内置测试框架**：编写测试并使用评判器确保您的代理按预期运行。\n- **开源**：完全开源，允许您在自己的服务器上运行整个堆栈，包括 [LiveKit 服务器](https:\u002F\u002Fgithub.com\u002Flivekit\u002Flivekit)，这是最常用的 WebRTC 媒体服务器之一。\n\n## 安装\n\n要安装核心 Agents 库以及常用模型提供商的插件：\n\n```bash\npip install \"livekit-agents[openai,silero,deepgram,cartesia,turn-detector]~=1.4\"\n```\n\n## 文档与指南\n\n有关该框架及其使用方法的文档，请参阅 [这里](https:\u002F\u002Fdocs.livekit.io\u002Fagents\u002F)。\n\n### 使用 AI 编码代理进行开发\n\n如果您正在使用 AI 编码助手来基于 LiveKit Agents 进行开发，我们建议采用以下设置以获得最佳效果：\n\n1. **安装 [LiveKit Docs MCP 服务器](https:\u002F\u002Fdocs.livekit.io\u002Fmcp)** — 为您的编码代理提供最新的 LiveKit 文档、跨 LiveKit 仓库的代码搜索功能以及实用示例。\n\n2. 
**安装 [LiveKit Agent Skill](https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagent-skills)** — 为您的编码代理提供构建语音 AI 应用程序的架构指导和最佳实践，包括工作流设计、交接流程、任务分配及测试模式。\n\n   ```shell\n   npx skills add livekit\u002Fagent-skills --skill livekit-agents\n   ```\n\nAgent Skill 与 MCP 服务器配合使用效果最佳：Skill 教导您的代理 *如何着手* 使用 LiveKit 进行开发，而 MCP 服务器则提供 *当前的 API 细节* 以便正确实现。\n\n## 核心概念\n\n- 代理：基于 LLM 的应用程序，具有明确的指令。\n- 代理会话：用于管理与终端用户交互的容器。\n- 入口点：交互式会话的起点，类似于 Web 服务器中的请求处理器。\n- 代理服务器：负责协调任务调度并启动用户会话代理的主要进程。\n\n## 使用方法\n\n### 简单的语音代理\n\n---\n\n```python\nfrom livekit.agents import (\n    Agent,\n    AgentServer,\n    AgentSession,\n    JobContext,\n    RunContext,\n    cli,\n    function_tool,\n    inference,\n)\nfrom livekit.plugins import silero\n\n\n@function_tool\nasync def lookup_weather(\n    context: RunContext,\n    location: str,\n):\n    \"\"\"用于查询天气信息。\"\"\"\n\n    return {\"weather\": \"sunny\", \"temperature\": 70}\n\n\nserver = AgentServer()\n\n\n@server.rtc_session()\nasync def entrypoint(ctx: JobContext):\n    session = AgentSession(\n        vad=silero.VAD.load(),\n        # 可以使用 STT、LLM、TTS 或实时 API 的任意组合\n        # 本示例展示了 LiveKit Inference，这是一个通过 LiveKit Cloud 访问不同模型的统一 API\n        # 如果需要直接使用模型提供商的密钥，可以替换为以下内容：\n        # from livekit.plugins import deepgram、openai、cartesia\n        # stt=deepgram.STT(model=\"nova-3\"),\n        # llm=openai.LLM(model=\"gpt-4.1-mini\"),\n        # tts=cartesia.TTS(model=\"sonic-3\", voice=\"9626c31c-bec5-4cca-baa8-f8ba9e84c8bc\"),\n        stt=inference.STT(\"deepgram\u002Fnova-3\", language=\"multi\"),\n        llm=inference.LLM(\"openai\u002Fgpt-4.1-mini\"),\n        tts=inference.TTS(\"cartesia\u002Fsonic-3\", voice=\"9626c31c-bec5-4cca-baa8-f8ba9e84c8bc\"),\n    )\n\n    agent = Agent(\n        instructions=\"您是 LiveKit 构建的友好语音助手。\",\n        tools=[lookup_weather],\n    )\n\n    await session.start(agent=agent, room=ctx.room)\n    await session.generate_reply(instructions=\"向用户问好并询问他们今天过得如何\")\n\n\nif __name__ == \"__main__\":\n    
cli.run_app(server)\n```\n\n此示例需要以下环境变量：\n\n- LIVEKIT_URL\n- LIVEKIT_API_KEY\n- LIVEKIT_API_SECRET\n\n### 多智能体交接\n\n---\n\n这段代码片段已简化。完整示例请参见 [multi_agent.py](examples\u002Fvoice_agents\u002Fmulti_agent.py)。\n\n```python\n...\nclass IntroAgent(Agent):\n    def __init__(self) -> None:\n        super().__init__(\n            instructions=f\"你是一名讲故事的人。你的目标是从用户那里收集一些信息，使故事更加个性化和引人入胜。\"\n            \"请询问用户的姓名和来自哪里\"\n        )\n\n    async def on_enter(self):\n        self.session.generate_reply(instructions=\"向用户问好并收集信息\")\n\n    @function_tool\n    async def information_gathered(\n        self,\n        context: RunContext,\n        name: str,\n        location: str,\n    ):\n        \"\"\"当用户提供了使故事个性化和引人入胜所需的信息时调用。\n\n        Args:\n            name: 用户的姓名\n            location: 用户所在的地方\n        \"\"\"\n\n        context.userdata.name = name\n        context.userdata.location = location\n\n        story_agent = StoryAgent(name, location)\n        return story_agent, \"让我们开始故事吧！\"\n\n\nclass StoryAgent(Agent):\n    def __init__(self, name: str, location: str) -> None:\n        super().__init__(\n            instructions=f\"你是一名说书人。利用用户的个人信息使故事更具个性化。\"\n            f\"用户的名字是{name}, 来自{location}\",\n            # 覆盖默认模型，切换到实时API而非标准LLM\n            llm=openai.realtime.RealtimeModel(voice=\"echo\"),\n            chat_ctx=chat_ctx,\n        )\n\n    async def on_enter(self):\n        self.session.generate_reply()\n\n\n@server.rtc_session()\nasync def entrypoint(ctx: JobContext):\n    userdata = StoryData()\n    session = AgentSession[StoryData](\n        vad=silero.VAD.load(),\n        stt=\"deepgram\u002Fnova-3\",\n        llm=\"openai\u002Fgpt-4.1-mini\",\n        tts=\"cartesia\u002Fsonic-3:9626c31c-bec5-4cca-baa8-f8ba9e84c8bc\",\n        userdata=userdata,\n    )\n\n    await session.start(\n        agent=IntroAgent(),\n        room=ctx.room,\n    )\n...\n```\n\n### 测试\n\n自动化测试对于构建可靠的智能体至关重要，尤其是在LLM表现出非确定性行为的情况下。LiveKit 
Agents内置了测试集成，可帮助您创建可靠的智能体。\n\n```python\n@pytest.mark.asyncio\nasync def test_no_availability() -> None:\n    llm = google.LLM()\n    async with AgentSession(llm=llm) as sess:\n        await sess.start(MyAgent())\n        result = await sess.run(\n            user_input=\"你好，我想下单。\"\n        )\n        result.expect.skip_next_event_if(type=\"message\", role=\"assistant\")\n        result.expect.next_event().is_function_call(name=\"start_order\")\n        result.expect.next_event().is_function_call_output()\n        await (\n            result.expect.next_event()\n            .is_message(role=\"assistant\")\n            .judge(llm, intent=\"助手应该询问用户想要什么\")\n        )\n\n```\n\n## 示例\n\n更多示例及详细的设置说明，请参阅 [examples目录](examples\u002F)。如需更多示例，请访问 [python-agents-examples](https:\u002F\u002Fgithub.com\u002Flivekit-examples\u002Fpython-agents-examples) 仓库。\n\n\u003Ctable>\n\u003Ctr>\n\u003Ctd width=\"50%\">\n\u003Ch3>🎙️ 入门级智能体\u003C\u002Fh3>\n\u003Cp>专为语音对话优化的入门级智能体。\u003C\u002Fp>\n\u003Cp>\n\u003Ca href=\"examples\u002Fvoice_agents\u002Fbasic_agent.py\">代码\u003C\u002Fa>\n\u003C\u002Fp>\n\u003C\u002Ftd>\n\u003Ctd width=\"50%\">\n\u003Ch3>🔄 多用户对讲功能\u003C\u002Fh3>\n\u003Cp>通过按住说话的方式响应房间内的多个用户。\u003C\u002Fp>\n\u003Cp>\n\u003Ca href=\"examples\u002Fvoice_agents\u002Fpush_to_talk.py\">代码\u003C\u002Fa>\n\u003C\u002Fp>\n\u003C\u002Ftd>\n\u003C\u002Ftr>\n\n\u003Ctr>\n\u003Ctd width=\"50%\">\n\u003Ch3>🎵 背景音频\u003C\u002Fh3>\n\u003Cp>背景环境音和思考音效，以提升真实感。\u003C\u002Fp>\n\u003Cp>\n\u003Ca href=\"examples\u002Fvoice_agents\u002Fbackground_audio.py\">代码\u003C\u002Fa>\n\u003C\u002Fp>\n\u003C\u002Ftd>\n\u003Ctd width=\"50%\">\n\u003Ch3>🛠️ 动态工具创建\u003C\u002Fh3>\n\u003Cp>动态创建函数工具。\u003C\u002Fp>\n\u003Cp>\n\u003Ca href=\"examples\u002Fvoice_agents\u002Fdynamic_tool_creation.py\">代码\u003C\u002Fa>\n\u003C\u002Fp>\n\u003C\u002Ftd>\n\u003C\u002Ftr>\n\n\u003Ctr>\n\u003Ctd width=\"50%\">\n\u003Ch3>☎️ 外拨呼叫器\u003C\u002Fh3>\n\u003Cp>能够发起外拨电话的智能体。\u003C\u002Fp>\n\u003Cp>\n\u003Ca 
href=\"https:\u002F\u002Fgithub.com\u002Flivekit-examples\u002Foutbound-caller-python\">代码\u003C\u002Fa>\n\u003C\u002Fp>\n\u003C\u002Ftd>\n\u003Ctd width=\"50%\">\n\u003Ch3>📋 结构化输出\u003C\u002Fh3>\n\u003Cp>使用LLM的结构化输出来指导TTS的语气。\u003C\u002Fp>\n\u003Cp>\n\u003Ca href=\"examples\u002Fvoice_agents\u002Fstructured_output.py\">代码\u003C\u002Fa>\n\u003C\u002Fp>\n\u003C\u002Ftd>\n\u003C\u002Ftr>\n\n\u003Ctr>\n\u003Ctd width=\"50%\">\n\u003Ch3>🔌 MCP支持\u003C\u002Fh3>\n\u003Cp>使用MCP服务器上的工具。\u003C\u002Fp>\n\u003Cp>\n\u003Ca href=\"examples\u002Fvoice_agents\u002Fmcp\">代码\u003C\u002Fa>\n\u003C\u002Fp>\n\u003C\u002Ftd>\n\u003Ctd width=\"50%\">\n\u003Ch3>💬 文本专用智能体\u003C\u002Fh3>\n\u003Cp>完全跳过语音部分，将相同代码用于纯文本集成。\u003C\u002Fp>\n\u003Cp>\n\u003Ca href=\"examples\u002Fother\u002Ftext_only.py\">代码\u003C\u002Fa>\n\u003C\u002Fp>\n\u003C\u002Ftd>\n\u003C\u002Ftr>\n\n\u003Ctr>\n\u003Ctd width=\"50%\">\n\u003Ch3>📝 多用户转录器\u003C\u002Fh3>\n\u003Cp>为房间内所有用户生成转录文本。\u003C\u002Fp>\n\u003Cp>\n\u003Ca href=\"examples\u002Fother\u002Ftranscription\u002Fmulti-user-transcriber.py\">代码\u003C\u002Fa>\n\u003C\u002Fp>\n\u003C\u002Ftd>\n\u003Ctd width=\"50%\">\n\u003Ch3>🎥 视频化身\u003C\u002Fh3>\n\u003Cp>可以添加由Tavus、Hedra、Bithuman、LemonSlice等提供的AI化身。\u003C\u002Fp>\n\u003Cp>\n\u003Ca href=\"examples\u002Favatar_agents\u002F\">代码\u003C\u002Fa>\n\u003C\u002Fp>\n\u003C\u002Ftd>\n\u003C\u002Ftr>\n\n\u003Ctr>\n\u003Ctd width=\"50%\">\n\u003Ch3>🍽️ 餐厅点餐与预订\u003C\u002Fh3>\n\u003Cp>一个完整的餐厅接线员智能体示例。\u003C\u002Fp>\n\u003Cp>\n\u003Ca href=\"examples\u002Fvoice_agents\u002Frestaurant_agent.py\">代码\u003C\u002Fa>\n\u003C\u002Fp>\n\u003C\u002Ftd>\n\u003Ctd width=\"50%\">\n\u003Ch3>👁️ Gemini Live视觉\u003C\u002Fh3>\n\u003Cp>一个完整的Gemini Live视觉智能体示例（包括iOS应用）。\u003C\u002Fp>\n\u003Cp>\n\u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit-examples\u002Fvision-demo\">代码\u003C\u002Fa>\n\u003C\u002Fp>\n\u003C\u002Ftd>\n\u003C\u002Ftr>\n\n\u003C\u002Ftable>\n\n## 运行您的智能体\n\n### 在终端中测试\n\n```shell\npython myagent.py 
console\n```\n\n以终端模式运行您的智能体，支持本地音频输入和输出，便于测试。此模式无需外部服务器或依赖项，非常适合快速验证行为。\n\n### 使用LiveKit客户端开发\n\n```shell\npython myagent.py dev\n```\n\n启动智能体服务器，并在文件更改时启用热重载。此模式允许每个进程高效地托管多个并发智能体。\n\n智能体会连接到LiveKit Cloud或您自建的服务器。请设置以下环境变量：\n- LIVEKIT_URL\n- LIVEKIT_API_KEY\n- LIVEKIT_API_SECRET\n\n您可以使用任何LiveKit客户端SDK或电话集成进行连接。若想快速上手，不妨试试 [Agents Playground](https:\u002F\u002Fagents-playground.livekit.io\u002F)。\n\n### 生产环境运行\n\n```shell\npython myagent.py start\n```\n\n以生产就绪的优化方式运行智能体。\n\n## 贡献\n\nAgents框架正处于快速发展阶段，我们欢迎并感谢任何形式的贡献，无论是反馈、错误修复、新功能、新插件和工具，还是更好的文档。您可以在本仓库提交问题、打开PR，或在 [LiveKit社区](https:\u002F\u002Fdocs.livekit.io\u002Fintro\u002Fcommunity\u002F) 中与我们交流。\n\n### 开发环境配置\n\n本项目使用 [uv](https:\u002F\u002Fdocs.astral.sh\u002Fuv\u002F) 进行包管理。要安装开发所需的依赖，请运行以下命令：\n\n```shell\nuv sync --all-extras --dev\n```\n\n### 示例\n\n本项目在 [`examples`](examples\u002F) 目录中包含许多示例。要运行这些示例，首先创建 `examples\u002F.env` 文件，填入 LiveKit 服务器及所需模型提供商的凭据（请参考 `examples\u002F.env.example`），然后运行：\n\n```shell\nuv run examples\u002Fvoice_agents\u002Fbasic_agent.py dev\n```\n\n更多详细信息，请参阅 [示例 README](examples\u002FREADME.md)。\n\n### 测试\n\n单元测试位于 `tests` 目录下，可以通过以下命令运行：\n\n```shell\nuv run pytest tests\u002Ftest_tools.py\n```\n\n每个插件的集成测试需要各种 API 凭据，并且会在 GitHub CI 中自动运行，仅针对由项目维护者提交的 Pull Request。有关详细信息，请查看 [测试工作流](.github\u002Fworkflows\u002Ftests.yml)。\n\n### 代码格式化\n\n本项目使用 [ruff](https:\u002F\u002Fgithub.com\u002Fastral-sh\u002Fruff) 进行代码格式化和 lint 检查：\n\n```shell\nuv run ruff format\nuv run ruff check --fix\n```\n\n### 文档生成\n\n要使用 [pdoc](https:\u002F\u002Fgithub.com\u002Fpdoc3\u002Fpdoc) 在本地生成文档，请执行以下命令：\n\n```shell\nuv sync --all-extras --group docs\nuv run --active pdoc --skip-errors --html --output-dir=docs livekit\n```\n\n\u003C!--BEGIN_REPO_NAV-->\n\u003Cbr\u002F>\u003Ctable>\n\u003Cthead>\u003Ctr>\u003Cth colspan=\"2\">LiveKit 生态系统\u003C\u002Fth>\u003C\u002Ftr>\u003C\u002Fthead>\n\u003Ctbody>\n\u003Ctr>\u003Ctd>Agents SDKs\u003C\u002Ftd>\u003Ctd>\u003Cb>Python\u003C\u002Fb> · \u003Ca 
href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents-js\">Node.js\u003C\u002Fa>\u003C\u002Ftd>\u003C\u002Ftr>\u003Ctr>\u003C\u002Ftr>\n\u003Ctr>\u003Ctd>LiveKit SDKs\u003C\u002Ftd>\u003Ctd>\u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fclient-sdk-js\">浏览器\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fclient-sdk-swift\">Swift\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fclient-sdk-android\">Android\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fclient-sdk-flutter\">Flutter\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fclient-sdk-react-native\">React Native\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Frust-sdks\">Rust\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fnode-sdks\">Node.js\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fpython-sdks\">Python\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fclient-sdk-unity\">Unity\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fclient-sdk-unity-web\">Unity (WebGL)\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fclient-sdk-esp32\">ESP32\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fclient-sdk-cpp\">C++\u003C\u002Fa>\u003C\u002Ftd>\u003C\u002Ftr>\u003Ctr>\u003C\u002Ftr>\n\u003Ctr>\u003Ctd>入门应用\u003C\u002Ftd>\u003Ctd>\u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit-examples\u002Fagent-starter-python\">Python Agent\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit-examples\u002Fagent-starter-node\">TypeScript Agent\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit-examples\u002Fagent-starter-react\">React 应用\u003C\u002Fa> · \u003Ca 
href=\"https:\u002F\u002Fgithub.com\u002Flivekit-examples\u002Fagent-starter-swift\">SwiftUI 应用\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit-examples\u002Fagent-starter-android\">Android 应用\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit-examples\u002Fagent-starter-flutter\">Flutter 应用\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit-examples\u002Fagent-starter-react-native\">React Native 应用\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit-examples\u002Fagent-starter-embed\">网页嵌入\u003C\u002Fa>\u003C\u002Ftd>\u003C\u002Ftr>\u003Ctr>\u003C\u002Ftr>\n\u003Ctr>\u003Ctd>UI 组件\u003C\u002Ftd>\u003Ctd>\u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fcomponents-js\">React\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fcomponents-android\">Android Compose\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fcomponents-swift\">SwiftUI\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fcomponents-flutter\">Flutter\u003C\u002Fa>\u003C\u002Ftd>\u003C\u002Ftr>\u003Ctr>\u003C\u002Ftr>\n\u003Ctr>\u003Ctd>服务器 API\u003C\u002Ftd>\u003Ctd>\u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fnode-sdks\">Node.js\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fserver-sdk-go\">Golang\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fserver-sdk-ruby\">Ruby\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fserver-sdk-kotlin\">Java\u002FKotlin\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fpython-sdks\">Python\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Frust-sdks\">Rust\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fagence104\u002Flivekit-server-sdk-php\">PHP（社区版）\u003C\u002Fa> · \u003Ca 
href=\"https:\u002F\u002Fgithub.com\u002FpabloFuente\u002Flivekit-server-sdk-dotnet\">.NET（社区版）\u003C\u002Fa>\u003C\u002Ftd>\u003C\u002Ftr>\u003Ctr>\u003C\u002Ftr>\n\u003Ctr>\u003Ctd>资源\u003C\u002Ftd>\u003Ctd>\u003Ca href=\"https:\u002F\u002Fdocs.livekit.io\">文档\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fdocs.livekit.io\u002Fmcp\">MCP 服务器文档\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Flivekit-cli\">CLI 工具\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fcloud.livekit.io\">LiveKit 云服务\u003C\u002Fa>\u003C\u002Ftd>\u003C\u002Ftr>\u003Ctr>\u003C\u002Ftr>\n\u003Ctr>\u003Ctd>LiveKit 服务器开源项目\u003C\u002Ftd>\u003Ctd>\u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Flivekit\">LiveKit 服务器\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fegress\">Egress\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fingress\">Ingress\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Flivekit\u002Fsip\">SIP\u003C\u002Fa>\u003C\u002Ftd>\u003C\u002Ftr>\u003Ctr>\u003C\u002Ftr>\n\u003Ctr>\u003Ctd>社区\u003C\u002Ftd>\u003Ctd>\u003Ca href=\"https:\u002F\u002Fcommunity.livekit.io\">开发者社区\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Flivekit.io\u002Fjoin-slack\">Slack 社区\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fx.com\u002Flivekit\">X 平台\u003C\u002Fa> · \u003Ca href=\"https:\u002F\u002Fwww.youtube.com\u002F@livekit_io\">YouTube 频道\u003C\u002Fa>\u003C\u002Ftd>\u003C\u002Ftr>\n\u003C\u002Ftbody>\n\u003C\u002Ftable>\n\u003C!--END_REPO_NAV-->","# LiveKit Agents 快速上手指南\n\nLiveKit Agents 是一个用于构建实时、可编程语音代理的框架。它支持多模态交互（听、说、理解），可轻松集成各类 STT、LLM 和 TTS 模型，并具备原生的 WebRTC 和电话集成能力。\n\n## 环境准备\n\n在开始之前，请确保满足以下要求：\n\n- **操作系统**：Linux、macOS 或 Windows\n- **Python 版本**：3.10 或更高版本\n- **依赖项**：`pip` 包管理工具\n- **LiveKit 账户**：需要获取 `LIVEKIT_URL`、`LIVEKIT_API_KEY` 和 `LIVEKIT_API_SECRET`（可在 [LiveKit Cloud](https:\u002F\u002Fcloud.livekit.io) 免费注册获取，或自建 LiveKit 服务器）\n\n> **提示**：国内开发者若访问 PyPI 
较慢，可使用清华或阿里云镜像源加速安装。\n\n## 安装步骤\n\n使用 `pip` 安装核心库及常用插件（包含 OpenAI、Silero VAD、Deepgram STT、Cartesia TTS 等）：\n\n```bash\npip install \"livekit-agents[openai,silero,deepgram,cartesia,turn-detector]~=1.4\" -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n```\n\n如需使用其他模型提供商，可根据需求调整括号内的插件名称。\n\n## 基本使用\n\n以下是一个最简单的语音代理示例，具备天气查询功能并能主动问候用户。\n\n### 1. 创建代理文件\n\n新建文件 `my_agent.py`，填入以下代码：\n\n```python\nfrom livekit.agents import (\n    Agent,\n    AgentServer,\n    AgentSession,\n    JobContext,\n    RunContext,\n    cli,\n    function_tool,\n    inference,\n)\nfrom livekit.plugins import silero\n\n\n@function_tool\nasync def lookup_weather(\n    context: RunContext,\n    location: str,\n):\n    \"\"\"Used to look up weather information.\"\"\"\n\n    return {\"weather\": \"sunny\", \"temperature\": 70}\n\n\nserver = AgentServer()\n\n\n@server.rtc_session()\nasync def entrypoint(ctx: JobContext):\n    session = AgentSession(\n        vad=silero.VAD.load(),\n        # 使用 LiveKit Inference 统一接口调用模型\n        stt=inference.STT(\"deepgram\u002Fnova-3\", language=\"multi\"),\n        llm=inference.LLM(\"openai\u002Fgpt-4.1-mini\"),\n        tts=inference.TTS(\"cartesia\u002Fsonic-3\", voice=\"9626c31c-bec5-4cca-baa8-f8ba9e84c8bc\"),\n    )\n\n    agent = Agent(\n        instructions=\"You are a friendly voice assistant built by LiveKit.\",\n        tools=[lookup_weather],\n    )\n\n    await session.start(agent=agent, room=ctx.room)\n    await session.generate_reply(instructions=\"greet the user and ask about their day\")\n\n\nif __name__ == \"__main__\":\n    cli.run_app(server)\n```\n\n### 2. 配置环境变量\n\n在终端中设置以下环境变量（替换为你的实际值）：\n\n```bash\nexport LIVEKIT_URL=wss:\u002F\u002Fyour-project.livekit.cloud\nexport LIVEKIT_API_KEY=your_api_key\nexport LIVEKIT_API_SECRET=your_api_secret\n```\n\n> Windows PowerShell 用户使用 `$env:LIVEKIT_URL=\"...\"` 格式。\n\n### 3. 
运行代理\n\n在终端中启动代理并进行本地测试：\n\n```bash\npython my_agent.py console\n```\n\n该命令将以控制台模式运行，允许你通过麦克风输入语音并听到代理的语音回复，无需额外部署服务器即可快速验证功能。","一家初创医疗科技公司正在开发一款能通过电话为老年患者提供用药提醒和健康咨询的 24 小时语音助手。\n\n### 没有 agents 时\n- **延迟高且体验割裂**：自行拼接 STT、LLM 和 TTS 服务导致端到端响应延迟超过 2 秒，老人常因等待过久而重复说话，造成对话混乱。\n- **打断处理生硬**：缺乏智能的语义轮次检测，系统无法识别用户中途插话，经常自顾自说完预设台词，显得极不自然。\n- **电话接入复杂**：传统电话网络（PSTN）与 WebRTC 架构打通困难，需额外开发复杂的 SIP 网关适配层，耗时数周。\n- **多模态扩展受限**：若未来想增加“视频看诊”功能，需重构整个底层通信架构，无法平滑升级。\n\n### 使用 agents 后\n- **实时流畅交互**：agents 框架内置优化的流水线与边缘节点部署，将响应延迟压缩至毫秒级，老人话音刚落即可得到自然回应。\n- **智能语义打断**：利用内置的 Transformer 语义轮次检测模型，精准识别用户意图，支持随时自然插话，对话节奏如同真人交流。\n- **一键电话集成**：直接调用 agents 集成的 LiveKit 电话栈，无需编写底层代码即可实现从手机固话到 AI 助手的无缝连接。\n- **原生多模态支持**：基于 WebRTC 架构，仅需几行配置即可在语音基础上开启视频流，轻松演进为面对面视频健康顾问。\n\nagents 让开发者从繁琐的实时通信基建中解放出来，专注于构建真正懂人性、低延迟的医疗陪伴体验。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Flivekit_agents_28ff49a4.png","livekit","LiveKit","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Flivekit_203a61cb.png","Open source WebRTC and realtime AI infrastructure",null,"https:\u002F\u002Flivekit.io","https:\u002F\u002Fgithub.com\u002Flivekit",[80,84,88,92,96],{"name":81,"color":82,"percentage":83},"Python","#3572A5",98.8,{"name":85,"color":86,"percentage":87},"C","#555555",0.8,{"name":89,"color":90,"percentage":91},"Makefile","#427819",0.3,{"name":93,"color":94,"percentage":95},"C++","#f34b7d",0.1,{"name":97,"color":98,"percentage":95},"CMake","#DA3434",9962,2994,"2026-04-08T04:39:01","Apache-2.0","Linux, macOS, Windows","未说明 (主要依赖云端 API 或本地 CPU 推理，插件如 Silero VAD 通常可在 CPU 运行)","未说明",{"notes":107,"python":108,"dependencies":109},"该框架主要用于构建实时语音代理，默认通过 API 调用外部模型（如 OpenAI, Deepgram, Cartesia），因此对本地 GPU 无强制要求。运行需要配置 LiveKit 服务器环境变量 (LIVEKIT_URL, LIVEKIT_API_KEY, LIVEKIT_API_SECRET)。支持通过 pip 安装核心库及特定插件。包含内置的测试框架和 MCP 
支持。","3.9+",[110,111,112,113,114,115],"livekit-agents~=1.4","livekit-plugins-openai","livekit-plugins-silero","livekit-plugins-deepgram","livekit-plugins-cartesia","livekit-plugins-turn-detector",[117,14,118,13,15],"音频","视频",[120,121,122,123,64,124],"ai","real-time","voice","video","openai","2026-03-27T02:49:30.150509","2026-04-08T17:09:22.175777",[128,133,138,143,148,152],{"id":129,"question_zh":130,"answer_zh":131,"source_url":132},24824,"升级后出现 'process is unresponsive, killing process' 错误导致应用崩溃怎么办？","该问题通常发生在 TTS 节点处理完最后一个音频帧后，进程在 59-60 秒（ping pong 超时）后被杀死。可能的原因包括：\n1. 在 ShutdownCallback 中执行了 aiohttp 请求导致阻塞。\n2. 会话未正常关闭（缺少 'session closed' 日志）。\n解决方案：\n- 检查并移除 ShutdownCallback 中的同步或阻塞网络请求。\n- 启用调试日志，确认是否有 'session closed' 记录。如果没有，说明会话未优雅关闭，需检查代码逻辑确保在 TTS 结束后正确终止会话。\n- 如果问题持续，可暂时回退到稳定版本（如 1.2.7）。","https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fissues\u002F3637",{"id":134,"question_zh":135,"answer_zh":136,"source_url":137},24825,"Agent 偶尔会对同一次用户输入生成两次重复的回复（LLM 推理 + TTS 音频），如何解决？","这是一个已知问题，尤其在旧版本中较为常见。表现为第二次 LLM 推理通常在第一次 TTS 完成后才触发，且可能使用完整的用户输入。\n建议方案：\n- 由于该 Issue 创建时间较早且版本差异巨大，维护者建议不要在此处追踪，而是使用最新版本复现并提交带有详细日志的新 Issue。\n- 检查是否开启了 allow_interruptions，并确认 VAD（语音活动检测）配置是否正确，避免将同一句话误判为多次输入。\n- 确保在使用 Gemini 或 GPT-4o 等模型时，SDK 已更新至最新以修复潜在的竞态条件。","https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fissues\u002F323",{"id":139,"question_zh":140,"answer_zh":141,"source_url":142},24826,"使用 MultimodalAgent 调用函数时报错 'Conversation already has an active response' 怎么处理？","此错误通常发生在 OpenAI Realtime API 在用户说完话之前已经自动生成回复，而代码随后又尝试手动生成回复（generate_reply）时产生冲突。\n解决方案：\n1. 移除 on_enter 事件中的 generate_reply 调用。\n2. 
或者，在调用 generate_reply 之前先执行 session.interrupt() 来中断当前正在进行的自动回复。\n代码示例：\nawait self.session.interrupt()\nawait self.session.generate_reply(...)","https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fissues\u002F1056",{"id":144,"question_zh":145,"answer_zh":146,"source_url":147},24827,"在 Gemini Realtime 中调用工具函数时，await self.session.generate_reply 不工作或报错怎么办？","在 Gemini Realtime 模式下，如果在工具函数返回前想要主动生成回复，直接 await 可能会导致错误或时序问题。\n推荐的解决模式如下：\n1. 正常初始化 realtime llm 和 agent。\n2. 启动会话：await self.session.start(...)\n3. 关键步骤：立即调用 self.session.interrupt() 中断可能由系统自动触发的初始回复。\n4. 然后执行带指令的回复生成：await self.session.generate_reply(instructions=self.config.instructions)\n这样可以确保在工具调用前后正确控制对话流。","https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fissues\u002F2165",{"id":149,"question_zh":150,"answer_zh":151,"source_url":132},24828,"如何诊断 Agent 进程阻塞导致无法启动新会话的问题？","如果观察到整个 Agent 进程阻塞一段时间，期间无法启动新会话，并伴随 'connection closed unexpectedly' 或 recv_task 错误，可能是底层插件（如 STT 服务）连接异常断开导致的死锁。\n排查步骤：\n1. 检查日志中是否有特定插件（如 aliyun\u002Fstt.py）抛出的 APIStatusError。\n2. 确认是否在 ShutdownCallback 或其他清理阶段执行了耗时的网络请求（如 aiohttp），这会阻塞事件循环。\n3. 
确保所有异步操作都正确使用了 await，避免同步阻塞代码块。",{"id":153,"question_zh":154,"answer_zh":155,"source_url":142},24829,"Azure OpenAI Realtime 模型是否也适用 'Conversation already has an active response' 的修复方案？","是的，虽然该错误最初在 OpenAI S2S 中被报告，但其根本原因是并发响应冲突。对于 Azure OpenAI Realtime 模型，同样建议在手动触发回复生成前，先调用 session.interrupt() 以确保清除任何挂起的自动响应，从而避免 'Conversation already has an active response' 错误。",[157,162,167,172,177,182,187,192,197,202,207,212,217,222,227,232,237,242,247,252],{"id":158,"version":159,"summary_zh":160,"released_at":161},154322,"livekit-agents@1.3.9","## What's Changed\r\n* chore(xai): update voice names according to docs by @davidzhao in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4295\r\n* Add local dev commands for linking to rtc-sdk by @lukasIO in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4258\r\n* Ensure makefile checks for livekit_lib_path by @lukasIO in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4298\r\n* feat(gemini3) : Add Gemini 3 support with thinking_level and thought_signature by @varghesepaul in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4027\r\n* fix list mutation during iteration by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4304\r\n* add gemini 3 flash model by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4301\r\n* Websockets improvement by @cshape in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4303\r\n* Allow for Cartesia TTS language auto-detection by @yuyuma in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4300\r\n* Add Amazon Nova 2.0 Sonic Support with Text Input and Enhanced Features by @kachenjr in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4176\r\n* fix dynamic FieldInfo for pydantic 2.12 by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4290\r\n* re-export TurnDetection 
for xAI by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4306\r\n* fix commit_user_turn when last_final_transcript_time is None by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4308\r\n* feat(soniox): add language_hints_strict option for STT by @cateet in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4281\r\n* feat(google-tts): add prompt to normal synthesize for Gemini TTS by @NXV5111 in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4208\r\n* Adding extra content to OpenAI LLM. Improving function call grouping. by @russellmartin-livekit in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4170\r\n* feat(gemini3) use low latency thinking_level by default for gemini 3 models by @pushkar-nurix in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4311\r\n* fix handoff to Realtime model with existing session context by @davidzhao in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4310\r\n* tts metrics update by @dhruvladia-sarvam in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4117\r\n* AGT-2302: add aligned_transcript to STT by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4155\r\n* Minor readme doc fixes by @kachenjr in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4320\r\n* vad enabled by @dhruvladia-sarvam in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4321\r\n* handle exceptions in task_results by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4323\r\n* add `livekit-durable` functions by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4272\r\n* fix py3.10-py3.12 &`livekit-durable` cibw by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4324\r\n* stringify cartesia error 
to be pickleable by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4328\r\n* livekit-agents 1.3.9 by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4329\r\n\r\n## New Contributors\r\n* @varghesepaul made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4027\r\n* @NXV5111 made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4208\r\n* @russellmartin-livekit made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4170\r\n* @dhruvladia-sarvam made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4117\r\n\r\n**Full Changelog**: https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fcompare\u002Flivekit-agents@1.3.8...livekit-agents@1.3.9","2025-12-19T06:27:05",{"id":163,"version":164,"summary_zh":165,"released_at":166},154323,"livekit-agents@1.3.8","## What's Changed\r\n* add init for xai by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4286\r\n* fix(xai): list openai as a dependency, fix exports by @davidzhao in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4287\r\n* fix(xai): a few more exports by @davidzhao in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4288\r\n* Update default model by @gyang-xai in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4289\r\n* chore(xai): list supported voices by @davidzhao in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4292\r\n\r\n## New Contributors\r\n* @gyang-xai made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4289\r\n\r\n**Full Changelog**: 
https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fcompare\u002Flivekit-agents@1.3.7...livekit-agents@1.3.8","2025-12-17T06:28:24",{"id":168,"version":169,"summary_zh":170,"released_at":171},154324,"livekit-agents@1.3.7","## What's Changed\r\n* fix OTEL types by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4164\r\n* feat(background-audio): add several builtin audio clips by @rektdeckard in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4165\r\n* fix gemini function tool parameter enum typing by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4166\r\n* use inference gateway in the readme by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F3665\r\n* update warm transfer readme and extra instructions by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4168\r\n* terminate on `JobRequest.reject` by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4172\r\n* add terminate argument to JobRequest.reject by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4173\r\n* update documentation link for LiveAvatar by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4178\r\n* fix logging style format is not respected by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4169\r\n* fix _on_reject when no answer by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4180\r\n* expose elevenlabs TTS error message by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4182\r\n* fix(aws): Handle nested schema in Nova Sonic tool parameter extraction by @somoore in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4177\r\n* Restore otel chat message by @chenghao-mou in 
https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4118\r\n* fix record.exc_info is not pickable when using LogQueueHandler by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4185\r\n* Feat\u002Fmistralai models update by @fabitokki in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4156\r\n* feat(rime): expand update_options to accept all TTS parameters by @gokuljs in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4095\r\n* Fallback API for Inference by @adrian-cowham in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4099\r\n* Add LiveAvatar Stop Session API Call + README Fix by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4195\r\n* feat(google): add streaming support for Gemini TTS models by @plumber0 in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4189\r\n* fix watchfiles prevent agent prcoess exit on sigterm by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4194\r\n* fix race condition when stop background audio play handle by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4197\r\n* Inference: Rename fallback model name param by @adrian-cowham in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4202\r\n* fix inworld punctuation handling by @cshape in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4215\r\n* ensure playback_segments_count is consistent in the audio output chain by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4211\r\n* clear _q_updated right after await to avoid race conditions by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4209\r\n* fix blocked send task in liveavatar plugin by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4214\r\n* feat(warm-transfer): 
add sip_number parameter for outbound caller ID by @Hormold in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4216\r\n* add keep alive task for liveavatar plugin by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4231\r\n* turn-detector: remove english model from readme by @lwestn in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4233\r\n* feature: GPT-5.2 support by @pushkar-nurix in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4235\r\n* disable interruptions for agent greeting by @hiroshihorie in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4223\r\n* AGT-2328: negative threshold in silero by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4228\r\n* fix: image token usage not being tracked for OpenAI realtime models by @GigaDroid in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4238\r\n* check for type key in _ensure_strict_json_schema by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4236\r\n* fix(openai): migrate realtime STT to GA API by @Hormold in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4232\r\n* fix(google): handle content blocking and generation failures by @davidzhao in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4249\r\n* feat(google): update default realtime model to gemini-2.5 12-2025 by @davidzhao in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4248\r\n* fix generate_reply timeout for gemini by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4237\r\n* fix: correct sample count calculation in AudioByteStream.flush() for multi-channel audio by @darshankparmar in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4245\r\n* Fix AudioByteStream buffer slicing performance issue by @darshankparmar in 
https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4247\r\n* AGT-2317: wait for user silence before speaking by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4102\r\n* Add Proactive Session Recycling for Nova Sonic resume by @kachenjr in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4250\r\n* feat(tts): Support dynamic base URL updates via update_options in Rime TTS plugin by @gokuljs in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4257\r\n* Auto assign reviewer for internal PRs by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4230\r\n* fix(aws): set aw","2025-12-16T22:06:48",{"id":173,"version":174,"summary_zh":175,"released_at":176},154308,"livekit-agents@1.5.1","> [!NOTE]  \r\n> **livekit-agents 1.5 引入了许多新功能。您可以在[这里](https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Freleases\u002Ftag\u002Flivekit-agents%401.5.0)查看变更日志。**\r\n\r\n\r\n## 变更内容\r\n* 修复 Azure OpenAI 实时支持，并由 @theomonnom 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5168 中添加实时模型测试\r\n* 修复（core）：由于合并不当导致的版本不匹配问题，由 @chenghao-mou 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5176 中修复\r\n* 修复（turn-detector）：放宽 transformers 的上限以支持 5.x 版本，由 @gdoermann 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5174 中完成\r\n* （gladia & soniox）：添加翻译支持，由 @tinalenguyen 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5148 中实现\r\n* 功能（agents）：支持 LIVEKIT_OBSERVABILITY_URL 用于自定义可观测性端点，由 @theomonnom 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5179 中实现\r\n* （xai tts）：更新 WebSocket 端点，由 @tinalenguyen 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5180 中完成\r\n* 修复（core）：在 room IO 中恢复聊天主题支持，由 @chenghao-mou 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5181 中修复\r\n* 在任务组中总结之前取消跳过工具调用项，由 @toubatbrian 在 
https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5169 中完成\r\n* 为可观ility 添加 SessionReport 中的 sdk_version，由 @theomonnom 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5182 中实现\r\n* 功能（hamming）：添加 hamming 监控插件包，由 @duchammingai 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5135 中实现\r\n* 杂项（mypy）：在类型检查中启用 mypy 缓存，由 @chenghao-mou 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5192 中完成\r\n* 修复：公开 Chirp 3 Google STT 端点的灵敏度，由 @karlsonlee-livekit 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5196 中完成\r\n* 添加 MCPToolset，由 @longcw 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5138 中实现\r\n* 功能（personaplex 插件），由 @milanperovic 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4660 中实现\r\n* 修复：在 OpenAI 插件中跳过冗余的实时事件，由 @theomonnom 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5204 中完成\r\n* 功能：默认在 RoomInput 音频上启用 AGC，由 @theomonnom 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5185 中实现\r\n* 将最低 livekit SDK 版本提升至 1.1.3，由 @theomonnom 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5206 中完成\r\n* livekit-agents 1.5.1，由 @theomonnom 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5207 中发布\r\n\r\n## 新贡献者\r\n* @duchammingai 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5135 中完成了首次贡献\r\n* @karlsonlee-livekit 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5196 中完成了首次贡献\r\n* @milanperovic 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4660 中完成了首次贡献\r\n\r\n**完整变更日志**：https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fcompare\u002Flivekit-agents@1.5.0...livekit-agents@1.5.1","2026-03-23T22:52:43",{"id":178,"version":179,"summary_zh":180,"released_at":181},154309,"livekit-agents@1.5.0","## 亮点\n\n### 自适应打断处理\n\nv1.5.0 
的核心功能：基于音频的机器学习模型，能够区分真实的用户打断与附带声音，例如回应性语气词（“嗯嗯”）、咳嗽、叹息或背景噪音。该功能默认启用，无需任何配置。\n\n关键指标：\n- 在 500 毫秒重叠语音情况下，**精确率 86%** 且 **召回率 100%**\n- 可拒绝 **51%** 的传统 VAD 误报\n- 检测真实打断的速度比仅使用 VAD 快 **64%**\n- 推理耗时不超过 **30 毫秒**\n\n当检测到误打断时，代理会自动从上次中断处继续播放内容，无需重新生成。\n\n如需禁用此功能并仅使用 VAD 进行打断检测：\n\n```python\nsession = AgentSession(\n    ...\n    turn_handling=TurnHandlingOptions(\n        interruption={\n            \"mode\": \"vad\",\n        },\n    ),\n)\n```\n\n博客文章：https:\u002F\u002Flivekit.com\u002Fblog\u002Fadaptive-interruption-handling\n\n### 动态端点检测\n\n端点检测的延迟现在会根据每次对话的自然节奏进行自适应调整。代理不再使用固定的静默阈值，而是通过暂停时长的指数移动平均值来动态判断用户的发言是否结束。\n\n```python\nsession = AgentSession(\n    ...\n    turn_handling=TurnHandlingOptions(\n        endpointing={\n            \"mode\": \"dynamic\",\n            \"min_delay\": 0.3,\n            \"max_delay\": 3.0,\n        },\n    ),\n)\n```\n\n### 新的 `TurnHandlingOptions` API\n\n端点检测和打断处理的相关设置现已整合到一个统一的 `TurnHandlingOptions` 字典中，并传递给 `AgentSession`。旧的关键字参数（如 `min_endpointing_delay`、`allow_interruptions` 等）仍可使用，但已被弃用，并会在运行时发出警告。\n\n```python\nsession = AgentSession(\n    turn_handling={\n        \"turn_detection\": \"vad\",\n        \"endpointing\": {\"min_delay\": 0.5, \"max_delay\": 3.0},\n        \"interruption\": {\"enabled\": True, \"mode\": \"adaptive\"},\n    },\n)\n```\n\n### 会话用量跟踪\n\n新增的 `SessionUsageUpdatedEvent` 提供结构化的按模型划分的用量数据——包括 token 数量、字符数和音频时长，并按服务提供商和模型细分：\n\n```python\n@session.on(\"session_usage_updated\")\ndef on_usage(ev: SessionUsageUpdatedEvent):\n    for usage in ev.usage.model_usage:\n        print(f\"{usage.provider}\u002F{usage.model}: {usage}\")\n```\n\n用量类型包括：`LLMModelUsage`、`TTSModelUsage`、`STTModelUsage` 和 `InterruptionModelUsage`。\n\n您还可以随时通过 `session.usage` 属性访问汇总用量：\n\n```python\nusage = session.usage\nfor model_usage in usage.model_usage:\n    print(model_usage)\n```\n\n用量数据同样包含在 `SessionReport` 中（通过 `model_usage` 字段），因此可在会话结束后直接用于遥测和报告。\n\n### 每轮延迟信息添加至 `ChatMessage.metrics`\n\n每个 `ChatMessage` 现在都包含一个 `metrics` 
字段（`MetricsReport`），其中记录了每轮的延迟数据：\n- `transcription_delay` — 获取转录所需的时间","2026-03-19T17:01:15",{"id":183,"version":184,"summary_zh":185,"released_at":186},154310,"livekit-agents@1.4.6","## 变更内容\n* 修复(types)：在 is_given 中将 TypeGuard 替换为 TypeIs，以实现双向收窄，由 @longcw 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5079 中完成。\n* [inworld] WebSocket 的 _recv_loop 现在会立即刷新音频流，由 @ianbbqzy 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5071 中完成。\n* 修复：在可为空的枚举模式中，将 `null` 包含到枚举数组中，由 @MSameerAbbas 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5080 中完成。\n* (OpenAI 聊天补全)：当存在函数工具时，移除 reasoning_effort 参数，由 @tinalenguyen 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5088 中完成。\n* (Google 实时语音识别)：替换已弃用的 mediaChunks，由 @tinalenguyen 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5089 中完成。\n* 修复：当函数没有参数时，省略工具模式中的 `required` 字段，由 @longcw 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5082 中完成。\n* 修复(sarvam-tts)：将 mime_type 从 audio\u002Fmp3 更正为 audio\u002Fwav，由 @shmundada93 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5086 中完成。\n* 为 SIP 端点转移添加 trunk_config 到 WarmTransferTask 中，由 @longcw 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5016 中完成。\n* 医疗保健示例，由 @tinalenguyen 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5031 中完成。\n* 修复(openai)：仅在待处理的工具调用完成后才重用 previous_response_id，由 @longcw 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5094 中完成。\n* 功能(assemblyai)：添加说话人分离支持，由 @dlange-aai 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5074 中完成。\n* 修复：防止 _cancel_speech_pause 污染后续用户轮次，由 @giulio-leone 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5101 中完成。\n* 功能(google)：在 STT 和 TTS 的 credentials_file 中支持通用凭据类型，由 @rafallezanko 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5056 中完成。\n* 添加 Murf AI 
- TTS 插件支持，由 @gaurav-murf 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F3000 中完成。\n* 功能(voice)：添加可调用的 TextTransforms 支持，并内置 replace 转换功能，由 @longcw 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5104 中完成。\n* 修复(eou)：仅在没有新语音时才重置语音\u002F讲话时间，由 @chenghao-mou 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5083 中完成。\n* (xai)：添加 tts 功能，由 @tinalenguyen 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5120 中完成。\n* (xai tts)：添加语言参数，由 @tinalenguyen 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5122 中完成。\n* livekit-agents 1.4.6，由 @theomonnom 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5123 中发布。\n\n## 新贡献者\n* @shmundada93 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5086 中完成了首次贡献。\n* @dlange-aai 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5074 中完成了首次贡献。\n* @gaurav-murf 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F3000 中完成了首次贡献。\n\n**完整变更日志**：https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fcompare\u002Flivekit-agents@1.4.5...livekit-agents@1.4.6","2026-03-16T19:09:09",{"id":188,"version":189,"summary_zh":190,"released_at":191},154311,"livekit-agents@1.4.5","## 变更内容\n* 在使用 LemonSlice 头像时，将额外参数透传至 LemonSlice，由 @jp-lemon 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4984 中实现。\n* 修复 (Anthropic)：为 Claude 4.6+ 的末尾助手轮次添加占位用户消息，由 @giulio-leone 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4973 中实现。\n* (keyframe)：移除 py.typed 中的空白字符，由 @tinalenguyen 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4990 中实现。\n* 将 Phonic 插件添加到 LiveKit 代理中，由 @qionghuang6 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4980 中实现。\n* 修复数据轨道中内容的 E2EE 加密问题，由 @zelidrag-arbo 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4992 中实现。\n* 修复：当工具在 llm_node 
内部被修改时，重新同步工具上下文，由 @longcw 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4994 中实现。\n* [🤖 readme-manager] 更新 README，由 @ladvoc 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4996 中实现。\n* 修复 (Google)：防止 function_call 文本泄露到 TTS 输出中，由 @BkSouX 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4999 中实现。\n* (OpenAI 响应)：添加 WebSocket 连接池，由 @tinalenguyen 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4985 中实现。\n* (OpenAI TTS)：关闭 OpenAI 客户端连接，由 @tinalenguyen 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5012 中实现。\n* NVIDIA STT：添加说话人分离支持，由 @longcw 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4997 中实现。\n* 当未设置 TTS 时更新错误信息，由 @longcw 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4998 中实现。\n* 在 init 中初始化 interval future，由 @tinalenguyen 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5013 中实现。\n* 修复\u002Felevenlabs 更新默认语音为非过期状态，由 @yusuf-eren 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5010 中实现。\n* [Inworld] 每次从服务器接收到音频块时，都进行刷新以清空解码器缓冲区，由 @ianbbqzy 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4983 中实现。\n* (Google)：支持通过实时和 LLM 传递凭据，由 @tinalenguyen 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5015 中实现。\n* 使用免费层级用户可访问的默认语音，由 @tmshapland 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5020 中实现。\n* 修改 commit_user_turn() 函数，使其返回包含音频转录的 Future，由 @longcw 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5019 中实现。\n* 将 GPT-5.4 添加到 OpenAI 插件中，由 @Topherhindman 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5022 中实现。\n* 生成并上传 Markdown 文档，由 @Topherhindman 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4993 中实现。\n* 添加对 GPT-5.4 和 GPT-5.3 Chat Latest 的支持，由 @Topherhindman 在 
https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5030 中实现。\n* 提升 Cartesia TTS 插件的音频生成质量，由 @tycartesia 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5032 中实现。\n* 修复 (elevenlabs)：处理 _to_timed_words 中的空词问题，由 @MonkeyLeeT 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5036 中实现。\n* 修复 (Deepgram)：为 STT v2 替代方案包含词语置信度，由 @inickt 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F5034 中实现。\n* 修复：当达到 max_tool_steps 时，生成最终的 LLM 响应，由 @IanSteno 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4747 中实现。\n* 修复：在语音代理调度中防范负数睡眠时长，由","2026-03-11T06:45:50",{"id":193,"version":194,"summary_zh":195,"released_at":196},154312,"livekit-agents@1.4.4","## 变更内容\n* 升级 Cartesia TTS 默认为 Sonic 3，由 @chongzluong 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4922 中完成\n* (google stt): 添加去噪器支持及显式适配参数，由 @tinalenguyen 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4918 中完成\n* 功能：添加 Telnyx STT 和 TTS 插件，由 @fmv1992 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4665 中完成\n* 功能：添加 livekit-plugins-sambanova 插件，支持 LLM，由 @mahimairaja 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4910 中完成\n* 跳过在运行结果完成后添加运行事件的操作，由 @longcw 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4925 中完成\n* 防止在恢复 AgentTask 的 allow_interruptions 时出现 RuntimeError，由 @longcw 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4930 中完成\n* 添加对 Gradium 发音 ID 的支持，由 @LaurentMazare 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4932 中完成\n* 功能：优化 WAV 解码，由 @davidzhao 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4905 中完成\n* 修复：在关闭 LogQueueListener 前清空缓冲的日志记录，由 @longcw 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4928 中完成\n* 修复（voice）：对于未知函数调用返回 ToolError，而不是 si…，由 @yusuf-eren 在 
https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4935 中完成\n* 更新 README，加入 mcp 和技能相关信息，由 @Topherhindman 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4937 中完成\n* 修复：将 HttpServer 迁移到 AppRunner，以正确管理连接生命周期，由 @longcw 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4945 中完成\n* 忽略来自 xai realtime 的未知工具，由 @longcw 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4941 中完成\n* soniox stt：从 token 元数据中填充时间信息和置信度，由 @longcw 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4939 中完成\n* 修复（openai）：在 update_chat_ctx 中保留实时模型的非指令系统消息，由 @longcw 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4942 中完成\n* 功能（openai）：将 gpt-realtime-1.5 添加到 RealtimeModels，由 @yusuf-eren 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4947 中完成\n* 统一语言处理方式，由 @davidzhao 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4926 中完成\n* 修复：避免在 _load_task 中无条件调用 psutil 导致事件循环阻塞，由 @msaelices 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4946 中完成\n* 添加 AEC 预热功能，以抑制首次语音中的误中断，由 @longcw 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4813 中完成\n* 初始版本，由 @dhruvladia-sarvam 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4923 中完成\n* 修复控制台模式下 asyncio.Future 崩溃问题，由 @davidzhao 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4952 中完成\n* 修复（11labs）：对于 CJK 字符集，默认使用原始对齐方式，由 @chenghao-mou 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4968 中完成\n* 支持 OpenAI 响应的 WebSocket 模式，由 @tinalenguyen 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4931 中完成\n* Keyframe Labs 插件，由 @kradkfl 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4950 中完成\n* 热修复：修复 `agent_worker.py` 中的导入问题，由 @kradkfl 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4970 中完成\n* 功能（stt）：在 Elevenlabs STT 插件中添加 keyterms 
参数，由 @Arjun-A-I 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fage 中完成","2026-03-03T01:13:00",{"id":198,"version":199,"summary_zh":200,"released_at":201},154313,"livekit-agents@1.4.3","## 变更内容\n* 修复：在浏览器导航 RPC 中使用 data.payload，由 @theomonnom 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4871 中完成。\n* 调整 Speechmatics STT 的依赖版本要求，由 @sam-s10s 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4873 中完成。\n* 当为 Neuphonic 提供 JWT 令牌时不再抛出错误，由 @alexshelkov 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4874 中完成。\n* 修复：在实时会话中保留 FunctionCall 中的 OpenAI 项 ID，由 @StianHanssen 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4876 中完成。\n* 在会话关闭时优雅地停止 AgentTask 及其父代理，由 @longcw 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4730 中完成。\n* 新特性：向 WarmTransferTask 添加 sip_headers 参数，由 @theomonnom 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4890 中完成。\n* 向 AssemblyAI STT 插件添加 vad_threshold 参数，由 @AhmadIbrahiim 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4880 中完成。\n* 更新 Simli 集成端点，由 @Antonyesk601 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4894 中完成。\n* 将默认的得来速 LLM 模型升级为 gpt-5 mini，由 @chenghao-mou 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4897 中完成。\n* 杂项：移除将于 2026 年 3 月 19 日弃用的模型，由 @chenghao-mou 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4895 中完成。\n* 修复：当录制功能被禁用时，跳过 OTLP 日志导出器的设置，由 @theomonnom 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4892 中完成。\n* 移除推理模型中不支持的参数，由 @theomonnom 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4908 中完成。\n* 杂项：更新异步 API 基础 URL 和默认模型名称，由 @ashotbagh 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4896 中完成。\n* (inworld tts)：修复输出发射器的刷新问题，由 @tinalenguyen 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4912 中完成。\n* 在控制台模式下显示轮次指标，由 
@theomonnom 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4916 中完成。\n* 修复（Google）：针对被阻止等情况抛出正确的错误，由 @davidzhao 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4917 中完成。\n* 支持在 livekit-plugins-browser 中使用 Claude Computer，由 @theomonnom 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4882 中完成。\n* livekit-agents 1.4.3，由 @theomonnom 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4920 中完成。\n\n## 新贡献者\n* @StianHanssen 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4876 中完成了首次贡献。\n\n**完整变更日志**：https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fcompare\u002Fbrowser-v0.1.4...livekit-agents@1.4.3","2026-02-23T04:07:10",{"id":203,"version":204,"summary_zh":205,"released_at":206},154314,"browser-v0.1.4","LiveKit Browser v0.1.4 的 CEF 原生二进制文件。支持 macOS arm64、Linux x64 和 Linux arm64 上的 Python 3.12 至 3.14。","2026-02-17T03:27:09",{"id":208,"version":209,"summary_zh":210,"released_at":211},154315,"livekit-agents@1.4.2","以稳定性为核心的版本发布，带来了显著的可靠性提升。修复了进程池中的多个内存泄漏问题：取消任务时作业计数器泄漏、超时后待处理分配泄漏、启动失败时套接字泄漏，以及发送失败时孤立执行器泄漏。IPC 管道的可靠性也得到了增强，并解决了若干边缘情况下的死锁问题（参与者始终无法加入、Ctrl+C 信号未能传递到子进程）。STT\u002FTTS 的回退行为 now 更加健壮：STT 回退在恢复过程中会正确跳过主流，而 TTS 回退不再跨流共享重采样器。其他修复包括：ChatContext.truncate 现在不会丢弃开发者消息；正确解析 cgroups v2 的 CPU 配额；确保 on_session_end 回调的正确顺序；即使会话无法启动，日志也能正常上传。工作进程现在会在排空或满载时自动拒绝新任务，且进程池在高负载下也能正确创建新进程。\n\n### 新的 `RecordingOptions` API\n\n`AgentSession.start()` 方法中的 `record` 参数现在除了接受布尔值外，还支持更细粒度的选项。所有未指定的键默认为 `True`。\n\n```python\n# 录制所有内容（默认）\nawait session.start(agent, record=True)\n\n# 不录制任何内容\nawait session.start(agent, record=False)\n\n# 细粒度配置：仅录制音频，但禁用跟踪信息、日志和转录\nawait session.start(agent, record={\"audio\": True, \"traces\": False, \"logs\": False, \"transcript\": False})\n```\n\n## 变更内容\n* 修复多声道输入在语速调整时的问题，由 @theomonnom 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4740 中完成\n* livekit-agents 1.4.1，由 @theomonnom 在 
https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4742 中发布\n* 修复 ruff 检查及类型检查，由 @theomonnom 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4743 中完成\n* 将 camb 插件更名为 cambai，由 @tinalenguyen 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4744 中完成\n* 修复 ruff 报错，由 @davidzhao 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4749 中完成\n* （liveavatar）：将头像模式从 CUSTOM 改为 LITE，由 @tinalenguyen 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4748 中完成\n* sarvam v3：STT 和 TTS 模型，由 @dhruvladia-sarvam 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4603 中完成\n* 导出 ToolContext，由 @theomonnom 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4750 中完成\n* 修复拼写错误：“occured”修正为“occurred”，由 @thecaptain789 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4751 中完成\n* 修复拼写错误：“dont't”修正为“don't”，由 @thecaptain789 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4752 中完成\n* 为 Neuphonic 添加 jwt_token 认证选项，由 @alexshelkov 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4734 中完成\n* 修复 py3.14 中 get_event_loop 的问题，由 @theomonnom 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4757 中完成\n* 功能性改进：添加缺失的 OpenTelemetry GenAI 属性（gen_ai.provider.name、gen_ai.operation.name），由 @Mr-Neutr0n 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4759 中完成\n* 为 SpeechHandle 添加 input_details，由 @longcw 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4701 中完成\n* 抑制 tee aclose 异常，由 @chenghao-mou 在 https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4766 中完成\n* 修复 3.14 的语法警告，由","2026-02-17T03:17:44",{"id":213,"version":214,"summary_zh":215,"released_at":216},154316,"browser-v0.1.3","LiveKit Browser v0.1.3 的 CEF 原生二进制文件。支持 macOS arm64、Linux x64 和 Linux arm64 上的 Python 3.12 至 
3.14。","2026-02-16T22:41:51",{"id":218,"version":219,"summary_zh":220,"released_at":221},154317,"browser-v0.1.2","LiveKit Browser v0.1.2 的 CEF 原生二进制文件。支持 macOS arm64、Linux x64 和 Linux arm64 上的 Python 3.12 至 3.14。","2026-02-16T05:40:51",{"id":223,"version":224,"summary_zh":225,"released_at":226},154318,"livekit-agents@1.4.0","## Python 3.14 Support & Python 3.9 Dropped\r\n\r\nThis release adds **Python 3.14 support** and **drops Python 3.9**. The minimum supported version is now **Python 3.10**.\r\n\r\n## Tool Improvements\r\n\r\nTools and toolsets now have **stable unique IDs**, making it possible to reference and filter tools programmatically. Changes to agent configuration (instructions, tools) are now tracked in conversation history via `AgentConfigUpdate`.\r\n\r\n## `LLMStream.collect()` API\r\n\r\nA new `LLMStream.collect()` API makes it significantly easier to use LLMs outside of `AgentSession`. You can now call an LLM, collect the full response, and execute tool calls with a straightforward API — useful for background tasks, pre-processing, or any workflow where you need LLM capabilities without the full voice agent pipeline.\r\n\r\n```python\r\nfrom livekit.agents import llm\r\n\r\nresponse = await my_llm.chat(chat_ctx=ctx, tools=tools).collect()\r\n\r\nfor tc in response.tool_calls:\r\n    result = await llm.execute_function_call(tc, tool_ctx)\r\n    ctx.insert(result.fnc_call)\r\n    if result.fnc_call_out:\r\n        ctx.insert(result.fnc_call_out)\r\n```\r\n\r\n## Manual Turn Detection for Realtime Models\r\n\r\nRealtime models now support `commit_user_turn`, enabling `turn_detection=\"manual\"` mode. 
This gives you full control over when user turns are committed — useful for push-to-talk interfaces or scenarios where automatic VAD-based turn detection isn't ideal.\r\n\r\n```python\r\n@ctx.room.local_participant.register_rpc_method(\"end_turn\")\r\nasync def end_turn(data: rtc.RpcInvocationData):\r\n    session.input.set_audio_enabled(False)\r\n    session.commit_user_turn(\r\n        transcript_timeout=10.0,\r\n        stt_flush_duration=2.0,\r\n    )\r\n```\r\n\r\n## Job Migration on Reconnection\r\n\r\nWhen the agent server temporarily loses connection and reconnects, **active jobs are now automatically migrated** rather than being dropped. This significantly improves reliability during transient network issues.\r\n\r\n## False Interruption Fix\r\n\r\nFixed a bug where late end-of-speech events could trigger duplicate false interruption timers, causing the agent to incorrectly stop speaking. The agent now properly deduplicates these events and tracks STT completion state more reliably.\r\n\r\n### New Providers & Plugins\r\n\r\n- **xAI Responses LLM** — Use xAI's Responses API via `xai.responses.LLM()`\r\n- **Azure OpenAI Responses** — Azure-hosted Responses API via `azure.responses.LLM()`, with support for deployments and Azure auth\r\n- **Camb.ai TTS** — New TTS plugin powered by the MARS model family (mars-flash, mars-pro, mars-instruct), with voice selection, language control, and style instructions\r\n- **Avatario Avatar** — Virtual avatar plugin with session management and API client\r\n\r\n## What's Changed\r\n* feat(azure\u002Fstt): TrueText post processing option added to STTOptions by @rafallezanko in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4557\r\n* chore(README): remove STT and LLM API key configuration from LemonSlice example as not needed. 
by @codeSTACKr in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4589\r\n* fix: Add thread-safe initialization to _DefaultLoadCalc singleton by @darshankparmar in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4585\r\n* add missing plugins to dependencies by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4593\r\n* _setup_cloud_tracer still overrides TracerProviders due to checking the wrong base class by @hudson-worden in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4584\r\n* fix(google): add thought_signature support for Gemini 2.5 models by @gdoermann in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4595\r\n* remove shortcut inference STT model name by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4594\r\n* Increase read_bufsize in minimax tts plugin by @jose-speak in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4590\r\n* refactor(rtzr): FlushSentinel-based segment control and type safety improvements by @kimdwkimdw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4565\r\n* improve EndCallTool by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4563\r\n* Fix: Add 'required' field to function_tool schema for Groq compatibility by @VinayJogani14 in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4613\r\n* fix: avoid modifying original raw tool description by @davidzhao in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4616\r\n* continue instead of return in InferenceProcExecutor loop by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4612\r\n* add xai responses llm by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4618\r\n* move xAI tools to separate file by @tinalenguyen in 
https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4624\r\n* (xAI): backward compatibility for tools by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4625\r\n* update inference models to match the latest by @davidzhao in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4597\r\n* AssemblyAI added EU streaming endpoint option by @ftsef in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4571\r\n* feat: Add Camb.ai TTS plugin by @eRuaro in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4442\r\n* prevent dup","2026-02-06T21:10:44",{"id":228,"version":229,"summary_zh":230,"released_at":231},154319,"livekit-agents@1.3.12","## What's Changed\r\n* improve text mode CLI rendering by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4522\r\n* fix `Worker.aclose` raising RuntimeError  by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4523\r\n* better cli rendering for audio by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4524\r\n* fix frame capture order and add playback start callback in console mode by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4516\r\n* Add Connector to default participant kinds by @cnderrauber in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4526\r\n* add support for language detection for assembly ai by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4527\r\n* Support static context in integration with langchain by @benlangfeld in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4504\r\n* feat(google): add warnings when system messages are dropped in Gemini realtime model by @dhruvnigam93 in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4513\r\n* chore: change deprecated cartesia 
voice id by @davidzhao in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4528\r\n* #4481 Added Opus and PCM encoding to ElevenLabs TTS by @rafallezanko in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4525\r\n* interrupt the same speech handle by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4536\r\n* pin livekit-rtc version by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4531\r\n* fix(elevenlabs\u002Fstt): allow specifying scribe_v2 non-realtime model by @bml1g12 in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4515\r\n* add reasoning param for openai responses LLM by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4548\r\n* Defensive fixes by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4546\r\n* LemonSlice Plugin by @jp-lemon in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4539\r\n* feat (google STT): support profanity filter by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4573\r\n* fix(baseten): correct metadata and response field names for STT by @toubatbrian in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4572\r\n* drop frames when the ConsoleAudioInput is detached by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4576\r\n* fix audio recording in console mode by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4575\r\n* Chatterbox model support by @plangary in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4541\r\n* Inworld websocket improvements by @cshape in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4533\r\n* fix(deepgram): expose close code and reason on unexpected disconnects by @vadimatmurphy in 
https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4569\r\n* playback started call for DataStreamAudioOutput and QueueAudioOutput by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4570\r\n* feat(azure): add lexicon_uri option to TTS by @zach-iee in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4485\r\n* feat(tts): integrate AsyncAI TTS engine into livekit by @ashotbagh in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F3596\r\n* Simplismart Integration in Livekit by @Tushar-ml in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4349\r\n* handle invalid bytes error by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4579\r\n* Fixes #4388: Correct transcription_delay metric calculation in STT turn detec… by @devbyteai in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4396\r\n* fix(mcp): Error message based on text attribute instead of str(part) by @rafallezanko in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4582\r\n* livekit-agents 1.3.12 by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4583\r\n\r\n## New Contributors\r\n* @cnderrauber made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4526\r\n* @benlangfeld made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4504\r\n* @dhruvnigam93 made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4513\r\n* @jp-lemon made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4539\r\n* @vadimatmurphy made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4569\r\n* @zach-iee made their first contribution in 
https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4485\r\n* @ashotbagh made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F3596\r\n* @Tushar-ml made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4349\r\n* @devbyteai made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4396\r\n\r\n**Full Changelog**: https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fcompare\u002Flivekit-agents@1.3.11...livekit-agents@1.3.12","2026-01-21T22:13:55",{"id":233,"version":234,"summary_zh":235,"released_at":236},154320,"livekit-agents@1.3.11","## What's Changed\r\n* Add allowed_tools and transport_type parameters to MCPServerHTTP by @wasaybaig201 in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4365\r\n* better `transport_type` type in MCPServer by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4375\r\n* fix typo: double the - in multiple livekit-plugin providers by @ChristianBernhard in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4358\r\n* feat(AWS STT): use ChainedIdentityResolver by @itskyf in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4356\r\n* support next_in_chain for RoomIO text output by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4353\r\n* standardize Tool interface by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4368\r\n* add tests for function tool parsing and execution by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4384\r\n* add EndCallTool by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4377\r\n* chore(google): update doc string to reflect default realtime models by @davidzhao in 
https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4398\r\n* fix AttributeError in NVIDIA Riva STT by @gau-nernst in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4391\r\n* fix aws credentials when using env vals by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4403\r\n* fix (gemini streaming tts): change default audio encoding and voice  by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4393\r\n* fix (mistral-ai): add flexibility for timestamps  by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4404\r\n* remove shutdown models from google gemini live  by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4421\r\n* update groq tts models, voices, and defaults by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4422\r\n* fix (google stt): set enable_word_time_offsets to False for chirp 3 by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4420\r\n* fix: return 503 health check when worker fails to connect to LiveKit by @rusg77 in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4419\r\n* Add retrieval config support for google LLM by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4408\r\n* allow pushing frames to VAD when agent speech is uninterruptible by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4418\r\n* Add extra comments about Google model deprecation by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4424\r\n* fix (gemini filesearch): require only filestore names by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4428\r\n* update docker dependencies by @chenghao-mou in 
https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4431\r\n* chore: minor fixup of console room name by @davidzhao in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4433\r\n* Inference: Improved support for mid session TTS updates by @adrian-cowham in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4412\r\n* fix: acquire lock in _DefaultLoadCalc.get_load() to prevent race condition by @martin-purplefish in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4435\r\n* fix vad rnn state by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4437\r\n* restore old behavior by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4434\r\n* fix: avoid double RoomIO.aclose during shutdown by @darshankparmar in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4446\r\n* add connect CLI command by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4452\r\n* fix function call created_at by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4453\r\n* Update STT tests and add batch recognition flag by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4425\r\n* allow tests on external PRs when triggered by members by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4456\r\n* Adding model query param to the STT and TTS websocket connection string. 
by @adrian-cowham in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4457\r\n* refactor connect CLI command by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4458\r\n* OpenAI Responses API Plugin by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4192\r\n* fix transcription truncate when agent is interrupted in console mode by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4473\r\n* feat(rtzr): add keyword boosting to streaming STT by @lalq in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4405\r\n* Revise AWS Plugin README for accuracy and clarity by @guiruggiero in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4468\r\n* close log_handler when process initialize failed by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4472\r\n* feat(deepgram): make vad_events configurable by @vchulski in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4476\r\n* Enables continuous language ID for Azure STT by @MSameerAbbas in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4479\r\n* fix: OpenAI realtime division by zero by @darshankparmar in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4490\r\n* update avatar example and openai readmes by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4495\r\n* type cleanup, include all plugins into type checker by @davidzhao in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4491\r\n* agents.md and claude.md by @davi","2026-01-14T18:45:33",{"id":238,"version":239,"summary_zh":240,"released_at":241},154321,"livekit-agents@1.3.10","## What's Changed\r\n* fix(google): improve handling of empty responses by @davidzhao in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4330\r\n* Add support for audio 
frame processor by @lukasIO in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4145\r\n* Update doc for min_endpointing_delay by @MonkeyLeeT in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4327\r\n* force interruption when closing the session by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4346\r\n* add ProviderTool & support built-in tools for xai & gemini realtime by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4344\r\n* fix dynamic tool updates in llm_node by @davidzhao in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4355\r\n* Proper support for V1 models for Google STT by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4338\r\n* Add Grok example by @ShayneP in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4363\r\n* allow aws realtime to accept str tool results by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4364\r\n* (gemini realtime) check for vertexai for api version by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4366\r\n* Enable Soniox STT turn detection & metrics by @matejmarinko-soniox in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4332\r\n\r\n### Provider tools\r\n\r\nThis release brings the ability to use tools that are specific to model providers with [provider tools](https:\u002F\u002Fdocs.livekit.io\u002Fagents\u002Flogic\u002Ftools\u002F#provider-tools). You can now mix & match function tools and provider tools in your agent by specifying `Agent(tools=[..])`.\r\n\r\nFor those that were using the experimental `_gemini_tools` parameter with Google LLMs, that experimental parameter has been removed in favor of provider tools. 
See usage example [here](https:\u002F\u002Fdocs.livekit.io\u002Fagents\u002Fmodels\u002Fllm\u002Fplugins\u002Fgemini\u002F#provider-tools).\r\n\r\n## New Contributors\r\n* @MonkeyLeeT made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4327\r\n\r\n**Full Changelog**: https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fcompare\u002Flivekit-agents@1.3.9...livekit-agents@1.3.10","2025-12-23T19:43:54",{"id":243,"version":244,"summary_zh":245,"released_at":246},154325,"livekit-agents@1.3.6","## What's Changed\r\n* more readable logs rendering by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4093\r\n* fix prometheus multiprocess mode by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4108\r\n* fix RecorderAudioOutput sample rate by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4098\r\n* refresh jwt used in otlp requests before it expires by @paulwe in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4107\r\n* AGT-2269 insert silence during pauses for RecorderIO by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4088\r\n* fix agent_turn and agent_speaking spans hierarchy & add agent_turn for tts_task by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4100\r\n* Replaced deprecated amazon-transcribe SDK with new aws-sdk-transcribe-streaming by @pabloFuente in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4111\r\n* make generation_id private in SpeechHandle by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4124\r\n* skip sig masking on windows by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4119\r\n* Fix realtime compatibility with aws-sdk-bedrock-runtime 0.2.0 upgrade by @kachenjr in 
https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4134\r\n* Enable Deepgram Nova-3 multilingual keyterm prompting by @jkroll-deepgram in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4136\r\n* copy logger levels configuration to job processes by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4139\r\n* fix log text overflow by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4141\r\n* fix logging.getChildren for py\u003C3.12 by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4142\r\n* fix traceback print when using LogQueueHandler by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4128\r\n* add on_enter to AgentTask blocked_tasks if it's not done by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4113\r\n* add WarmTransferTask  by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4126\r\n* fix(anthropic): use passed client parameter instead of always creating new one (fixes #4129) by @joshiayush in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4143\r\n* heygen liveavatar plugin by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F3948\r\n* Gradium integration. 
by @LaurentMazare in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4150\r\n* Include mip_opt_out to batch deepgram STT requests by @eliooooooot in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4144\r\n* Inworld TTS Update by @cshape in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4112\r\n* Elevenlabs include pronunciation dictionary locators by @arvindvs in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4097\r\n* use log filter for log_context_fields by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4146\r\n* fix: `AgentHandoff` unable to serialize and then deserialize [ONE-LINER] by @slado122 in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4160\r\n* fix OpenTelemetry breaking changes by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4162\r\n* livekit-agents 1.3.6 by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4163\r\n\r\n## New Contributors\r\n* @pabloFuente made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4111\r\n* @jkroll-deepgram made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4136\r\n* @joshiayush made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4143\r\n* @LaurentMazare made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4150\r\n* @cshape made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4112\r\n* @arvindvs made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4097\r\n* @slado122 made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4160\r\n\r\n**Full Changelog**: 
https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fcompare\u002Flivekit-agents@1.3.5...livekit-agents@1.3.6","2025-12-03T19:12:03",{"id":248,"version":249,"summary_zh":250,"released_at":251},154326,"livekit-agents@1.3.5","## What's Changed\r\n* Improve IVR example README and add inline comments for clarifications by @toubatbrian in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4065\r\n* show milliseconds in CLI by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4080\r\n* fix legacy api `ws_url` (WorkerOptions) by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4090\r\n* fix turn-detector loading issue due to transformers 4.57.2 by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4084\r\n* add openai prompt cache retention param by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4089\r\n* flush telemetry traces and logs when cleanup job task by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4082\r\n* livekit-agents 1.3.5 by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4091\r\n\r\n\r\n**Full Changelog**: https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fcompare\u002Flivekit-agents@1.3.4...livekit-agents@1.3.5","2025-11-25T21:05:03",{"id":253,"version":254,"summary_zh":255,"released_at":256},154327,"livekit-agents@1.3.4","## What's Changed\r\n* fix `task_ids` is not defined by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4025\r\n* fix tests and type checking by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4011\r\n* fix contextvar when using text mode in console by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F3972\r\n* allow turn detection mode to be updated within session by @longcw in 
https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F3816\r\n* Inference: Allow provider specific parameter updates by @adrian-cowham in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F3808\r\n* Fix docstrings after #1811 Blingfire default tokenizer switch by @mrkowalski in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F3812\r\n* fix bithuman avatar getting local participant identity by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4029\r\n* Allow pause in final transcript by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F3995\r\n* clear internal buffer of datastream io when interruption by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4030\r\n* Support for pronunciation dictionary in Cartesia TTS by @cateet in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4033\r\n* Add OVHcloud AI Endpoints provider by @eliasto in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4037\r\n* bring back `drain-timeout` on the CLI by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4038\r\n* feat(elevenlabs): add STTv2 with streaming support for Scribe v2 by @yorrick in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F3909\r\n* add JobContext.local_participant_identity by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4031\r\n* fix: ensure logger name is set even when custom scope is provided by @davidzhao in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4040\r\n* chore: remove pyav \u003C16 lock by @davidzhao in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4044\r\n* add use_realtime to elevenlabs stt and support scribe v2 realtime model by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4041\r\n* Remove flags from 
RawFunctionDescription by @philipp-eisen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4050\r\n* Temp workaround for langfuse otel traces by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F3987\r\n* fix cloud tracer overwrites user-defined tracer provider by @longcw in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4060\r\n* Fix: Propagate ws_url in AgentServer.from_server_options by @kstonekuan in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4046\r\n* make `ChatContext.summarize` private by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4068\r\n* add makefile by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4067\r\n* feat(openai): add verbosity parameter support to LLM.with_azure() by @IanSteno in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4070\r\n* add dump signal handler and IPC message by @chenghao-mou in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4064\r\n* fix: accurate speech duration in VAD EOS by @jayeshp19 in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4058\r\n* add `chat_ctx` argument to `AgentSession.generate_reply` by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4074\r\n* add livekit credentials to environment by @tinalenguyen in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4075\r\n* Changing audio format for rime from wav\u002Fmp3 to pcm by @gokuljs in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4073\r\n* livekit-agents 1.3.4 by @theomonnom in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4077\r\n\r\n## New Contributors\r\n* @eliasto made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4037\r\n* @yorrick made their first contribution in 
https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F3909\r\n* @philipp-eisen made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4050\r\n* @kstonekuan made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4046\r\n* @IanSteno made their first contribution in https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fpull\u002F4070\r\n\r\n**Full Changelog**: https:\u002F\u002Fgithub.com\u002Flivekit\u002Fagents\u002Fcompare\u002Flivekit-agents@1.3.3...livekit-agents@1.3.4","2025-11-24T22:30:38"]