[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-Azure-Samples--aoai-realtime-audio-sdk":3,"tool-Azure-Samples--aoai-realtime-audio-sdk":65},[4,17,27,35,43,56],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":16},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",159267,2,"2026-04-17T11:29:14",[13,14,15],"开发框架","Agent","语言模型","ready",{"id":18,"name":19,"github_repo":20,"description_zh":21,"stars":22,"difficulty_score":23,"last_commit_at":24,"category_tags":25,"status":16},4487,"LLMs-from-scratch","rasbt\u002FLLMs-from-scratch","LLMs-from-scratch 是一个基于 PyTorch 的开源教育项目，旨在引导用户从零开始一步步构建一个类似 ChatGPT 的大型语言模型（LLM）。它不仅是同名技术著作的官方代码库，更提供了一套完整的实践方案，涵盖模型开发、预训练及微调的全过程。\n\n该项目主要解决了大模型领域“黑盒化”的学习痛点。许多开发者虽能调用现成模型，却难以深入理解其内部架构与训练机制。通过亲手编写每一行核心代码，用户能够透彻掌握 Transformer 架构、注意力机制等关键原理，从而真正理解大模型是如何“思考”的。此外，项目还包含了加载大型预训练权重进行微调的代码，帮助用户将理论知识延伸至实际应用。\n\nLLMs-from-scratch 特别适合希望深入底层原理的 AI 开发者、研究人员以及计算机专业的学生。对于不满足于仅使用 API，而是渴望探究模型构建细节的技术人员而言，这是极佳的学习资源。其独特的技术亮点在于“循序渐进”的教学设计：将复杂的系统工程拆解为清晰的步骤，配合详细的图表与示例，让构建一个虽小但功能完备的大模型变得触手可及。无论你是想夯实理论基础，还是为未来研发更大规模的模型做准备",90106,3,"2026-04-06T11:19:32",[15,26,14,13],"图像",{"id":28,"name":29,"github_repo":30,"description_zh":31,"stars":32,"difficulty_score":10,"last_commit_at":33,"category_tags":34,"status":16},8553,"spec-kit","github\u002Fspec-kit","Spec Kit 是一款专为提升软件开发效率而设计的开源工具包，旨在帮助团队快速落地“规格驱动开发”（Spec-Driven 
Development）模式。传统开发中，需求文档往往与代码实现脱节，导致沟通成本高且结果不可控；而 Spec Kit 通过将规格说明书转化为可执行的指令，让 AI 直接依据明确的业务场景生成高质量代码，从而减少从零开始的随意编码，确保产出结果的可预测性。\n\n该工具特别适合希望利用 AI 辅助编程的开发者、技术负责人及初创团队。无论是启动全新项目还是在现有工程中引入规范化流程，用户只需通过简单的命令行操作，即可初始化项目并集成主流的 AI 编程助手。其核心技术亮点在于“规格即代码”的理念，支持社区扩展与预设模板，允许用户根据特定技术栈定制开发流程。此外，Spec Kit 强调官方维护的安全性，提供稳定的版本管理，帮助开发者在享受 AI 红利的同时，依然牢牢掌握架构设计的主动权，真正实现从“凭感觉写代码”到“按规格建系统”的转变。",88749,"2026-04-17T09:48:14",[15,26,14,13],{"id":36,"name":37,"github_repo":38,"description_zh":39,"stars":40,"difficulty_score":10,"last_commit_at":41,"category_tags":42,"status":16},3704,"NextChat","ChatGPTNextWeb\u002FNextChat","NextChat 是一款轻量且极速的 AI 助手，旨在为用户提供流畅、跨平台的大模型交互体验。它完美解决了用户在多设备间切换时难以保持对话连续性，以及面对众多 AI 模型不知如何统一管理的痛点。无论是日常办公、学习辅助还是创意激发，NextChat 都能让用户随时随地通过网页、iOS、Android、Windows、MacOS 或 Linux 端无缝接入智能服务。\n\n这款工具非常适合普通用户、学生、职场人士以及需要私有化部署的企业团队使用。对于开发者而言，它也提供了便捷的自托管方案，支持一键部署到 Vercel 或 Zeabur 等平台。\n\nNextChat 的核心亮点在于其广泛的模型兼容性，原生支持 Claude、DeepSeek、GPT-4 及 Gemini Pro 等主流大模型，让用户在一个界面即可自由切换不同 AI 能力。此外，它还率先支持 MCP（Model Context Protocol）协议，增强了上下文处理能力。针对企业用户，NextChat 提供专业版解决方案，具备品牌定制、细粒度权限控制、内部知识库整合及安全审计等功能，满足公司对数据隐私和个性化管理的高标准要求。",87618,"2026-04-05T07:20:52",[13,15],{"id":44,"name":45,"github_repo":46,"description_zh":47,"stars":48,"difficulty_score":10,"last_commit_at":49,"category_tags":50,"status":16},2268,"ML-For-Beginners","microsoft\u002FML-For-Beginners","ML-For-Beginners 是由微软推出的一套系统化机器学习入门课程，旨在帮助零基础用户轻松掌握经典机器学习知识。这套课程将学习路径规划为 12 周，包含 26 节精炼课程和 52 道配套测验，内容涵盖从基础概念到实际应用的完整流程，有效解决了初学者面对庞大知识体系时无从下手、缺乏结构化指导的痛点。\n\n无论是希望转型的开发者、需要补充算法背景的研究人员，还是对人工智能充满好奇的普通爱好者，都能从中受益。课程不仅提供了清晰的理论讲解，还强调动手实践，让用户在循序渐进中建立扎实的技能基础。其独特的亮点在于强大的多语言支持，通过自动化机制提供了包括简体中文在内的 50 多种语言版本，极大地降低了全球不同背景用户的学习门槛。此外，项目采用开源协作模式，社区活跃且内容持续更新，确保学习者能获取前沿且准确的技术资讯。如果你正寻找一条清晰、友好且专业的机器学习入门之路，ML-For-Beginners 
将是理想的起点。",85092,"2026-04-10T11:13:16",[26,51,52,53,14,54,15,13,55],"数据工具","视频","插件","其他","音频",{"id":57,"name":58,"github_repo":59,"description_zh":60,"stars":61,"difficulty_score":62,"last_commit_at":63,"category_tags":64,"status":16},5784,"funNLP","fighting41love\u002FfunNLP","funNLP 是一个专为中文自然语言处理（NLP）打造的超级资源库，被誉为\"NLP 民工的乐园”。它并非单一的软件工具，而是一个汇集了海量开源项目、数据集、预训练模型和实用代码的综合性平台。\n\n面对中文 NLP 领域资源分散、入门门槛高以及特定场景数据匮乏的痛点，funNLP 提供了“一站式”解决方案。这里不仅涵盖了分词、命名实体识别、情感分析、文本摘要等基础任务的标准工具，还独特地收录了丰富的垂直领域资源，如法律、医疗、金融行业的专用词库与数据集，甚至包含古诗词生成、歌词创作等趣味应用。其核心亮点在于极高的全面性与实用性，从基础的字典词典到前沿的 BERT、GPT-2 模型代码，再到高质量的标注数据和竞赛方案，应有尽有。\n\n无论是刚刚踏入 NLP 领域的学生、需要快速验证想法的算法工程师，还是从事人工智能研究的学者，都能在这里找到急需的“武器弹药”。对于开发者而言，它能大幅减少寻找数据和复现模型的时间；对于研究者，它提供了丰富的基准测试资源和前沿技术参考。funNLP 以开放共享的精神，极大地降低了中文自然语言处理的开发与研究成本，是中文 AI 社区不可或缺的宝藏仓库。",79857,1,"2026-04-08T20:11:31",[15,51,54],{"id":66,"github_repo":67,"name":68,"description_en":69,"description_zh":70,"ai_summary_zh":70,"readme_en":71,"readme_zh":72,"quickstart_zh":73,"use_case_zh":74,"hero_image_url":75,"owner_login":76,"owner_name":77,"owner_avatar_url":78,"owner_bio":79,"owner_company":80,"owner_location":80,"owner_email":80,"owner_twitter":80,"owner_website":81,"owner_url":82,"languages":83,"stars":119,"forks":120,"last_commit_at":121,"license":122,"difficulty_score":10,"env_os":123,"env_gpu":124,"env_ram":124,"env_deps":125,"category_tags":128,"github_topics":80,"view_count":10,"oss_zip_url":80,"oss_zip_packed_at":80,"status":16,"created_at":129,"updated_at":130,"faqs":131,"releases":162},8645,"Azure-Samples\u002Faoai-realtime-audio-sdk","aoai-realtime-audio-sdk","Azure OpenAI code resources for using gpt-4o-realtime capabilities.","aoai-realtime-audio-sdk 是微软提供的一套代码资源，旨在帮助开发者在 Azure 平台上调用 GPT-4o 模型的实时音频交互能力。它核心解决了传统对话 AI 响应延迟较高的问题，通过全新的 `\u002Frealtime` API 端点，实现了“语音输入、语音输出”的低延迟双向流式对话，让机器与人的交流更加自然流畅。\n\n这套工具特别适合需要构建高响应速度应用的开发者，例如智能客服系统、虚拟助手或实时翻译场景。其独特的技术亮点在于基于 WebSocket 协议构建，支持全异步通信，不仅能处理语音流，还兼容文本消息和函数调用等现有功能。需要注意的是，该 SDK 定位为参考实现，目前项目已不再积极维护，且处于公共预览阶段，官方建议在新项目中优先采用 OpenAI 
各语言官方库的最新支持方案。对于希望探索实时语音交互架构的技术人员而言，它仍是一份宝贵的中间态参考资料，但需留意服务可能存在的变动与中断风险。","> [!WARNING]\n> This project is not actively maintained and does not match the latest, general availability state of the OpenAI Realtime API. The code in this repository is retained as reference for interim materials before official library support was in place. Please see official library support available across all OpenAI libraries (Python, JS, Java, Go, .NET, Ruby) for new solutions.\n\n# Azure OpenAI GPT-4o Audio and \u002Frealtime: Public Preview Documentation\n\nWelcome to the Public Preview for Azure OpenAI `\u002Frealtime` using `gpt-4o-realtime-preview`! This repository provides documentation, standalone libraries, and sample code for using `\u002Frealtime` -- applicable to both Azure OpenAI and standard OpenAI v1 endpoint use.\n\n## Overview: what's \u002Frealtime?\n\nThis preview introduces a new `\u002Frealtime` API endpoint for the `gpt-4o-realtime-preview` model family. `\u002Frealtime`:\n\n- Supports low-latency, \"speech in, speech out\" conversational interactions\n- Works with text messages, function tool calling, and many other existing capabilities from other endpoints like `\u002Fchat\u002Fcompletions`\n- Is a great fit for support agents, assistants, translators, and other use cases that need highly responsive back-and-forth with a user\n\n`\u002Frealtime` is built on [the WebSockets API](https:\u002F\u002Fdeveloper.mozilla.org\u002Fen-US\u002Fdocs\u002FWeb\u002FAPI\u002FWebSockets_API) to facilitate fully asynchronous streaming communication between the end user and model. 
It's designed to be used in the context of a trusted, intermediate service that manages both connections to end users and model endpoint connections; it **is not** designed to be used directly from untrusted end user devices, and device details like capturing and rendering audio data are outside the scope of the `\u002Frealtime` API.\n\nAt a summary level, the architecture of an experience built atop `\u002Frealtime` looks something like the following (noting that the user interactions, as previously mentioned, are not part of the API itself):\n\n```mermaid\nsequenceDiagram\n  actor User as End User\n  participant MiddleTier as \u002Frealtime host\n  participant AOAI as Azure OpenAI\n  User->>MiddleTier: Begin interaction\n  MiddleTier->>MiddleTier: Authenticate\u002FValidate User\n  MiddleTier--)User: audio information\n  User--)MiddleTier: \n  MiddleTier--)User: text information\n  User--)MiddleTier: \n  MiddleTier--)User: control information\n  User--)MiddleTier: \n  MiddleTier->>AOAI: connect to \u002Frealtime\n  MiddleTier->>AOAI: configure session\n  AOAI->>MiddleTier: session start\n  MiddleTier--)AOAI: send\u002Freceive WS commands\n  AOAI--)MiddleTier: \n  AOAI--)MiddleTier: create\u002Fstart conversation responses\n  AOAI--)MiddleTier: (within responses) create\u002Fstart\u002Fadd\u002Ffinish items\n  AOAI--)MiddleTier: (within items) create\u002Fstream\u002Ffinish content parts\n```\n\nNote that `\u002Frealtime` is in **public preview**. API changes, code updates, and occasional service disruptions are expected.\n\n## How to get started\n\n- Create an Azure OpenAI resource using the `eastus2` or `swedencentral` region\n- Deploy the `gpt-4o-realtime-preview` model (`2024-10-01` version) to one of these supported resources\n- Use one of the included samples to see `\u002Frealtime` in action\n\n## Connecting to and authenticating with `\u002Frealtime`\n\nThe `\u002Frealtime` API requires an existing Azure OpenAI resource endpoint in a supported region. 
A full request URI can be constructed by concatenating:\n\n1. The secure WebSocket (`wss:\u002F\u002F`) protocol\n2. Your Azure OpenAI resource endpoint hostname, e.g. `my-aoai-resource.openai.azure.com`\n3. The `openai\u002Frealtime` API path\n4. An `api-version` query string parameter for a supported API version -- initially, `2024-10-01-preview`\n5. A `deployment` query string parameter with the name of your `gpt-4o-realtime-preview` model deployment\n\nCombining into a full example, the following could be a well-constructed `\u002Frealtime` request URI:\n\n```http\nwss:\u002F\u002Fmy-eastus2-openai-resource.openai.azure.com\u002Fopenai\u002Frealtime?api-version=2024-10-01-preview&deployment=gpt-4o-realtime-preview-1001\n```\n\nTo authenticate:\n- **Using Microsoft Entra**: `\u002Frealtime` supports token-based authentication against an appropriately configured Azure OpenAI Service resource that has managed identity enabled. Use a `Bearer` token with the `Authorization` header to apply a retrieved authentication token.\n- **Using an API key**: An `api-key` can be provided in one of two ways:\n  1. Using an `api-key` connection header on the pre-handshake connection (note: not available in a browser environment)\n  2. 
Using an `api-key` query string parameter on the request URI (note: query string parameters are encrypted when using https\u002Fwss)\n\n## API concepts\n\n- A caller establishes a connection to `\u002Frealtime`, which starts a new `session`\n- The `session` can be configured to customize input and output audio behavior, voice activity detection behavior, and other shared settings\n- A `session` automatically creates a default `conversation`\n  - Note: in the future, multiple concurrent conversations may be supported -- this is not currently available\n- The `conversation` accumulates input signals until a `response` is started, either via a direct command by the caller or automatically by voice-activity-based turn detection\n- Each `response` consists of one or more `items`, which can encapsulate messages, function calls, and other information\n- Message `item`s have `content_part`s, allowing multiple modalities (text, audio) to be represented across a single item\n- The `session` manages configuration of caller input handling (e.g. user audio) and common output\u002Fgeneration handling\n- Each caller-initiated `response.create` can override some of the output `response` behavior, if desired\n- Server-created `item`s and the `content_part`s in messages can be populated asynchronously and in parallel, e.g. receiving audio, text, and function information concurrently (round robin)\n\n## API details\n\nOnce the WebSocket connection session to `\u002Frealtime` is established and authenticated, the functional interaction takes place via sending and receiving WebSocket messages, herein referred to as \"commands\" to avoid ambiguity with the content-bearing \"message\" concept already present for inference. These commands each take the form of a JSON object. 
Commands can be sent and received in parallel and applications should generally handle them both concurrently and asynchronously.\n\nFor a full, structured description of request and response commands, see [realtime-openapi3.yml](realtime-openapi3.yml). As with other aspects of the public preview, note that the protocol specifics may be subject to change.\n\n**Session configuration and turn handling mode**\n\nOften, the first command sent by the caller on a newly-established `\u002Frealtime` session will be a `session.update` payload. This command controls a wide set of input and output behavior, with output and response generation portions then later overrideable via `response.create` properties, if desired.\n\nOne of the key session-wide settings is `turn_detection`, which controls how data flow is handled between the caller and model:\n\n- `server_vad` will evaluate incoming user audio (as sent via `input_audio_buffer.append`) using a voice activity detector (VAD) component and automatically use that audio to initiate response generation on applicable conversations when an end of speech is detected. Silence detection for the VAD can be configured when specifying `server_vad` detection mode.\n- `none` will rely on caller-initiated `input_audio_buffer.commit` and `response.create` commands to progress conversations and produce output. This is useful for push-to-talk applications or situations that have external audio flow control (such as a caller-side VAD component). Note that these manual signals can still be used in `server_vad` mode to supplement VAD-initiated response generation.\n\nTranscription of user input audio is opted into via the `input_audio_transcription` property; specifying a transcription model (`whisper-1`) in this configuration will enable the delivery of `conversation.item.input_audio_transcription.completed` events.\n\nAn example `session.update` that configures several aspects of the session, including tools, follows. 
Note that all session parameters are optional; not everything needs to be configured!\n\n```json\n{\n  \"type\": \"session.update\",\n  \"session\": {\n    \"voice\": \"alloy\",\n    \"instructions\": \"Call provided tools if appropriate for the user's input.\",\n    \"input_audio_format\": \"pcm16\",\n    \"input_audio_transcription\": {\n      \"model\": \"whisper-1\"\n    },\n    \"turn_detection\": {\n      \"threshold\": 0.4,\n      \"silence_duration_ms\": 600,\n      \"type\": \"server_vad\"\n    },\n    \"tools\": [\n      {\n        \"type\": \"function\",\n        \"name\": \"get_weather_for_location\",\n        \"description\": \"gets the weather for a location\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"location\": {\n              \"type\": \"string\",\n              \"description\": \"The city and state e.g. San Francisco, CA\"\n            },\n            \"unit\": {\n              \"type\": \"string\",\n              \"enum\": [\n                \"c\",\n                \"f\"\n              ]\n            }\n          },\n          \"required\": [\n            \"location\",\n            \"unit\"\n          ]\n        }\n      }\n    ]\n  }\n}\n```\n\n## Summary of commands\n\nSee [realtime-openapi3.yml](realtime-openapi3.yml) for full parameter details.\n\n### Requests: commands sent from the caller to the `\u002Frealtime` endpoint\n\n| `type` | Description |\n|---|---|\n| **Session Configuration** | |\n| `session.update` | Configures the connection-wide behavior of the conversation session such as shared audio input handling and common response generation characteristics. This is typically sent immediately after connecting but can also be sent at any point during a session to reconfigure behavior after the current response (if in progress) is complete. |\n| **Input Audio** | |\n| `input_audio_buffer.append` | Appends audio data to the shared user input buffer. 
This audio will not be processed until an end of speech is detected in the `server_vad` `turn_detection` mode or until a manual `response.create` is sent (in either `turn_detection` configuration). |\n| `input_audio_buffer.clear` | Clears the current audio input buffer. Note that this will not impact responses already in progress. |\n| `input_audio_buffer.commit` | Commits the current state of the user input buffer to subscribed conversations, including it as information for the next response. |\n| **Item Management** | For establishing history or including non-audio item information |\n| `conversation.item.create` | Inserts a new item into the conversation, optionally positioned according to `previous_item_id`. This can provide new, non-audio input from the user (like a text message), tool responses, or historical information from another interaction to form a conversation history prior to generation. |\n| `conversation.item.delete` | Removes an item from an existing conversation |\n| `conversation.item.truncate` | Manually shortens text and\u002For audio content in a message, which may be useful in situations where faster-than-realtime model generation produced significant additional data that was later skipped by an interruption. |\n| **Response Management** |\n| `response.create` | Initiates model processing of unprocessed conversation input, signifying the end of the caller's logical turn. `server_vad` `turn_detection` mode will automatically trigger generation at end of speech, but `response.create` must be called in other circumstances (text input, tool responses, `none` mode, etc.) to signal that the conversation should continue. **Note**: when responding to tool calls, `response.create` should be invoked *after* the `response.done` command from the model that confirms all tool calls and other messages have been provided. |\n| `response.cancel` | Cancels an in-progress response. 
|\n\n\n### Responses: commands sent by the `\u002Frealtime` endpoint to the caller\n\n| `type` | Description |\n|---|---|\n| `session.created` | Sent as soon as the connection is successfully established. Provides a connection-specific ID that may be useful for debugging or logging. |\n| `session.updated` | Sent in response to a `session.update` event, reflecting the changes made to the session configuration. |\n| **Caller Item Acknowledgement** | |\n| `conversation.item.created` | Provides acknowledgement that a new conversation item has been inserted into a conversation. |\n| `conversation.item.deleted` | Provides acknowledgement that an existing conversation item has been removed from a conversation. |\n| `conversation.item.truncated` | Provides acknowledgement that an existing item in a conversation has been truncated. |\n| **Response Flow** | |\n| `response.created` | Notifies that a new response has started for a conversation. This snapshots input state and begins generation of new items. Until `response.done` signifies the end of the response, a response may create items via `response.output_item.added` that are then populated via `*delta*` commands. |\n| `response.done` | Notifies that a response generation is complete for a conversation. |\n| `rate_limits.updated` | Sent immediately after `response.done`, this provides the current rate limit information reflecting updated status after the consumption of the just-finished response. |\n| **Item Flow in a Response** | |\n| `response.output_item.added` | Notifies that a new, server-generated conversation item *is being created*; content will then be populated via incremental `add_content` messages with a final `response.output_item.done` command signifying the item creation has completed. |\n| `response.output_item.done` | Notifies that a new conversation item has completed its addition into a conversation. 
For model-generated messages, this is preceded by `response.output_item.added` and `*delta*` commands which begin and populate the new item, respectively. |\n| **Content Flow within Response Items** | |\n| `response.content_part.added` | Notifies that a new content part is being created within a conversation item in an ongoing response. Until `response.content_part.done` arrives, content will then be incrementally provided via appropriate `*delta*` commands. |\n| `response.content_part.done` | Signals that a newly created content part is complete and will receive no further incremental updates. |\n| `response.audio.delta` | Provides an incremental update to a binary audio data content part generated by the model. |\n| `response.audio.done` | Signals that an audio content part's incremental updates are complete. |\n| `response.audio_transcript.delta` | Provides an incremental update to the audio transcription associated with the output audio content generated by the model. |\n| `response.audio_transcript.done` | Signals that the incremental updates to audio transcription of output audio are complete. |\n| `response.text.delta` | Provides an incremental update to a text content part within a conversation message item. |\n| `response.text.done` | Signals that the incremental updates to a text content part are complete. |\n| `response.function_call_arguments.delta` | Provides an incremental update to the arguments of a function call, as represented within an item in a conversation. |\n| `response.function_call_arguments.done` | Signals that incremental function call arguments are complete and that accumulated arguments can now be used in their entirety. |\n| **User Input Audio** | |\n| `input_audio_buffer.speech_started` | When using configured voice activity detection, this command notifies that a start of user speech has been detected within the input audio buffer at a specific audio sample index. 
|\n| `input_audio_buffer.speech_stopped` | When using configured voice activity detection, this command notifies that an end of user speech has been detected within the input audio buffer at a specific audio sample index. This will automatically trigger response generation when configured. |\n| `conversation.item.input_audio_transcription.completed` | Notifies that a supplementary transcription of the user's input audio buffer is available. This behavior must be opted into via the `input_audio_transcription` property in `session.update`. |\n| `conversation.item.input_audio_transcription.failed` | Notifies that input audio transcription failed. |\n| `input_audio_buffer.committed` | Provides acknowledgement that the current state of the user audio input buffer has been submitted to subscribed conversations. |\n| `input_audio_buffer.cleared` | Provides acknowledgement that the pending user audio input buffer has been cleared. |\n| **Other** | |\n| `error` | Indicates that something went wrong while processing data on the session. Includes an `error` message that provides additional detail. 
|\n\n\n## Troubleshooting and FAQ\n\nBest practices and expected patterns are evolving rapidly and topics represented in this section may quickly become out of date.\n\n### I send audio, but see no commands back from the service\n\n- Ensure that the input audio format matches what was provided in the `session.update` command (24KHz, 16-bit mono PCM by default); if the format doesn't match, it will be decoded as \"noise\" and not be interpreted as input.\n- If using the `none` `turn_detection` mode (`null` in newer protocol versions), ensure you send `input_audio_buffer.commit` and\u002For `response.create` commands as needed.\n\n### Tool calling isn't working or isn't responding\n\nAs a single response can feature multiple tool calls, a bit of statefulness is introduced with the tool call\u002Fresponse contract:\n\n- The caller may add a tool call output item `tool_call` at any point after the `item_added` message for that tool call arrives.\n- Once all items for the current response have been generated, the model's `response.done` command will arrive, including references in `output` to all tool calls and other items that were part of the response.\n- At this point (once all incoming tool calls have been resolved), the caller can send a new `response.create` command.\n- Sending the `response.create` command *before* the paired `response.done` command for the prior response arrives (e.g. immediately after a `response.function_call_arguments.done` or `response.output_item.done`) may produce unexpected behavior and race conditions.\n- Not sending any `response.create` command may fail to advance the conversation.\n\n### Using an audio file as input, I see many responses or my responses get stuck\n\nWhen using lengthy audio input that's significantly faster than real time -- such as from an audio file with natural pauses -- server voice activity detection can trigger many responses in rapid succession and this can cause responses to become unreliable. 
It's highly recommended to disable voice activity detection (`\"turn_detection\": { \"type\": \"none\" }` (`\"turn_detection\": null` in newer protocol versions) in `session.update`) for such scenarios and instead manually invoke `response.create` when all audio has been transmitted.\n\n### What's the long-term plan for library support?\n\nThe shortest answer: many details are still TBD.\n\n- **.NET** (https:\u002F\u002Fgithub.com\u002Fopenai\u002Fopenai-dotnet): preview support for `\u002Frealtime` is available now, beginning with the `2.1.0-beta.1` release. SDK representations within the beta library version are subject to continued development, refinement, and adaptation -- some number of breaking changes across preview versions are expected.\n- **Python** and **JavaScript**: As described [in the \"What's next\" section of the Realtime announcement](https:\u002F\u002Fopenai.com\u002Findex\u002Fintroducing-the-realtime-api\u002F#whats_next), official library support (via https:\u002F\u002Fgithub.com\u002Fopenai\u002Fopenai-python and https:\u002F\u002Fgithub.com\u002Fopenai\u002Fopenai-node) is coming at a later date. The timelines and specifics will be shared a bit further on, but we should expect converged support for `\u002Frealtime` alongside other client capabilities like `\u002Fchat\u002Fcompletions` in the future. 
In the meantime, this repository provides standalone libraries (compatible with both standard OpenAI and Azure OpenAI) with samples and will continue to be expanded and improved.\n- **Java** and **Go**: Client library support discussions are underway and we hope to have more to share soon.\n","> [!WARNING]\n> 该项目未处于积极维护状态，且与 OpenAI 实时 API 的最新正式发布版本不一致。此仓库中的代码仅作为官方库支持推出前的参考材料保留。有关新解决方案，请参阅适用于所有 OpenAI 库（Python、JS、Java、Go、.NET、Ruby）的官方库支持。\n\n# Azure OpenAI GPT-4o 音频和 \u002Frealtime：公开预览文档\n\n欢迎使用基于 `gpt-4o-realtime-preview` 模型的 Azure OpenAI `\u002Frealtime` 公开预览版！本仓库提供了关于 `\u002Frealtime` 的文档、独立库以及示例代码——这些内容既适用于 Azure OpenAI，也适用于标准 OpenAI v1 端点的使用场景。\n\n## 概述：什么是 \u002Frealtime？\n\n本次预览引入了针对 `gpt-4o-realtime-preview` 模型系列的新 `\u002Frealtime` API 端点。\u002Frealtime 具有以下特点：\n\n- 支持低延迟的“语音输入、语音输出”式对话交互\n- 可处理文本消息、函数工具调用以及其他来自 `\u002Fchat\u002Fcompletions` 等端点的功能\n- 非常适合客服代理、智能助手、翻译等需要与用户进行高效互动的应用场景\n\n\u002Frealtime 基于 [WebSockets API](https:\u002F\u002Fdeveloper.mozilla.org\u002Fen-US\u002Fdocs\u002FWeb\u002FAPI\u002FWebSockets_API) 构建，以实现终端用户与模型之间的完全异步流式通信。它被设计用于在受信任的中间服务环境中运行，该服务负责管理与终端用户的连接以及与模型端点的连接；\u002Frealtime **并非**设计用于直接从不受信任的终端设备上使用，而诸如音频数据的捕获和渲染等设备相关细节也不在 \u002Frealtime API 的范围内。\n\n从总体架构上看，基于 \u002Frealtime 构建的应用体验大致如下（请注意，如前所述，用户交互本身并不属于 API 的一部分）：\n\n```mermaid\nsequenceDiagram\n  actor User as 终端用户\n  participant MiddleTier as \u002Frealtime 主机\n  participant AOAI as Azure OpenAI\n  User->>MiddleTier: 开始交互\n  MiddleTier->>MiddleTier: 认证\u002F验证用户\n  MiddleTier--)User: 音频信息\n  User--)MiddleTier: \n  MiddleTier--)User: 文本信息\n  User--)MiddleTier: \n  MiddleTier--)User: 控制信息\n  User--)MiddleTier: \n  MiddleTier->>AOAI: 连接到 \u002Frealtime\n  MiddleTier->>AOAI: 配置会话\n  AOAI->>MiddleTier: 会话开始\n  MiddleTier--)AOAI: 发送\u002F接收 WS 命令\n  AOAI--)MiddleTier: \n  AOAI--)MiddleTier: 创建\u002F开始对话响应\n  AOAI--)MiddleTier: （在响应中）创建\u002F开始\u002F添加\u002F完成项目\n  AOAI--)MiddleTier: （在项目中）创建\u002F流式传输\u002F完成内容部分\n```\n\n请注意，\u002Frealtime 处于 **公开预览** 阶段。API 
变更、代码更新以及偶尔的服务中断均有可能发生。\n\n## 如何开始使用\n\n- 在 `eastus2` 或 `swedencentral` 区域创建一个 Azure OpenAI 资源\n- 将 `gpt-4o-realtime-preview` 模型（版本为 `2024-10-01`）部署到上述支持的资源之一\n- 使用附带的示例之一来体验 \u002Frealtime 的实际效果\n\n## 连接与认证 \u002Frealtime\n\n\u002Frealtime API 需要一个位于支持区域内的现有 Azure OpenAI 资源端点。完整的请求 URI 可通过以下步骤拼接而成：\n\n1. 安全的 WebSocket 协议 (`wss:\u002F\u002F`)\n2. 您的 Azure OpenAI 资源端点主机名，例如 `my-aoai-resource.openai.azure.com`\n3. `\u002Fopenai\u002Frealtime` API 路径\n4. 用于指定支持 API 版本的 `api-version` 查询字符串参数——初始版本为 `2024-10-01-preview`\n5. 包含您 `gpt-4o-realtime-preview` 模型部署名称的 `deployment` 查询字符串参数\n\n综合以上步骤，一个完整的 \u002Frealtime 请求 URI 示例可能如下所示：\n\n```http\nwss:\u002F\u002Fmy-eastus2-openai-resource.openai.azure.com\u002Fopenai\u002Frealtime?api-version=2024-10-01-preview&deployment=gpt-4o-realtime-preview-1001\n```\n\n认证方式如下：\n\n- **使用 Microsoft Entra**：\u002Frealtime 支持基于令牌的身份验证，需配合已启用托管标识的适当配置的 Azure OpenAI 服务资源使用。请在 `Authorization` 头中使用 `Bearer` 令牌来应用获取到的认证令牌。\n- **使用 API 密钥**：可以通过两种方式提供 API 密钥：\n  1. 在握手前的连接中使用 `api-key` 连接头（注意：浏览器环境不支持此方法）\n  2. 
在请求 URI 中使用 `api-key` 查询字符串参数（注意：使用 https\u002Fwss 时，查询字符串参数会被加密）\n\n## API 核心概念\n\n- 调用者建立与 \u002Frealtime 的连接后，将启动一个新的 `会话`\n- 可以对 `会话` 进行配置，以自定义输入和输出音频行为、语音活动检测行为以及其他共享设置\n- `会话` 会自动创建一个默认的 `对话`\n  - 注意：未来可能会支持同时进行多个对话——但目前尚不可用\n- `对话` 会持续积累输入信号，直到由调用者直接发出命令或根据语音活动自动检测轮次来启动一次 `响应`\n- 每个 `响应` 由一个或多个 `项目` 组成，这些项目可以封装消息、函数调用和其他信息\n- 消息 `项目` 包含 `内容部分`，允许在一个项目中同时表示多种模态（文本、音频等）\n- `会话` 负责管理调用者的输入处理（例如用户音频）以及通用的输出\u002F生成处理\n- 每次调用者发起的 `response.create` 操作都可以根据需要覆盖部分输出行为\n- 由服务器创建的 `项目` 以及消息中的 `内容部分` 可以异步并行地填充，例如同时接收音频、文本和函数信息（轮询方式）\n\n## API 详情\n\n一旦与 `\u002Frealtime` 的 WebSocket 连接会话建立并完成身份验证后，功能交互便通过发送和接收 WebSocket 消息来进行。为避免与用于推理的承载内容的“消息”概念产生歧义，这些消息在此被称为“命令”。每个命令都采用 JSON 对象的形式。命令可以并行发送和接收，应用程序通常应以并发和异步的方式处理它们。\n\n有关请求和响应命令的完整、结构化描述，请参阅 [realtime-openapi3.yml](realtime-openapi3.yml)。与其他公测阶段的内容一样，请注意协议细节可能会发生变更。\n\n**会话配置与轮次处理模式**\n\n在新建立的 `\u002Frealtime` 会话中，调用方发送的第一个命令通常是 `session.update` 负载。该命令控制广泛的输入和输出行为，而后续的输出和响应生成部分则可以通过 `response.create` 属性进行覆盖（如果需要）。\n\n其中一个关键的会话级设置是 `turn_detection`，它控制调用方与模型之间的数据流处理方式：\n\n- `server_vad` 会使用语音活动检测器 (VAD) 组件评估传入的用户音频（通过 `input_audio_buffer.append` 发送），并在检测到语音结束时自动利用该音频启动适用对话的响应生成。在指定 `server_vad` 检测模式时，可以配置 VAD 的静音检测。\n- `none` 则依赖于调用方发起的 `input_audio_buffer.commit` 和 `response.create` 命令来推进对话并生成输出。这对于按住说话的应用程序或具有外部音频流控制的情况（例如调用方侧的 VAD 组件）非常有用。请注意，在 `server_vad` 模式下，这些手动信号仍然可以用来补充由 VAD 触发的响应生成。\n\n用户输入音频的转录可通过 `input_audio_transcription` 属性启用；在此配置中指定转录模型（`whisper-1`）将启用 `conversation.item.audio_transcription.completed` 事件的传递。\n\n以下是一个配置了包括工具在内的多个会话方面的 `session.update` 示例。请注意，所有会话参数都是可选的；并非所有内容都需要配置！\n\n```json\n{\n  \"type\": \"session.update\",\n  \"session\": {\n    \"voice\": \"alloy\",\n    \"instructions\": \"根据用户的输入，必要时调用提供的工具。\",\n    \"input_audio_format\": \"pcm16\",\n    \"input_audio_transcription\": {\n      \"model\": \"whisper-1\"\n    },\n    \"turn_detection\": {\n      \"threshold\": 0.4,\n      \"silence_duration_ms\": 600,\n      \"type\": \"server_vad\"\n    },\n    \"tools\": [\n      {\n        
\"type\": \"function\",\n        \"name\": \"get_weather_for_location\",\n        \"description\": \"获取某个地点的天气信息\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"location\": {\n              \"type\": \"string\",\n              \"description\": \"城市和州，例如旧金山, 加利福尼亚州\"\n            },\n            \"unit\": {\n              \"type\": \"string\",\n              \"enum\": [\n                \"c\",\n                \"f\"\n              ]\n            }\n          },\n          \"required\": [\n            \"location\",\n            \"unit\"\n          ]\n        }\n      }\n    ]\n  }\n}\n```\n\n## 命令概览\n\n完整的参数详情请参阅 [realtime-openapi3.yml](realtime-openapi3.yml)。\n\n### 请求：从调用方发送到 `\u002Frealtime` 端点的命令\n\n| `type` | 描述 |\n|---|---|\n| **会话配置** | |\n| `session.update` | 配置对话会话的连接级行为，例如共享音频输入的处理方式和通用的响应生成特性。此命令通常在连接后立即发送，但也可在会话中的任何时间点发送，以便在当前响应（如果正在进行中）完成后重新配置行为。 |\n| **输入音频** | |\n| `input_audio_buffer.append` | 将音频数据追加到共享的用户输入缓冲区。在 `server_vad` 的 `turn_detection` 模式下，这些音频只有在检测到语音结束时才会被处理；而在其他 `turn_detection` 配置下，则需手动发送 `response.create` 命令才能触发处理。 |\n| `input_audio_buffer.clear` | 清空当前的音频输入缓冲区。请注意，这不会影响正在处理中的响应。 |\n| `input_audio_buffer.commit` | 将用户输入缓冲区的当前状态提交给已订阅的对话，作为下一次响应的信息。 |\n| **项目管理** | 用于建立对话历史或将非音频项目信息纳入对话 |\n| `conversation.item.create` | 在对话中插入一条新条目，可选择根据 `previous_item_id` 进行定位。这可以提供来自用户的新的非音频输入（如文本消息）、工具响应，或来自其他交互的历史信息，以形成生成之前的对话历史。 |\n| `conversation.item.delete` | 从现有对话中移除一条条目。 |\n| `conversation.item.truncate` | 手动缩短消息中的文本和\u002F或音频内容，这在模型以超实时速度生成大量额外数据，随后因中断而被跳过的情况下可能很有用。 |\n| **响应管理** |\n| `response.create` | 启动对未处理对话输入的模型处理，标志着调用方逻辑轮次的结束。在 `server_vad` 的 `turn_detection` 模式下，语音结束时会自动触发响应生成；但在其他情况下（文本输入、工具响应、`none` 模式等），必须调用 `response.create` 命令来表明对话应继续。**注意**：在响应工具调用时，应在模型发出确认所有工具调用及其他消息均已提供的 `response.done` 命令之后，再调用 `response.create`。 |\n| `response.cancel` | 取消正在进行中的响应。 |\n\n### 响应：由 `\u002Frealtime` 端点发送给调用方的命令\n\n| `type` | 描述 |\n|---|---|\n| `session.created` | 在连接成功建立后立即发送。提供一个特定于连接的 
ID，可用于调试或日志记录。 |\n| `session.updated` | 作为对 `session.update` 事件的响应发送，反映会话配置所做的更改。 |\n| **调用方项目确认** | |\n| `conversation.item.created` | 确认一个新的对话项目已被插入到对话中。 |\n| `conversation.item.deleted` | 确认一个现有的对话项目已从对话中移除。 |\n| `conversation.item.truncated` | 确认对话中的一个现有项目已被截断。 |\n| **响应流程** | |\n| `response.created` | 通知某个对话的新响应已开始。此命令会捕获输入状态，并开始生成新的项目。在 `response.done` 表示响应结束之前，响应可能会通过 `response.output_item.added` 创建项目，然后通过 `*delta*` 命令逐步填充内容。 |\n| `response.done` | 通知某个对话的响应生成已完成。 |\n| `rate_limits.updated` | 在 `response.done` 之后立即发送，提供当前的速率限制信息，反映刚刚完成的响应消耗后的更新状态。 |\n| **响应中的项目流** | |\n| `response.output_item.added` | 通知一个新的、由服务器生成的对话项目 *正在创建中*；随后将通过增量的 `add_content` 消息逐步填充内容，最后以 `response.output_item.done` 命令表示该项目的创建已完成。 |\n| `response.output_item.done` | 通知一个新的对话项目已成功添加到对话中。对于模型生成的消息，这之前会有 `response.output_item.added` 和 `*delta*` 命令，分别用于开始和填充新项目。 |\n| **响应项目内的内容流** | |\n| `response.content_part.added` | 通知在正在进行的响应中，对话项目内正在创建一个新的内容部分。在 `response.content_part.done` 到达之前，内容将通过相应的 `*delta*` 命令逐步提供。 |\n| `response.content_part.done` | 表示新创建的内容部分已完成，不再接收进一步的增量更新。 |\n| `response.audio.delta` | 提供模型生成的二进制音频数据内容部分的增量更新。 |\n| `response.audio.done` | 表示音频内容部分的增量更新已完成。 |\n| `response.audio_transcript.delta` | 提供与模型生成的输出音频相关的音频转录的增量更新。 |\n| `response.audio_transcript.done` | 表示输出音频的音频转录的增量更新已完成。 |\n| `response.text.delta` | 提供对话消息项目中文本内容部分的增量更新。 |\n| `response.text.done` | 表示文本内容部分的增量更新已完成。 |\n| `response.function_call_arguments.delta` | 提供对话项目中表示的函数调用参数的增量更新。 |\n| `response.function_call_arguments.done` | 表示函数调用参数的增量更新已完成，累积的参数现在可以完全使用。 |\n| **用户输入音频** | |\n| `input_audio_buffer.speech_started` | 当使用配置的语音活动检测时，此命令通知在输入音频缓冲区中已检测到用户语音的开始，具体位置为某个音频采样索引。 |\n| `input_audio_buffer.speech_stopped` | 当使用配置的语音活动检测时，此命令通知在输入音频缓冲区中已检测到用户语音的结束，具体位置为某个音频采样索引。如果已配置，则会自动触发响应生成。 |\n| `conversation.item.input_audio_transcription.completed` | 通知用户输入音频缓冲区的补充转录现已可用。此行为必须通过 `session.update` 中的 `input_audio_transcription` 属性进行启用。 |\n| `conversation.item.input_audio_transcription.failed` | 通知输入音频转录失败。 |\n| 
`input_audio_buffer.committed` | 确认当前用户音频输入缓冲区的状态已提交至已订阅的对话。 |\n| `input_audio_buffer.cleared` | 确认待处理的用户音频输入缓冲区已被清空。 |\n| **其他** | |\n| `error` | 表示在处理会话数据时出现了问题。包含一个提供详细信息的 `error` 消息。 |\n\n\n## 故障排除与常见问题解答\n\n最佳实践和预期模式正在迅速发展，本节中涉及的主题可能会很快过时。\n\n### 我发送了音频，但没有收到服务返回的任何命令\n\n- 确保输入音频格式与 `session.update` 命令中提供的格式一致（默认为 24KHz、16 位单声道 PCM）；如果格式不匹配，音频将被解码为“噪声”，而不会被解释为输入。\n- 如果使用 `none` 的 `turn_detection` 模式（较新协议版本中为 `null`），请确保根据需要发送 `input_audio_buffer.commit` 和\u002F或 `response.create` 命令。\n\n### 工具调用无法正常工作或无响应\n\n由于单个响应可能包含多个工具调用，因此工具调用与响应之间引入了一定的状态性：\n\n- 调用方可以在收到该工具调用的 `item_added` 消息之后的任何时间添加工具调用输出项目 `tool_call`。\n- 一旦当前响应的所有项目都已生成，模型的 `response.done` 命令将会到达，其中 `output` 字段会引用所有工具调用及其他属于该响应的项目。\n- 此时（即所有传入的工具调用均已解决之后），调用方可以发送一个新的 `response.create` 命令。\n- 在前一响应的配对 `response.done` 命令到达之前就发送 `response.create` 命令（例如，在 `response.function_call_arguments.done` 或 `response.output_item.done` 之后立即发送），可能会导致意外行为和竞态条件。\n- 如果不发送任何 `response.create` 命令，对话可能无法继续推进。\n\n### 使用音频文件作为输入时，我看到许多响应，或者我的响应会卡住\n\n当使用比实时快得多的长音频输入时——例如来自带有自然停顿的音频文件——服务器的语音活动检测可能会连续触发大量响应，从而导致响应变得不可靠。在这种情况下，强烈建议在 `session.update` 中禁用语音活动检测（`\"turn_detection\": { \"type\": \"none\" }`，在较新协议版本中为 `\"turn_detection\": null`），并在所有音频传输完毕后手动调用 `response.create`。\n\n### 关于库支持的长期计划是什么？\n\n最简短的回答是：许多细节仍有待确定。\n\n- **.NET**（https:\u002F\u002Fgithub.com\u002Fopenai\u002Fopenai-dotnet）：现已提供对 `\u002Frealtime` 的预览支持，自 `2.1.0-beta.1` 版本开始。该预览版 SDK 的实现仍将持续开发、优化和调整——预计在各个预览版本之间会出现一定数量的破坏性变更。\n- **Python** 和 **JavaScript**：正如 [Realtime 公告中的“接下来的计划”部分](https:\u002F\u002Fopenai.com\u002Findex\u002Fintroducing-the-realtime-api\u002F#whats_next) 所述，官方库支持（通过 https:\u002F\u002Fgithub.com\u002Fopenai\u002Fopenai-python 和 https:\u002F\u002Fgithub.com\u002Fopenai\u002Fopenai-node）将在稍后推出。具体的时间表和细节将在后续进一步公布，但我们预计未来将实现对 `\u002Frealtime` 与其他客户端功能，如 `\u002Fchat\u002Fcompletions` 的统一支持。在此期间，本仓库提供了独立的库（兼容标准 OpenAI 和 Azure OpenAI），并附有示例，未来将继续扩展和改进。\n- **Java** 和 **Go**：关于客户端库支持的讨论正在进行中，我们希望很快能分享更多进展。","# aoai-realtime-audio-sdk 快速上手指南\n\n> 
**⚠️ 重要提示**：本项目目前**不再积极维护**，其代码状态与 OpenAI Realtime API 的最新通用版本不完全匹配。本仓库代码仅作为官方库支持到位之前的参考材料。对于新的解决方案，请使用 OpenAI 官方支持的库（Python, JS, Java, Go, .NET, Ruby）。以下指南基于 Azure OpenAI GPT-4o Audio `\u002Frealtime` 公共预览版文档整理。\n\n## 环境准备\n\n在开始之前，请确保满足以下系统和资源要求：\n\n*   **Azure 区域**：必须创建位于 `eastus2` (美国东部 2) 或 `swedencentral` (瑞典中部) 区域的 Azure OpenAI 资源。\n*   **模型部署**：在上述资源中部署 `gpt-4o-realtime-preview` 模型，版本需为 `2024-10-01`。\n*   **网络协议**：支持 WebSocket (`wss:\u002F\u002F`) 连接。\n*   **开发环境**：任意支持 HTTP\u002FWebSocket 请求的编程语言或工具（如 Node.js, Python, Postman 等）。\n*   **认证方式**：\n    *   Microsoft Entra ID (Bearer Token)\n    *   或者 API Key\n\n## 安装步骤\n\n由于这是一个参考性质的 SDK 仓库，通常不需要通过包管理器安装特定的 \"aoai-realtime-audio-sdk\" 包。你可以通过克隆仓库获取示例代码，或直接根据 API 规范在你的项目中实现 WebSocket 连接。\n\n**获取示例代码：**\n\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk.git\ncd aoai-realtime-audio-sdk\n```\n\n*注：如果你使用官方 OpenAI 库进行新开发，请运行对应语言的安装命令，例如：*\n```bash\n# Python 示例\npip install openai\n# Node.js 示例\nnpm install openai\n```\n\n## 基本使用\n\n### 1. 构建连接 URI\n\n`\u002Frealtime` API 通过 WebSocket 连接。你需要构造如下格式的 URI：\n\n```text\nwss:\u002F\u002F\u003C你的资源名称>.openai.azure.com\u002Fopenai\u002Frealtime?api-version=2024-10-01-preview&deployment=\u003C你的部署名称>\n```\n\n**示例 URI：**\n```http\nwss:\u002F\u002Fmy-eastus2-openai-resource.openai.azure.com\u002Fopenai\u002Frealtime?api-version=2024-10-01-preview&deployment=gpt-4o-realtime-preview-1001\n```\n\n### 2. 认证连接\n\n在建立 WebSocket 握手时，需选择以下一种认证方式：\n\n*   **方式 A：API Key (推荐用于测试)**\n    在 URI 后追加 `&api-key=\u003C你的密钥>` (WSS 加密传输是安全的)，或在非浏览器环境的预握手连接头中添加 `api-key`。\n*   **方式 B：Microsoft Entra Token**\n    在 WebSocket 请求头中添加 `Authorization: Bearer \u003C你的访问令牌>`。\n\n### 3. 
配置会话 (Session Update)\n\n连接成功后，首先发送一个 `session.update` 命令来配置语音、指令和交互模式。\n\n**示例命令 (JSON)：**\n```json\n{\n  \"type\": \"session.update\",\n  \"session\": {\n    \"voice\": \"alloy\",\n    \"instructions\": \"Call provided tools if appropriate for the user's input.\",\n    \"input_audio_format\": \"pcm16\",\n    \"input_audio_transcription\": {\n      \"model\": \"whisper-1\"\n    },\n    \"turn_detection\": {\n      \"threshold\": 0.4,\n      \"silence_duration_ms\": 600,\n      \"type\": \"server_vad\"\n    },\n    \"tools\": [\n      {\n        \"type\": \"function\",\n        \"name\": \"get_weather_for_location\",\n        \"description\": \"gets the weather for a location\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"location\": {\n              \"type\": \"string\",\n              \"description\": \"The city and state e.g. San Francisco, CA\"\n            },\n            \"unit\": {\n              \"type\": \"string\",\n              \"enum\": [\"c\", \"f\"]\n            }\n          },\n          \"required\": [\"location\", \"unit\"]\n        }\n      }\n    ]\n  }\n}\n```\n\n### 4. 发送音频与获取响应\n\n*   **发送音频**：将用户音频数据分块发送，命令类型为 `input_audio_buffer.append`。\n*   **触发响应**：\n    *   如果配置了 `server_vad` (语音活动检测)，检测到说话结束时会自动触发响应。\n    *   如果配置为 `none` 或发送文本，需手动发送 `response.create` 命令来触发模型生成。\n\n**手动触发响应示例：**\n```json\n{\n  \"type\": \"response.create\"\n}\n```\n\n### 5. 
处理流式返回\n\n服务端会通过 WebSocket 推送各种事件（Commands），你需要异步监听并处理：\n*   `response.audio.delta`: 包含生成的音频片段。\n*   `response.text.delta`: 包含生成的文本片段。\n*   `conversation.item.input_audio_transcription.completed`: 用户语音转写完成事件。\n\n---\n*注意：由于处于公共预览阶段 (Public Preview)，API 细节、命令格式及服务稳定性可能会发生变化。生产环境请务必关注官方最新文档。*","某跨境电商平台正在开发一款嵌入网站的智能客服助手，旨在为海外用户提供即时的语音咨询与售后支持服务。\n\n### 没有 aoai-realtime-audio-sdk 时\n- **交互延迟高**：传统方案需等待用户说完、录音上传、转文字、推理、合成语音再返回，导致对话停顿长达数秒，体验极不自然。\n- **开发复杂度大**：团队需自行拼接 WebSocket 连接、处理音频流的分片发送与接收，并手动管理会话状态，代码维护成本极高。\n- **打断机制缺失**：系统无法在模型回答过程中识别用户的插话（Barge-in），必须等机器说完才能响应新指令，显得笨拙。\n- **多模态协同难**：难以在同一低延迟通道中灵活切换文本日志记录、函数调用（如查询订单）与实时语音流。\n\n### 使用 aoai-realtime-audio-sdk 后\n- **实现毫秒级响应**：借助 SDK 封装的 `\u002Frealtime` 接口，直接建立“语音进、语音出”的全双工通道，将对话延迟压缩至人类自然交流水平。\n- **快速集成落地**：利用 SDK 提供的参考实现和会话配置模板，开发者无需深究底层 WebSocket 协议细节，即可在 Azure 环境中快速跑通 Demo。\n- **支持自然插话**：基于 SDK 的流式架构，系统能实时感知用户中断行为并立即停止当前输出，转而处理新指令，交互更加拟人化。\n- **统一会话管理**：SDK 原生支持在同一连接中处理语音流、文本消息及工具调用，简化了订单查询等复杂业务逻辑的代码结构。\n\naoai-realtime-audio-sdk 通过标准化的实时音频流处理方案，帮助开发者以极低门槛构建出具备“真人般”反应速度的智能语音应用。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FAzure-Samples_aoai-realtime-audio-sdk_9b2eabf4.png","Azure-Samples","Azure Samples","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002FAzure-Samples_101a6251.png","Microsoft Azure code samples and examples in .NET, Java, Python, JavaScript, TypeScript, PHP and 
Ruby",null,"https:\u002F\u002Flearn.microsoft.com\u002Fazure","https:\u002F\u002Fgithub.com\u002FAzure-Samples",[84,88,92,96,100,104,108,112,115],{"name":85,"color":86,"percentage":87},"TypeScript","#3178c6",41.2,{"name":89,"color":90,"percentage":91},"Python","#3572A5",40,{"name":93,"color":94,"percentage":95},"C#","#178600",9.1,{"name":97,"color":98,"percentage":99},"Java","#b07219",6,{"name":101,"color":102,"percentage":103},"JavaScript","#f1e05a",1.2,{"name":105,"color":106,"percentage":107},"HTML","#e34c26",0.9,{"name":109,"color":110,"percentage":111},"CSS","#663399",0.6,{"name":113,"color":114,"percentage":111},"PowerShell","#012456",{"name":116,"color":117,"percentage":118},"Shell","#89e051",0.4,834,192,"2026-04-10T22:21:22","MIT","","未说明",{"notes":126,"python":124,"dependencies":127},"该项目并非本地运行的 AI 模型，而是一个用于连接 Azure OpenAI 或 OpenAI `\u002Frealtime` API 的客户端 SDK 示例库。运行环境仅需支持 WebSocket (wss:\u002F\u002F) 和网络请求的标准开发环境（如 Node.js、.NET、Python 等，具体取决于使用的示例代码）。无需本地 GPU、大显存或特定深度学习框架。核心需求是拥有 Azure OpenAI 资源（部署在 eastus2 或 swedencentral 区域）及有效的 API 密钥或 Entra ID 认证。注意：该项目目前不再积极维护，仅作为参考，官方建议使用 OpenAI 各语言官方库进行新开发。",[],[15,55],"2026-03-27T02:49:30.150509","2026-04-18T03:50:59.111002",[132,137,142,147,152,157],{"id":133,"question_zh":134,"answer_zh":135,"source_url":136},38722,"是否有 React JS 的示例代码？","是的，项目已添加 React 示例。维护者已在 Issue #64 中添加了 React 前端示例。如果您需要将 React 前端与 .NET API 后端结合使用，请注意音频播放问题：确保正确处理二进制数据（API 返回的是二进制数据，而非 JavaScript 版本中的 Base64 编码数据），避免音频播放缓慢或出现干扰。","https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fissues\u002F10",{"id":138,"question_zh":139,"answer_zh":140,"source_url":141},38723,"Python client_sample.py 无法工作，只有 no_vad 版本能运行，如何解决？","该问题通常由版本缺陷引起。维护者已发布修复版本（0.5.0）。请升级库：`pip install --upgrade rt-client`。如果升级后出现 `TypeError: RTLowLevelClient._get_azure_params() missing 1 required positional argument: 'self'` 
错误，请确保完全卸载旧版本并重新安装最新版本，同时检查代码中是否正确实例化了客户端类。","https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fissues\u002F35",{"id":143,"question_zh":144,"answer_zh":145,"source_url":146},38724,"运行 npm install 时提示 rt-client tarball 损坏或找不到依赖，如何解决？","这通常是因为缺少 `jq` 工具导致 `download-pkg.sh` 脚本执行失败。请先安装 `jq`（macOS 用户可使用 `brew install jq`，Linux 用户可使用 `apt-get install jq` 或 `yum install jq`），然后重新运行下载脚本。确保 `package.json` 中包含正确的依赖项：`\"rt-client\": \"file:..\u002Frt-client-\u003Cversion>.tgz\"`。","https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fissues\u002F8",{"id":148,"question_zh":149,"answer_zh":150,"source_url":151},38725,"Python 示例目录中的 download-pkg.sh 脚本不存在，应该如何下载依赖包？","文档中的脚本名称有误。实际脚本名为 `download-wheel.sh`，位于 `python\u002Fsamples\u002F` 目录下。请使用以下命令下载：`.\u002Fdownload-wheel.sh`。此外，安装命令中的版本号不应硬编码，建议使用通配符或动态获取最新版本，例如：`pip install rtclient-*-py3-none-any.whl`。","https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fissues\u002F31",{"id":153,"question_zh":154,"answer_zh":155,"source_url":156},38726,"此实时 WebSocket SDK 未来是否会与非实时 API（如 Assistants API）统一？","目前官方 Python 和 JavaScript 库的完整功能融合时间表尚未确定（TBD），但已在路线图中。.NET 库已在最新版本中包含预览支持。在过渡期间，本仓库提供的独立 Python 和 JavaScript 库将继续维护和改进，以便开发者更容易上手 `\u002Frealtime` 协议。建议暂时将实时 SDK 作为独立模块使用，并关注官方库的更新。","https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fissues\u002F25",{"id":158,"question_zh":159,"answer_zh":160,"source_url":161},38727,"如何处理函数调用结果并将其返回给大模型进行语音输出重构？","是的，应使用 `FunctionCallOutputItem` 将函数执行结果传回。具体步骤：1) 接收 `RTFunctionCallItem` 并解析参数；2) 执行对应函数获取结果；3) 构造 `FunctionCallOutputItem` 并通过 `send_item` 发送回客户端。注意：如果使用异步函数，必须等待所有异步操作完成后再调用 `client.close()`，否则可能导致连接提前关闭而丢失响应。","https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fissues\u002F120",[163,168,173,178,183,188,192,196,200,204,208,212,216,220,224],{"id":164,"version":165,"summary_zh":166,"released_at":167},314643,"js\u002Fv0.5.5","## 变更内容\n\n* [JS] 
在 WebSocket 关闭时添加连接关闭消息，由 @glharper 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F148 中实现\n\n## 新贡献者\n* @glharper 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F148 中完成了首次贡献\n\n**完整变更日志**: https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fcompare\u002Fpy\u002Fv0.5.4...js\u002Fv0.5.5","2025-08-26T19:55:25",{"id":169,"version":170,"summary_zh":171,"released_at":172},314644,"py\u002Fv0.5.4","## 变更内容\n* 由 @raghu017 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F139 中处理转录增量事件和未知事件\n* 由 @glecaros 准备发布 0.5.4 版本，详情见 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F140\n\n## 新贡献者\n* @raghu017 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F139 中完成了首次贡献\n\n**完整变更日志**: https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fcompare\u002Fpy\u002Fv0.5.3...py\u002Fv0.5.4","2025-06-16T20:28:35",{"id":174,"version":175,"summary_zh":176,"released_at":177},314645,"js\u002Fv0.5.2","## 变更内容\n* [js] 修复：将 ResponseCreateParams 中的可选参数设为可选。由 @glecaros 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F108 中完成。\n\n**完整变更日志**：https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fcompare\u002Fjs\u002Fv0.5.1...js\u002Fv0.5.2","2024-12-17T20:56:29",{"id":179,"version":180,"summary_zh":181,"released_at":182},314646,"py\u002Fv0.5.3","## 变更内容\n* 添加缺失的导出 + 响应竞态问题。由 @glecaros 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F61 中完成。\n* [js] 为 RTResponse 添加类型，并为 RTMessageItem 添加更多字段。由 @glecaros 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F65 中完成。\n* 由 @glecaros 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F64 
中添加了 RTClient 的 React 示例。\n* 由 @yulin-li 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F71 中修复了 JavaScript 示例中的播放器性能问题。\n* .NET 示例：针对昨日新发布的 Beta 版本进行初步更新。由 @trrwilson 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F72 中完成。\n* .NET 示例：为 beta.2 更新 README 文件。由 @trrwilson 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F73 中完成。\n* 修复：在 README 中的拼写错误。由 @ry0y4n 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F81 中完成。\n* 由 @bgrgv 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F79 中更新了 rt-client。\n* 将 \u002Fpython 目录下的 aiohttp 从 3.10.5 升级至 3.10.11。由 @dependabot 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F87 中完成。\n* 将 \u002Fjavascript\u002Fstandalone 目录下的 cookie 和 @bundled-es-modules\u002Fcookie 升级。由 @dependabot 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F89 中完成。\n* 将 \u002Fsamples\u002Fjavascript\u002Freact 目录下的 cross-spawn 从 7.0.3 升级至 7.0.6。由 @dependabot 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F90 中完成。\n* 由 @xwang-otterai 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F88 中在 models.py 中添加更多支持的语音。\n* 将 OpenAI 支持的所有语音添加到 JavaScript 库中。由 @glecaros 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F92 中完成。\n* [JavaScript] 在 JavaScript 示例应用中添加“清除全部”按钮。由 @MikeYan01 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F91 中完成。\n* 修复了 Python 和 JavaScript SDK 中缺失的使用说明。由 @BetterAndBetterII 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F84 中完成。\n* 修复 JavaScript package.json 配置。由 @glecaros 在 
https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F93 中完成。\n* 将 RTInputAudioItem 的 id 设置为公共字段，并升级 JavaScript 和 Python 版本。由 @glecaros 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F97 中完成。\n\n## 新贡献者\n* @ry0y4n 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F81 中完成了首次贡献。\n* @bgrgv 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F79 中完成了首次贡献。\n* @dependabot 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F87 中完成了首次贡献。\n* @xwang-otterai 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F88 中完成了首次贡献。\n* @MikeYan01 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F91 中完成了首次贡献。\n* @BetterAndBetterII 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F84 中完成了首次贡献。\n\n**完整变更日志","2024-12-05T19:33:06",{"id":184,"version":185,"summary_zh":186,"released_at":187},314647,"js\u002Fv0.5.1","## 变更内容\n* @glecaros 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F64 中添加了 RTClient 的 React 示例\n* @yulin-li 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F71 中修复了 JavaScript 示例中的播放器性能问题\n* .NET 示例：@trrwilson 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F72 中为昨日发布的全新 Beta 版本进行了初步更新\n* .NET 示例：@trrwilson 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F73 中更新了 Beta.2 版本的 README 文件\n* 修复：@ry0y4n 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F81 中修正了 README 文件中的拼写错误\n* @bgrgv 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F79 中更新了 rt-client\n* 在 \u002Fpython 目录下，@dependabot 将 
aiohttp 从 3.10.5 升级至 3.10.11，详见 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F87\n* 在 \u002Fjavascript\u002Fstandalone 目录下，@dependabot 将 cookie 和 @bundled-es-modules\u002Fcookie 升级，详见 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F89\n* 在 \u002Fsamples\u002Fjavascript\u002Freact 目录下，@dependabot 将 cross-spawn 从 7.0.3 升级至 7.0.6，详见 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F90\n* @xwang-otterai 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F88 中在 models.py 文件中增加了更多支持的语音选项\n* @glecaros 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F92 中将 OpenAI 支持的所有语音添加到了 JavaScript 库中\n* [JavaScript] @MikeYan01 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F91 中为 JavaScript 示例应用添加了一个“清除所有”按钮\n* @BetterAndBetterII 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F84 中修复了 Python 和 JavaScript SDK 中缺失的使用说明\n* @glecaros 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F93 中修复了 JavaScript 的 package.json 配置\n* @glecaros 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F97 中将 RTInputAudioItem 的 id 设置为公共字段，并升级了 JavaScript 和 Python 版本\n\n## 新贡献者\n* @ry0y4n 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F81 中完成了首次贡献\n* @bgrgv 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F79 中完成了首次贡献\n* @dependabot 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F87 中完成了首次贡献\n* @xwang-otterai 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F88 中完成了首次贡献\n* @MikeYan01 在 
https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F91 中完成了首次贡献\n* @BetterAndBetterII 在 https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fpull\u002F84 中完成了首次贡献\n\n**完整变更日志**：https:\u002F\u002Fgithub.com\u002FAzure-Samples\u002Faoai-realtime-audio-sdk\u002Fcompare\u002Fjs\u002Fv0.5.0...js\u002Fv0.5.1","2024-12-05T19:27:37",{"id":189,"version":190,"summary_zh":80,"released_at":191},314648,"js\u002Fv0.5.0","2024-10-28T19:52:47",{"id":193,"version":194,"summary_zh":80,"released_at":195},314649,"py\u002Fv0.5.2","2024-10-25T19:19:30",{"id":197,"version":198,"summary_zh":80,"released_at":199},314650,"js\u002Fv0.4.7","2024-10-16T19:49:58",{"id":201,"version":202,"summary_zh":80,"released_at":203},314651,"py\u002Fv0.5.1","2024-10-15T16:30:47",{"id":205,"version":206,"summary_zh":80,"released_at":207},314652,"py\u002Fv0.5.0","2024-10-14T16:53:08",{"id":209,"version":210,"summary_zh":80,"released_at":211},314653,"py\u002Fv0.4.4","2024-10-04T05:15:29",{"id":213,"version":214,"summary_zh":80,"released_at":215},314654,"py\u002Fv0.4.3","2024-10-03T23:31:48",{"id":217,"version":218,"summary_zh":80,"released_at":219},314655,"py\u002Fv0.4.2","2024-10-03T04:15:26",{"id":221,"version":222,"summary_zh":80,"released_at":223},314656,"py\u002Fv0.4.1","2024-10-02T21:51:48",{"id":225,"version":226,"summary_zh":227,"released_at":228},314657,"v0.1.0-beta.1","`aoai-realtime-audio-sdk` 示例所使用的初始库支持。Python 和 JavaScript 由仓库源代码构建；.NET 则基于 https:\u002F\u002Fgithub.com\u002Fopenai\u002Fopenai-dotnet 构建。","2024-10-01T16:42:51"]