[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-microsoft--aici":3,"tool-microsoft--aici":62},[4,18,26,36,46,54],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",160784,2,"2026-04-19T11:32:54",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":42,"last_commit_at":43,"category_tags":44,"status":17},8272,"opencode","anomalyco\u002Fopencode","OpenCode 是一款开源的 AI 编程助手（Coding Agent），旨在像一位智能搭档一样融入您的开发流程。它不仅仅是一个代码补全插件，而是一个能够理解项目上下文、自主规划任务并执行复杂编码操作的智能体。无论是生成全新功能、重构现有代码，还是排查难以定位的 Bug，OpenCode 都能通过自然语言交互高效完成，显著减少开发者在重复性劳动和上下文切换上的时间消耗。\n\n这款工具专为软件开发者、工程师及技术研究人员设计，特别适合希望利用大模型能力来提升编码效率、加速原型开发或处理遗留代码维护的专业人群。其核心亮点在于完全开源的架构，这意味着用户可以审查代码逻辑、自定义行为策略，甚至私有化部署以保障数据安全，彻底打破了传统闭源 AI 助手的“黑盒”限制。\n\n在技术体验上，OpenCode 提供了灵活的终端界面（Terminal UI）和正在测试中的桌面应用程序，支持 macOS、Windows 及 Linux 全平台。它兼容多种包管理工具，安装便捷，并能无缝集成到现有的开发环境中。无论您是追求极致控制权的资深极客，还是渴望提升产出的独立开发者，OpenCode 都提供了一个透明、可信",144296,1,"2026-04-16T14:50:03",[13,45],"插件",{"id":47,"name":48,"github_repo":49,"description_zh":50,"stars":51,"difficulty_score":32,"last_commit_at":52,"category_tags":53,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",109154,"2026-04-18T11:18:24",[14,15,13],{"id":55,"name":56,"github_repo":57,"description_zh":58,"stars":59,"difficulty_score":32,"last_commit_at":60,"category_tags":61,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[45,13,15,14],{"id":63,"github_repo":64,"name":65,"description_en":66,"description_zh":67,"ai_summary_zh":68,"readme_en":69,"readme_zh":70,"quickstart_zh":71,"use_case_zh":72,"hero_image_url":73,"owner_login":74,"owner_name":75,"owner_avatar_url":76,"owner_bio":77,"owner_company":78,"owner_location":78,"owner_email":79,"owner_twitter":80,"owner_website":81,"owner_url":82,"languages":83,"stars":123,"forks":124,"last_commit_at":125,"license":126,"difficulty_score":127,"env_os":128,"env_gpu":129,"env_ram":130,"env_deps":131,"category_tags":143,"github_topics":144,"view_count":32,"oss_zip_url":78,"oss_zip_packed_at":78,"status":17,"created_at":158,"updated_at":159,"faqs":160,"releases":191},9835,"microsoft\u002Faici","aici","AICI: Prompts as (Wasm) Programs","AICI（人工智能控制器接口）是一款由微软研究院开发的开源工具，旨在让开发者能够实时约束和引导大语言模型（LLM）的输出。它将提示词转化为可执行的 WebAssembly (Wasm) 程序，使“控制器”能在模型生成每一个 token 的过程中动态介入，从而实现受控解码、内容动态编辑以及多并行生成的协调管理。\n\nAICI 主要解决了大模型输出不可控、难以严格遵循特定规则或逻辑的痛点。通过抽象底层推理引擎的细节，它让构建复杂的控制策略（如程序化解码或多智能体对话）变得更加简单高效，同时兼容 llama.cpp、HuggingFace Transformers 等多种主流推理后端。\n\n这款工具特别适合 AI 研究人员、系统工程师及高级开发者使用，尤其是那些希望深入定制模型行为、探索新型控制算法或需要高安全性沙箱环境的团队。其核心技术亮点在于利用轻量级 Wasm 模块运行控制逻辑：这些模块能与 GPU 上的模型推理并行在 CPU 上执行，既充分利用了计算资源，又几乎不增加生成延迟；同时，Wasm 沙箱机制确保了控制器无法访问文件系统或网络，保障了运行安全。目前 AICI 仍处于原型阶段，为上","AICI（人工智能控制器接口）是一款由微软研究院开发的开源工具，旨在让开发者能够实时约束和引导大语言模型（LLM）的输出。它将提示词转化为可执行的 WebAssembly (Wasm) 程序，使“控制器”能在模型生成每一个 token 的过程中动态介入，从而实现受控解码、内容动态编辑以及多并行生成的协调管理。\n\nAICI 主要解决了大模型输出不可控、难以严格遵循特定规则或逻辑的痛点。通过抽象底层推理引擎的细节，它让构建复杂的控制策略（如程序化解码或多智能体对话）变得更加简单高效，同时兼容 llama.cpp、HuggingFace Transformers 等多种主流推理后端。\n\n这款工具特别适合 AI 研究人员、系统工程师及高级开发者使用，尤其是那些希望深入定制模型行为、探索新型控制算法或需要高安全性沙箱环境的团队。其核心技术亮点在于利用轻量级 Wasm 模块运行控制逻辑：这些模块能与 GPU 上的模型推理并行在 CPU 上执行，既充分利用了计算资源，又几乎不增加生成延迟；同时，Wasm 沙箱机制确保了控制器无法访问文件系统或网络，保障了运行安全。目前 AICI 仍处于原型阶段，为上层控制库提供了高效、灵活且跨平台的基础设施支持。","# Artificial Intelligence Controller Interface (AICI)\n\n**[LLGuidance library](https:\u002F\u002Fgithub.com\u002Fguidance-ai\u002Fllguidance) is an actively maintained evolution and specialization of AICI, recommended if all you want is constrained decoding.**\n\nThe Artificial Intelligence Controller Interface (AICI) lets you build Controllers that constrain and direct output of a Large Language Model (LLM) in real time.\nControllers are flexible programs capable of implementing constrained decoding, dynamic editing of prompts and generated text, and coordinating execution across multiple, parallel generations.\nControllers incorporate custom logic during the token-by-token decoding and maintain state during an LLM request. This allows diverse Controller strategies, from programmatic or query-based decoding to multi-agent conversations to execute efficiently in tight integration with the LLM itself.\n\n**The purpose of AICI is to make it easy to build and experiment with both existing and entirely new Controller strategies for improving LLM generations.**\nBy abstracting away implementation details of the underlying LLM inference and serving engine, AICI aims to simplify the development of Controllers, make it easier to \nwrite fast Controllers, and ease compatibility across LLM inference and serving engines.\n\nAICI is designed for both local and cloud execution, including (eventually) multi-tenant LLM deployments.\nControllers are implemented as light-weight WebAssembly (Wasm) modules which run on the same machine as the LLM inference engine, utilizing the CPU while the GPU is busy with token generation.\nAICI is one layer in the inference stack, and is designed to allow control libraries such as Guidance, LMQL, and others to run on top of it and gain both efficiency and performance improvements, as well as portability across LLM inference and serving engines.\n\nAICI currently integrates with llama.cpp, HuggingFace Transformers, and rLLM (custom tch-based LLM inference engine), with vLLM in the works.\n\nAICI is:\n\n- [Flexible](#flexibility): Controllers can be written in any language that can compile to Wasm (Rust, C, C++, ...),\n  or be interpreted inside Wasm (Python, JavaScript, ...)\n- [Secure](#security): Controllers are sandboxed and cannot access the filesystem, network, or any other resources\n- [Fast](#performance): Wasm modules are compiled to native code and run in parallel with the LLM inference engine, inducing only a\n  minimal overhead to the generation process\n\nAICI is a prototype, designed and built at [Microsoft Research](https:\u002F\u002Fwww.microsoft.com\u002Fen-us\u002Fresearch\u002F).\n\n# Table of Contents\n\n- [Artificial Intelligence Controller Interface (AICI)](#artificial-intelligence-controller-interface-aici)\n- [QuickStart: Example Walkthrough](#quickstart-example-walkthrough)\n  - [Development Environment Setup](#development-environment-setup)\n  - [Build and start rLLM server and AICI Runtime](#build-and-start-rllm-server-and-aici-runtime)\n  - [Control AI output using AICI controllers](#control-ai-output-using-aici-controllers)\n- [Comprehensive Guide: Exploring Further](#comprehensive-guide-exploring-further)\n- [Architecture](#architecture)\n- [Security](#security)\n- [Performance](#performance)\n- [Flexibility](#flexibility)\n- [Acknowledgements](#acknowledgements)\n- [Contributing](#contributing)\n- [Trademarks](#trademarks)\n\n# QuickStart: Example Walkthrough\n\nIn this quickstart, we'll guide you through the following steps:\n\n* Set up **rLLM Server** and **AICI Runtime**.\n* Build and deploy a **Controller**.\n* Use AICI to control LLM output, so you can **customize a LLM to follow specific rules** when generating text.\n\n## Development Environment Setup\n\nTo compile AICI components, you need to set up your development environment for Rust. For this quickstart you also need Python 3.11 or later to create a controller.\n\n### Windows WSL \u002F Linux \u002F macOS\n\n> [!NOTE]\n> **Windows users**: please use WSL2 or the included [devcontainer](https:\u002F\u002Fcontainers.dev). Adding native Windows support [is tracked here](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Faici\u002Fissues\u002F42).\n> \n> **MacOS users**: please make sure you have XCode command line tools installed by running `xcode-select -p` and, if not installed, run `xcode-select --install`.\n>\n> **CUDA**: the CUDA build relies on specific libtorch installation. It's highly recommended you use the included devcontainer.\n\nIf you're using devcontainer, you can skip to the [next section](#build-and-start-rllm-server-and-aici-runtime).\n\nUsing the system package manager, install the necessary tools for building code in the repository, including `git`, `cmake` and `ccache`. \n\nFor instance in WSL \u002F Ubuntu using `apt`:\n\n    sudo apt-get install --assume-yes --no-install-recommends \\\n        build-essential cmake ccache pkg-config libssl-dev libclang-dev clang llvm-dev git-lfs\n\nor using Homebrew on macOS:\n\n    brew install git cmake ccache\n\nThen install **Rust, Rustup and Cargo**, following the instructions provided [here](https:\u002F\u002Fdoc.rust-lang.org\u002Fcargo\u002Fgetting-started\u002Finstallation.html) and [here](https:\u002F\u002Fwww.rust-lang.org\u002Flearn\u002Fget-started):\n\n    curl --proto '=https' --tlsv1.2 -sSf https:\u002F\u002Fsh.rustup.rs | sh\n\nAfter installation, verify that the `rustup --version` command is accessible by running it from the terminal. If the command isn't recognized, try opening a new terminal session.\n  \nNext install wasm32-wasi Rust component:\n    \n    rustup target add wasm32-wasi\n\nIf you already had Rust installed, or are getting complaints from Cargo about outdated versions, run:\n\n    rustup update\n\nLast, to work with **Python** controllers and scripts (like this tutorial), run this command to install the required packages:\n\n    pip install pytest pytest-forked ujson posix_ipc numpy requests\n\n\n## Build and start rLLM server and AICI Runtime\n\nThe rLLM server has two backends, one based on `libtorch` and CUDA\n(`rllm-cuda`), and the other based on `llama.cpp` (`rllm-llamacpp`).\n\nThe `rllm-cuda` backend only works with NVidia GPUs with compute capability 8.0 or later\n(A100 and later; RTX 30x0 and later) and requires a fiddly setup of libtorch\n-- it's strongly recommended to use the included devcontainer.\nWhile this guide focuses on the `rllm-llamacpp` backend,\nthe build steps are the same for `rllm-cuda`, modulo the folder name.\n\nAfter [dev env setup](#development-environment-setup) above,\nclone the AICI repository and proceed with the next steps outlined below.\n\nUse the following command to build and run `aicirt` and `rllm-llamacpp`:\n\n    cd rllm\u002Frllm-llamacpp\n    .\u002Fserver.sh phi2\n\nYou can pass other model names as argument (run `.\u002Fserver.sh` without arguments to see available models).\nYou can also use a HuggingFace URL to `.gguf` file or a local path to a `.gguf` file.\n(For `rllm-cuda` use HuggingFace model id or path to folder).\n\n    .\u002Fserver.sh orca\n\nYou can find more details about `rllm-llamacpp` [here](rllm\u002Frllm-llamacpp\u002FREADME.md).\n\nThe rLLM server provides a HTTP interface, utilized for configuration tasks and processing requests. You can also use this interface to promptly verify its status. For instance, if you open http:\u002F\u002F127.0.0.1:4242\u002Fv1\u002Fmodels, you should see:\n\n```json\n{\n  \"object\": \"list\",\n  \"data\": [\n    {\n      \"object\": \"model\",\n      \"id\": \"TheBloke\u002Fphi-2-GGUF\",\n      \"created\": 946810800,\n      \"owned_by\": \"owner\"\n    }\n  ]\n}\n```\n\nconfirming that the selected model is loaded.\n\n## Control AI output using AICI controllers\n\nAICI allows hosting custom logic, called **Controllers**, that initiate, terminate, and interact with LLMs token generation. Controllers take input arguments, process them, and return a result with logs, LLM tokens, and variables.\n\nThe repository includes some examples, in particular:\n\n* **jsctrl**: a controller that accepts JavaScript code as input for execution. This code can interact with the model to generate text and tokens.\n* **pyctrl**: a controller that accepts Python code as input for execution. This code can also interact with the model to generate text and tokens.\n\nIn this example we'll utilize **pyctrl** to manage token generation using a simple **Python script**.\nIf you want, you can [build and upload pyctrl](.\u002Fcontrollers\u002Fpyctrl\u002FREADME.md),\nhowever by default the server will automatically\ndownload the [latest release](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Faici\u002Freleases\u002Flatest) of pyctrl from GitHub.\n\nIn general, controllers require building and deployment, while scripts (Python or JavaScript) are sent with each request.\n\nThe following illustrates the relationship between the rLLM server, the AICI runtime, and the controller:\n\n```mermaid\nerDiagram\n    Host    ||--|{ CPU : \"\"\n    Host    ||--|{ GPU : \"\"\n    \n    CPU     ||--|| \"rLLM Server\" : execute\n    CPU     ||--|{ \"AICI Runtime\" : execute\n\n    \"AICI Runtime\" ||--|| \"Controller\" : instantiate\n\n    GPU     ||--|{ \"LLM token generation\" : execute\n```\n\n### Controlling the LLM token generation\n\nSuppose we aim for a model to generate a list, adhering to a specific format and containing only five items.\n\nTypically, achieving this involves prompt engineering, crafting the prompt precisely with clear instructions, such as:\n\n    What are the five most popular types of vehicles?\n    Return the result as a numbered list.\n    Do not add explanations, only the list.\n\nThe prompt would also vary depending on the model in use, given that each model tends to add explanations and understands instructions in different ways.\n\nWith AICI, we shift control back to code, and we can simplify the prompt to:\n\n    What are the most popular types of vehicles?\n\nusing code to:\n\n1. Limit the list to 5 items\n2. Prevent the model from adding some initial explanation\n3. Format to a numbered list\n4. Stop the model from adding some text after the list.\n\nLet's create a `list-of-five.py` python file with the following content:\n\n```python\nimport pyaici.server as aici\n\n# Force the model to generate a well formatted list of 5 items, e.g.\n#   1. name 1\n#   2. name 2\n#   3. name 3\n#   4. name 4\n#   5. name 5\nasync def main():\n    \n    # This is the prompt we want to run.\n    # Note how the prompt doesn't mention a number of vehicles or how to format the result.\n    prompt = \"What are the most popular types of vehicles?\\n\"\n\n    # Tell the model to generate the prompt string, ie. let's start with the prompt \"to complete\"\n    await aici.FixedTokens(prompt)\n\n    # Store the current position in the token generation process\n    marker = aici.Label()\n\n    for i in range(1,6):\n      # Tell the model to generate the list number\n      await aici.FixedTokens(f\"{i}.\")\n\n      # Wait for the model to generate a vehicle name and end with a new line\n      await aici.gen_text(stop_at = \"\\n\")\n\n    await aici.FixedTokens(\"\\n\")\n\n    # Store the tokens generated in a result variable\n    aici.set_var(\"result\", marker.text_since())\n\naici.start(main())\n```\n\nRunning the script is not too different from sending a prompt. In this case, we're sending control logic and instructions all together.\n\nTo see the final result, execute the following command:\n\n    .\u002Faici.sh run list-of-five.py\n\nResult:\n```\nRunning with tagged AICI Controller: gh:microsoft\u002Faici\u002Fpyctrl\n[0]: FIXED 'What are the most popular types of vehicles?\\n'\n[0]: FIXED '1.'\n[0]: GEN ' Cars\\n'\n[0]: FIXED '2.'\n[0]: GEN ' Motorcycles\\n'\n[0]: FIXED '3.'\n[0]: GEN ' Bicycles\\n'\n[0]: FIXED '4.'\n[0]: GEN ' Trucks\\n'\n[0]: FIXED '5.'\n[0]: GEN ' Boats\\n'\n[0]: FIXED '\\n'\n[DONE]\n[Response] What are the most popular types of vehicles?\n1. Cars\n2. Motorcycles\n3. Bicycles\n4. Trucks\n5. Boats\n\nresponse saved to tmp\u002Fresponse.json\nUsage: {'sampled_tokens': 16, 'ff_tokens': 37, 'cost': 69}\nTiming: {'http_response': 0.05193686485290527, 'data0': 0.05199289321899414, 'first_token': 0.0658726692199707, 'last_token': 0.1784682273864746}\nTokens\u002Fsec: {'prompt': 861.0913072488067, 'sampling': 89.65181217019571}\nStorage: {'result': '1. Cars\\n2. Motorcycles\\n3. Bicycles\\n4. Trucks\\n5. Boats\\n\\n'}\n```\n\n# Comprehensive Guide: Exploring Further\n\nThis repository contains a number of components, and which ones you need depends on your use case.\n\nYou can **use an existing controller module**.\nWe provide [PyCtrl](.\u002Fcontrollers\u002Fpyctrl) and [JsCtrl](.\u002Fcontrollers\u002Fjsctrl)\nthat let you script controllers using server-side Python and JavaScript, respectively.\nThe [pyaici](.\u002Fpy\u002Fpyaici) package contains `aici` command line tool that lets you\n[upload and run scripts](.\u002Fdocs\u002Fproxy.md) with any controller\n(we also provide [REST API definition](.\u002Fdocs\u002FREST.md) for the curious).\n> 🧑‍💻[Python code samples for scripting PyCtrl](.\u002Fcontrollers\u002Fpyctrl) and a [JavaScript Hello World for JSCtrl](.\u002Fcontrollers\u002Fjsctrl\u002Fsamples\u002Fhello.js)\n\nWe anticipate [libraries](#architecture) will be built on top of controllers.\nWe provide an example in [promptlib](.\u002Fpy\u002Fpromptlib) - a client-side Python library\nthat generates interacts with [DeclCtrl](.\u002Fcontrollers\u002Fdeclctrl) via the pyaici package.\n> 🧑‍💻 [Example notebook that uses PromptLib to interact with DeclCtrl](.\u002Fpy\u002Fpromptlib\u002Fnotebooks\u002Fbasics_tutorial.ipynb).\n\nThe controllers can be run in a cloud or local AICI-enabled LLM inference engine.\nYou can **run the provided reference engine (rLLM) locally** with either\n[libtorch+CUDA](.\u002Frllm\u002Frllm-cuda) or [llama.cpp backend](.\u002Frllm\u002Frllm-llamacpp).\n\nTo **develop a new controller**, use a Rust [starter project](.\u002Fcontrollers\u002Fuppercase) that shows usage of [aici_abi](.\u002Fcontrollers\u002Faici_abi)\nlibrary, which simplifies implementing the [low-level AICI interface](controllers\u002Faici_abi\u002FREADME.md#low-level-interface).\n> 🧑‍💻[Sample code for a minimal new controller](.\u002Fcontrollers\u002Fuppercase) to get you started\n\nTo **add AICI support to a new LLM inference engine**,\nyou will need to implement LLM-side of the [protocol](docs\u002Faicirt-proto.md)\nthat talks to [AICI runtime](aicirt).\n\nFinally, you may want to modify any of the provided components - PRs are most welcome!\n\n# Architecture\n\nAICI abstracts LLM inference engine from the controller and vice-versa, as in the picture below.\nThe rounded nodes are aspirational.\nAdditional layers can be built on top - we provide [promptlib](py\u002Fpromptlib),\nbut we strongly believe that\n[Guidance](https:\u002F\u002Fgithub.com\u002Fguidance-ai\u002Fguidance),\n[LMQL](https:\u002F\u002Flmql.ai\u002F),\n[SGLang](https:\u002F\u002Fgithub.com\u002Fsgl-project\u002Fsglang),\n[Outlines](https:\u002F\u002Fgithub.com\u002Foutlines-dev\u002Foutlines),\n[jsonformer](https:\u002F\u002Fgithub.com\u002F1rgs\u002Fjsonformer),\n[LMFE](https:\u002F\u002Fgithub.com\u002Fnoamgat\u002Flm-format-enforcer),\netc.\ncan also run on top of AICI (either with custom controllers or utilizing PyCtrl or JsCtrl).\n\n```mermaid\ngraph TD\n    PyCtrl -- AICI --> aicirt[AICI-runtime]\n    JsCtrl -- AICI --> aicirt\n    guidance([GuidanceCtrl]) -- AICI --> aicirt\n    lmql([LMQL Ctrl]) -- AICI --> aicirt\n    aicirt -- POSIX SHM --> rLLM\n    aicirt -- POSIX SHM --> llama[llama.cpp]\n    aicirt -- POSIX SHM --> pyaici\n    pyaici -- Python --> vLLM(vLLM)\n    pyaici -- Python --> hf[HF Transformers]\n```\n\nThe [pyaici](py\u002Fpyaici) package makes it easier to integrate AICI with Python-based LLM inference engines.\nTake a look at integration with [HuggingFace Transformers](scripts\u002Fpy\u002Frun_hf.py),\nthough note that it doesn't support forking (generation of multiple sequences in parallel).\nThe [vLLM REST server](scripts\u002Fpy\u002Fvllm_server.py) is currently out of date.\nPlease use the [rLLM-cuda](rllm\u002Frllm-cuda) or [rLLM-llama.cpp](rllm\u002Frllm-llamacpp) for now.\n\n# Security\n\n- `aicirt` runs in a separate process, and can run under a different user than the LLM engine\n- Wasm modules are [sandboxed by Wasmtime](https:\u002F\u002Fdocs.wasmtime.dev\u002Fsecurity.html)\n- Wasm only have access to [`aici_host_*` functions](controllers\u002Faici_abi\u002Fsrc\u002Fhost.rs),\n  implemented in [hostimpl.rs](aicirt\u002Fsrc\u002Fhostimpl.rs)\n- `aicirt` also exposes a partial WASI interface; however almost all the functions are no-op, except\n  for `fd_write` which shims file descriptors 1 and 2 (stdout and stderr) to print debug messages\n- each Wasm module runs in a separate process, helping with Spectre\u002FMeltdown mitigation\n  and allowing limits on CPU usage\n\nIn particular, Wasm modules cannot access the filesystem, network, or any other resources.\nThey also cannot spin threads or access any timers (this is relevant for Spectre\u002FMeltdown attacks).\n\n# Performance\n\nMost of computation in AICI Controllers occurs on the CPU, in parallel with the logit generation on the GPU.\nThe generation occurs in steps, where logits are generated in parallel for a new token for each sequence in a batch\n(typically between 1 and 50).\nThis involves reading the whole model and KV caches for sequences in the batch from the GPU memory.\nFor optimal batch throughput, the model and KV caches should utilize a major fraction of the GPU memory,\nand reading the whole memory takes about 40ms on A100 GPU (80GB).\n\nThus, each step of generation takes on the order of 20-50ms.\nWith careful engineering,\nthis is more than enough to compute the set of allowed tokens in Rust compiled to Wasm.\nThese can be combined either natively in Rust, or via Python or JavaScript interpreters\nwe provide.\n\nFor example, computing allowed token set in the 32000-strong vocabulary of Llama model takes:\n\n- about 2.0ms for Yacc grammar of the C programming language\n- about 0.3ms for a regular expression\n- about 0.2ms for a substring constraint, from 4kB string\n\nThe above numbers are for a single sequence, however each sequence is processed in separate process,\nand thus if there is more cores than sequences (which is typical), they do not change.\nThey also include overhead of calling into Python interpreter implemented in Wasm, and then back into\nRust-generated Wasm code for the constraint itself.\nThey are all well within the 20-50ms budget, so do not affect the generation time at all.\n\nThere is also some overhead in the critical path of sampling. It comes down to about 0.3ms per generation step\nwhen executing 10 sequences in parallel (this is irrespective of the constraint used).\nThe overhead goes up to around 0.7ms for 40 sequences (though it has not been fully optimized yet).\n\nWebAssembly is designed to have minimal overhead, compared to native code.\nIn our experience, [highly optimized](controllers\u002Faici_abi\u002Fimplementation.md#token-trie)\nRust code is less than 2x slower when run in\n[Wasmtime](https:\u002F\u002Fwasmtime.dev\u002F) than native.\nThis is 10-100x better than JavaScript or Python.\n\nAll measurements done on AMD EPYC 7V13 with nVidia A100 GPU with 80GB of VRAM.\n\n# Flexibility\n\nThe low-level interface that AICI runtime provides allows for:\n\n- interaction with the LLM inference engine before, during, and after every generated token\n- constraining decoding to a set of tokens\n- backtracking KV-cache to a previous state\n- fast-forwarding several tokens at a time (if they are known)\n- forking generation into multiple branches\n- communication between forks via shared variables\n- utility functions for converting between tokens and byte strings\n\nIt can be utilized from any language that compiles to Wasm.\n\nThis repository provides a Rust library that makes it easy to implement controllers in Rust,\nand provides [efficient implementations](controllers\u002Faici_abi\u002Fimplementation.md)\nof specific constraints ([regular expressions](controllers\u002Faici_abi\u002FREADME.md#regular-expressions),\n[yacc grammars](controllers\u002Faici_abi\u002FREADME.md#lr1-grammars), substrings).\nWe also provide [Python](controllers\u002Fpyctrl) and [JavaScript](controllers\u002Fjsctrl) interpreters\nthat allow to glue these constraints together.\nAll of these can be easily extended.\n\n# Acknowledgements\n\n- [Flash Attention kernels](rllm\u002Ftch-cuda\u002Fkernels\u002Fflash_attn\u002F) are copied from\n  [flash-attention repo](https:\u002F\u002Fgithub.com\u002FDao-AILab\u002Fflash-attention);\n  see [BSD LICENSE](rllm\u002Ftch-cuda\u002Fkernels\u002Fflash_attn\u002FLICENSE)\n- [Paged Attention kernels](rllm\u002Ftch-cuda\u002Fkernels\u002Fvllm\u002F) are copied from\n  [vLLM repo](https:\u002F\u002Fgithub.com\u002Fvllm-project\u002Fvllm);\n  see [Apache LICENSE](rllm\u002Ftch-cuda\u002Fkernels\u002Fvllm\u002FLICENSE)\n- [OpenAI API definitions](rllm\u002Frllm-base\u002Fsrc\u002Fserver\u002Fopenai\u002F) are copied and modified from\n  [candle-vllm](https:\u002F\u002Fgithub.com\u002FEricLBuehler\u002Fcandle-vllm);\n  see [MIT LICENSE](rllm\u002Frllm-base\u002Fsrc\u002Fserver\u002Fopenai\u002FLICENSE)\n- [cache_engine.rs](rllm\u002Frllm-cuda\u002Fsrc\u002Fllm\u002Fpaged\u002Fcache_engine.rs),\n  [config.rs](rllm\u002Frllm-base\u002Fsrc\u002Fconfig.rs),\n  and [scheduler.rs](rllm\u002Frllm-base\u002Fsrc\u002Fscheduler.rs)\n  are loosely based on [vLLM](https:\u002F\u002Fgithub.com\u002Fvllm-project\u002Fvllm)\n- [llama.rs](rllm\u002Frllm-cuda\u002Fsrc\u002Fllm\u002Fllama.rs), [phi.rs](rllm\u002Frllm-cuda\u002Fsrc\u002Fllm\u002Fphi.rs)\n  and [logits.rs](rllm\u002Frllm-base\u002Fsrc\u002Flogits.rs) are based on\n  [candle-transformers](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fcandle\u002Ftree\u002Fmain\u002Fcandle-transformers)\n- specific [Python library](.\u002Fcontrollers\u002Fpyctrl\u002FLib\u002F) files are copied from\n  [RustPython](https:\u002F\u002Fgithub.com\u002FRustPython\u002FRustPython)\n  (as we only use a subset of them)\n- the [example ANSI C grammar](controllers\u002Faici_abi\u002Fgrammars\u002Fc.y) is based on\n  https:\u002F\u002Fwww.lysator.liu.se\u002Fc\u002FANSI-C-grammar-y.html by Jeff Lee (from 1985)\n\n# Citing this package\n\nIf you find the AI Controller Interface and its ideas for defining a new layer in the LLM inference stack useful, please cite the package using the following reference: \n\n* Michal Moskal, Madan Musuvathi, Emre Kıcıman. AI Controller Interface, (2024), GitHub repository. https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Faici\n\nBibtex:\n\n```bibtex\n@misc{Moskal2024,\n  author = {Moskal, Michal and Musuvathi, Madan and {K\\i c\\i man}, Emre},\n  title = {{AI Controller Interface}},\n  year = {2024},\n  publisher = {{GitHub}},\n  journal = {{GitHub} repository},\n  howpublished = {\\url{https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Faici\u002F}}\n}\n```\n\n# Contributing\n\nThis project welcomes contributions and suggestions. Most contributions require you to agree to a\nContributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us\nthe rights to use your contribution. For details, visit https:\u002F\u002Fcla.opensource.microsoft.com.\n\nWhen you submit a pull request, a CLA bot will automatically determine whether you need to provide\na CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions\nprovided by the bot. You will only need to do this once across all repos using our CLA.\n\nThis project has adopted the [Microsoft Open Source Code of Conduct](https:\u002F\u002Fopensource.microsoft.com\u002Fcodeofconduct\u002F).\nFor more information see the [Code of Conduct FAQ](https:\u002F\u002Fopensource.microsoft.com\u002Fcodeofconduct\u002Ffaq\u002F) or\ncontact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.\n\n# Trademarks\n\nThis project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft\ntrademarks or logos is subject to and must follow\n[Microsoft's Trademark & Brand Guidelines](https:\u002F\u002Fwww.microsoft.com\u002Fen-us\u002Flegal\u002Fintellectualproperty\u002Ftrademarks\u002Fusage\u002Fgeneral).\nUse of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship.\nAny use of third-party trademarks or logos are subject to those third-party's policies.\n","# 人工智能控制器接口（AICI）\n\n**[LLGuidance库](https:\u002F\u002Fgithub.com\u002Fguidance-ai\u002Fllguidance) 是 AICI 的一个积极维护的演进与专业化版本，如果您只需要受限解码功能，推荐使用该库。**\n\n人工智能控制器接口（AICI）使您能够构建实时约束和引导大型语言模型（LLM）输出的控制器。这些控制器是灵活的程序，能够实现受限解码、动态编辑提示和生成文本，以及协调多个并行生成任务的执行。控制器在逐标记解码过程中融入自定义逻辑，并在 LLM 请求期间保持状态。这使得各种控制器策略成为可能，从基于编程或查询的解码，到多智能体对话，从而与 LLM 本身紧密集成并高效执行。\n\n**AICI 的目的是让构建和试验现有及全新的控制器策略以改进 LLM 生成变得容易。** 通过抽象底层 LLM 推理和服务引擎的实现细节，AICI 旨在简化控制器开发，使编写快速控制器更加容易，并促进跨 LLM 推理和服务引擎的兼容性。\n\nAICI 既适用于本地执行，也适用于云端执行，包括（最终）多租户 LLM 部署。控制器被实现为轻量级 WebAssembly（Wasm）模块，在与 LLM 推理引擎相同的机器上运行，利用 CPU 资源，而 GPU 则专注于标记生成任务。AICI 是推理栈中的一层，旨在允许 Guidance、LMQL 等控制库在其之上运行，从而获得效率和性能提升，以及跨 LLM 推理和服务引擎的可移植性。\n\n目前，AICI 已与 llama.cpp、HuggingFace Transformers 和 rLLM（自定义基于 tch 的 LLM 推理引擎）集成，vLLM 的集成也在进行中。\n\nAICI 具有以下特点：\n\n- [灵活性](#flexibility)：控制器可以用任何可编译为 Wasm 的语言编写（Rust、C、C++ 等），也可以在 Wasm 内解释执行（Python、JavaScript 等）。\n- [安全性](#security)：控制器运行在沙盒环境中，无法访问文件系统、网络或其他资源。\n- [高性能](#performance)：Wasm 模块被编译为原生代码，与 LLM 推理引擎并行运行，对生成过程仅引入极小的开销。\n\nAICI 是一个原型，由 [微软研究院](https:\u002F\u002Fwww.microsoft.com\u002Fen-us\u002Fresearch\u002F) 设计并构建。\n\n# 目录\n\n- [人工智能控制器接口（AICI）](#artificial-intelligence-controller-interface-aici)\n- [快速入门：示例 walkthrough](#quickstart-example-walkthrough)\n  - [开发环境设置](#development-environment-setup)\n  - [构建并启动 rLLM 服务器和 AICI 运行时](#build-and-start-rllm-server-and-aici-runtime)\n  - [使用 AICI 控制器控制 AI 输出](#control-ai-output-using-aici-controllers)\n- [全面指南：深入探索](#comprehensive-guide-exploring-further)\n- [架构](#architecture)\n- [安全性](#security)\n- [性能](#performance)\n- [灵活性](#flexibility)\n- [致谢](#acknowledgements)\n- [贡献](#contributing)\n- [商标](#trademarks)\n\n# 快速入门：示例 walkthrough\n\n在本快速入门中，我们将引导您完成以下步骤：\n\n* 设置 **rLLM 服务器** 和 **AICI 运行时**。\n* 构建并部署一个 **控制器**。\n* 使用 AICI 控制 LLM 输出，以便在生成文本时 **根据特定规则自定义 LLM 行为**。\n\n## 开发环境设置\n\n要编译 AICI 组件，您需要为 Rust 设置开发环境。对于本快速入门，您还需要 Python 3.11 或更高版本来创建控制器。\n\n### Windows WSL \u002F Linux \u002F macOS\n\n> [!NOTE]\n> **Windows 用户**：请使用 WSL2 或附带的 [devcontainer](https:\u002F\u002Fcontainers.dev)。添加原生 Windows 支持的计划已在 [此处跟踪](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Faici\u002Fissues\u002F42)。\n>\n> **macOS 用户**：请确保已安装 XCode 命令行工具，可通过运行 `xcode-select -p` 来检查；若未安装，请运行 `xcode-select --install`。\n>\n> **CUDA**：CUDA 构建依赖于特定的 libtorch 安装。强烈建议使用附带的 devcontainer。\n>\n> 如果您使用 devcontainer，可以直接跳至下一节 [构建并启动 rLLM 服务器和 AICI 运行时](#build-and-start-rllm-server-and-aici-runtime)。\n\n使用系统包管理器安装仓库中构建代码所需的工具，包括 `git`、`cmake` 和 `ccache`。\n\n例如，在 WSL \u002F Ubuntu 中使用 `apt`：\n\n    sudo apt-get install --assume-yes --no-install-recommends \\\n        build-essential cmake ccache pkg-config libssl-dev libclang-dev clang llvm-dev git-lfs\n\n或者在 macOS 上使用 Homebrew：\n\n    brew install git cmake ccache\n\n然后按照 [此处](https:\u002F\u002Fdoc.rust-lang.org\u002Fcargo\u002Fgetting-started\u002Finstallation.html) 和 [此处](https:\u002F\u002Fwww.rust-lang.org\u002Flearn\u002Fget-started) 的说明安装 **Rust、Rustup 和 Cargo**：\n\n    curl --proto '=https' --tlsv1.2 -sSf https:\u002F\u002Fsh.rustup.rs | sh\n\n安装完成后，请通过在终端中运行 `rustup --version` 来验证命令是否可用。如果命令未被识别，请尝试打开一个新的终端会话。\n\n接下来安装 wasm32-wasi Rust 组件：\n\n    rustup target add wasm32-wasi\n\n如果您已经安装了 Rust，或者 Cargo 报告版本过旧，请运行：\n\n    rustup update\n\n最后，为了使用 **Python** 控制器和脚本（如本教程），请运行以下命令安装所需软件包：\n\n    pip install pytest pytest-forked ujson posix_ipc numpy requests\n\n## 构建并启动 rLLM 服务器和 AICI 运行时\n\nrLLM 服务器有两个后端，一个基于 `libtorch` 和 CUDA（`rllm-cuda`），另一个基于 `llama.cpp`（`rllm-llamacpp`）。\n\n`rllm-cuda` 后端仅适用于计算能力为 8.0 或更高的 NVIDIA GPU（A100 及更高版本；RTX 30x0 及更高版本），并且需要对 `libtorch` 进行较为复杂的配置——强烈建议使用随附的 devcontainer。虽然本指南重点介绍 `rllm-llamacpp` 后端，但构建步骤与 `rllm-cuda` 相同，只是文件夹名称略有不同。\n\n在完成上述 [开发环境设置](#development-environment-setup) 后，克隆 AICI 仓库，并按照以下步骤继续操作。\n\n使用以下命令构建并运行 `aicirt` 和 `rllm-llamacpp`：\n\n    cd rllm\u002Frllm-llamacpp\n    .\u002Fserver.sh phi2\n\n您可以传递其他模型名称作为参数（运行 `.\u002Fserver.sh` 而不带参数可查看可用模型）。您也可以使用指向 `.gguf` 文件的 HuggingFace URL 或本地路径。\n\n    .\u002Fserver.sh orca\n\n有关 `rllm-llamacpp` 的更多详细信息，请参阅 [这里](rllm\u002Frllm-llamacpp\u002FREADME.md)。\n\nrLLM 服务器提供 HTTP 接口，用于配置任务和处理请求。您还可以使用此接口快速验证其状态。例如，如果您打开 http:\u002F\u002F127.0.0.1:4242\u002Fv1\u002Fmodels，您应该会看到：\n\n```json\n{\n  \"object\": \"list\",\n  \"data\": [\n    {\n      \"object\": \"model\",\n      \"id\": \"TheBloke\u002Fphi-2-GGUF\",\n      \"created\": 946810800,\n      \"owned_by\": \"owner\"\n    }\n  ]\n}\n```\n\n这表明所选模型已加载。\n\n## 使用 AICI 控制器控制 AI 输出\n\nAICI 允许托管称为 **控制器** 的自定义逻辑，这些控制器可以启动、终止并与 LLM 的标记生成进行交互。控制器接收输入参数，对其进行处理，并返回包含日志、LLM 标记和变量的结果。\n\n该仓库包含一些示例，特别是：\n\n* **jsctrl**：一个接受 JavaScript 代码作为输入以执行的控制器。该代码可以与模型交互以生成文本和标记。\n* **pyctrl**：一个接受 Python 代码作为输入以执行的控制器。该代码同样可以与模型交互以生成文本和标记。\n\n在本示例中，我们将使用 **pyctrl** 来管理使用简单 **Python 脚本** 的标记生成。如果您愿意，可以 [构建并上传 pyctrl](.\u002Fcontrollers\u002Fpyctrl\u002FREADME.md)，不过默认情况下，服务器会自动从 GitHub 下载 [最新版本](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Faici\u002Freleases\u002Flatest) 的 pyctrl。\n\n通常，控制器需要构建和部署，而脚本（Python 或 JavaScript）则随每次请求一起发送。\n\n下图展示了 rLLM 服务器、AICI 运行时和控制器之间的关系：\n\n```mermaid\nerDiagram\n    Host    ||--|{ CPU : \"\"\n    Host    ||--|{ GPU : \"\"\n    \n    CPU     ||--|| \"rLLM Server\" : execute\n    CPU     ||--|{ \"AICI Runtime\" : execute\n\n    \"AICI Runtime\" ||--|| \"Controller\" : instantiate\n\n    GPU     ||--|{ \"LLM token generation\" : execute\n```\n\n### 控制 LLM 标记生成\n\n假设我们希望模型生成一个符合特定格式且仅包含五项的列表。\n\n通常，实现这一点需要提示工程，即精心设计提示，明确指示，例如：\n\n    最受欢迎的五种交通工具是什么？\n    请以编号列表的形式返回结果。\n    不要添加解释，只需列出内容。\n\n提示也会因使用的模型而异，因为每个模型倾向于添加解释，并且对指令的理解方式也不同。\n\n借助 AICI，我们可以将控制权交还给代码，从而简化提示，例如：\n\n    最受欢迎的交通工具有哪些？\n\n然后通过代码来：\n\n1. 将列表限制为 5 项\n2. 阻止模型添加任何初始解释\n3. 格式化为编号列表\n4. 阻止模型在列表之后添加任何额外文本。\n\n让我们创建一个名为 `list-of-five.py` 的 Python 文件，内容如下：\n\n```python\nimport pyaici.server as aici\n\n# 强制模型生成格式良好的 5 项列表，例如：\n#   1. 名称 1\n#   2. 名称 2\n#   3. 名称 3\n#   4. 名称 4\n#   5. 名称 5\nasync def main():\n    \n    # 这是我们想要运行的提示。\n    # 注意提示中并未提及交通工具的数量或结果的格式。\n    prompt = \"最受欢迎的交通工具有哪些？\\n\"\n\n    # 告诉模型生成提示字符串，即从“待完成”的提示开始\n    await aici.FixedTokens(prompt)\n\n    # 记录当前标记生成过程中的位置\n    marker = aici.Label()\n\n    for i in range(1,6):\n      # 告诉模型生成列表编号\n      await aici.FixedTokens(f\"{i}.\")\n\n      # 等待模型生成交通工具名称并以换行符结束\n      await aici.gen_text(stop_at = \"\\n\")\n\n    await aici.FixedTokens(\"\\n\")\n\n    # 将生成的标记存储到结果变量中\n    aici.set_var(\"result\", marker.text_since())\n\naici.start(main())\n```\n\n运行该脚本与发送提示并没有太大区别。在这种情况下，我们同时发送了控制逻辑和指令。\n\n要查看最终结果，请执行以下命令：\n\n    .\u002Faici.sh run list-of-five.py\n\n结果：\n```\nRunning with tagged AICI Controller: gh:microsoft\u002Faici\u002Fpyctrl\n[0]: FIXED '最受欢迎的交通工具有哪些？\\n'\n[0]: FIXED '1.'\n[0]: GEN ' 汽车\\n'\n[0]: FIXED '2.'\n[0]: GEN ' 摩托车\\n'\n[0]: FIXED '3.'\n[0]: GEN ' 自行车\\n'\n[0]: FIXED '4.'\n[0]: GEN ' 卡车\\n'\n[0]: FIXED '5.'\n[0]: GEN ' 船舶\\n'\n[0]: FIXED '\\n'\n[DONE]\n[Response] 最受欢迎的交通工具有哪些？\n1. 汽车\n2. 摩托车\n3. 自行车\n4. 卡车\n5. 船舶\n\nresponse saved to tmp\u002Fresponse.json\nUsage: {'sampled_tokens': 16, 'ff_tokens': 37, 'cost': 69}\nTiming: {'http_response': 0.05193686485290527, 'data0': 0.05199289321899414, 'first_token': 0.0658726692199707, 'last_token': 0.1784682273864746}\nTokens\u002Fsec: {'prompt': 861.0913072488067, 'sampling': 89.65181217019571}\nStorage: {'result': '1. 汽车\\n2. 摩托车\\n3. 自行车\\n4. 卡车\\n5. 船舶\\n\\n'}\n```\n\n# 综合指南：深入探索\n\n本仓库包含多个组件，具体需要哪些组件取决于您的使用场景。\n\n您可以 **使用现有的控制器模块**。\n我们提供了 [PyCtrl](.\u002Fcontrollers\u002Fpyctrl) 和 [JsCtrl](.\u002Fcontrollers\u002Fjsctrl)，分别允许您使用服务器端的 Python 和 JavaScript 编写控制器脚本。`pyaici` 包（位于 `.\u002Fpy\u002Fpyaici`）中包含 `aici` 命令行工具，该工具使您能够通过任何控制器 **上传并运行脚本**（我们还为感兴趣者提供了 [REST API 定义](.\u002Fdocs\u002FREST.md)）。\n> 🧑‍💻 [PyCtrl 脚本的 Python 示例代码](.\u002Fcontrollers\u002Fpyctrl) 以及 [JSCtrl 的 JavaScript Hello World 示例](.\u002Fcontrollers\u002Fjsctrl\u002Fsamples\u002Fhello.js)\n\n我们预计会有基于控制器构建的 **库**。我们在 `promptlib`（位于 `.\u002Fpy\u002Fpromptlib`）中提供了一个示例——这是一个客户端 Python 库，它通过 `pyaici` 包与 `DeclCtrl`（位于 `.\u002Fcontrollers\u002Fdeclctrl`）进行交互。\n> 🧑‍💻 [使用 PromptLib 与 DeclCtrl 交互的示例笔记本](.\u002Fpy\u002Fpromptlib\u002Fnotebooks\u002Fbasics_tutorial.ipynb)。\n\n这些控制器可以在云端或本地的 AICI 支持的 LLM 推理引擎上运行。您也可以 **在本地运行提供的参考引擎 (rLLM)**，可以选择使用 [libtorch+CUDA](.\u002Frllm\u002Frllm-cuda) 或 [llama.cpp 后端](.\u002Frllm\u002Frllm-llamacpp)。\n\n要 **开发一个新的控制器**，可以使用 Rust 的 [入门项目](.\u002Fcontrollers\u002Fuppercase)，该项目展示了如何使用 `aici_abi`（位于 `.\u002Fcontrollers\u002Faici_abi`）库，该库简化了对 **低层级 AICI 接口**（详见 `controllers\u002Faici_abi\u002FREADME.md#low-level-interface`）的实现。\n> 🧑‍💻 [一个极简的新控制器示例代码](.\u002Fcontrollers\u002Fuppercase)，帮助您快速入门。\n\n要 **为新的 LLM 推理引擎添加 AICI 支持**，您需要实现与 [AICI 运行时](aicirt) 通信的 **协议** 中的 LLM 端部分（详见 `docs\u002Faicirt-proto.md`）。\n\n最后，如果您希望修改任何提供的组件，欢迎提交 PR！\n\n# 架构\n\nAICI 将 LLM 推理引擎与控制器相互抽象化，如图所示。图中的圆角节点代表未来的发展方向。在此基础上还可以构建更多层次——我们已经提供了 `promptlib`，但我们坚信，[Guidance](https:\u002F\u002Fgithub.com\u002Fguidance-ai\u002Fguidance)、[LMQL](https:\u002F\u002Flmql.ai\u002F)、[SGLang](https:\u002F\u002Fgithub.com\u002Fsgl-project\u002Fsglang)、[Outlines](https:\u002F\u002Fgithub.com\u002Foutlines-dev\u002Foutlines)、[jsonformer](https:\u002F\u002Fgithub.com\u002F1rgs\u002Fjsonformer)、[LMFE](https:\u002F\u002Fgithub.com\u002Fnoamgat\u002Flm-format-enforcer) 等工具也可以运行在 AICI 之上（无论是使用自定义控制器，还是利用 PyCtrl 或 JsCtrl）。\n\n```mermaid\ngraph TD\n    PyCtrl -- AICI --> aicirt[AICI-runtime]\n    JsCtrl -- AICI --> aicirt\n    guidance([GuidanceCtrl]) -- AICI --> aicirt\n    lmql([LMQL Ctrl]) -- AICI --> aicirt\n    aicirt -- POSIX SHM --> rLLM\n    aicirt -- POSIX SHM --> llama[llama.cpp]\n    aicirt -- POSIX SHM --> pyaici\n    pyaici -- Python --> vLLM(vLLM)\n    pyaici -- Python --> hf[HF Transformers]\n```\n\n`pyaici` 包（位于 `py\u002Fpyaici`）使得将 AICI 集成到基于 Python 的 LLM 推理引擎中更加容易。请参阅与 [HuggingFace Transformers](scripts\u002Fpy\u002Frun_hf.py) 的集成示例，但请注意，该集成不支持分叉（即并行生成多条序列）。目前，[vLLM REST 服务器](scripts\u002Fpy\u002Fvllm_server.py) 已经过时，请暂时使用 [rLLM-cuda](rllm\u002Frllm-cuda) 或 [rLLM-llama.cpp](rllm\u002Frllm-llamacpp)。\n\n# 安全性\n\n- `aicirt` 在独立进程中运行，并且可以以不同于 LLM 引擎的用户身份运行。\n- Wasm 模块由 [Wasmtime 进行沙箱隔离](https:\u002F\u002Fdocs.wasmtime.dev\u002Fsecurity.html)。\n- Wasm 模块仅能访问 [aici_host_* 函数](controllers\u002Faici_abi\u002Fsrc\u002Fhost.rs)，这些函数在 [hostimpl.rs](aicirt\u002Fsrc\u002Fhostimpl.rs) 中实现。\n- `aicirt` 还暴露了一个部分 WASI 接口；然而，除了 `fd_write` 函数外，几乎所有函数都是空操作，其中 `fd_write` 会将文件描述符 1 和 2（即标准输出和标准错误）重定向以打印调试信息。\n- 每个 Wasm 模块都在独立进程中运行，这有助于缓解 Spectre\u002FMeltdown 攻击，并允许对 CPU 使用率进行限制。\n\n特别地，Wasm 模块无法访问文件系统、网络或其他任何资源。它们也不能创建线程或访问任何定时器（这一点对于防止 Spectre\u002FMeltdown 攻击至关重要）。\n\n# 性能\n\nAICI 控制器中的大部分计算都在 CPU 上进行，与 GPU 上的 logits 生成并行执行。生成过程是分步进行的，每一步都会为批次中的每条序列并行生成下一个 token 的 logits（通常在 1 到 50 条序列之间）。这一过程涉及从 GPU 内存中读取整个模型和批次中各序列的 KV 缓存。为了获得最佳的批处理吞吐量，模型和 KV 缓存应占用 GPU 内存的大部分空间，而在 A100 GPU（80GB）上，读取全部内存大约需要 40 毫秒。\n\n因此，每一步的生成大约需要 20–50 毫秒。通过精心设计，这完全足够在编译为 Wasm 的 Rust 代码中计算出允许的 token 集合。这些集合可以通过 Rust 原生方式，或者通过我们提供的 Python 或 JavaScript 解释器来组合。\n\n例如，在 Llama 模型的 32,000 词汇表中计算允许的 token 集合所需的时间如下：\n\n- C 语言的 Yacc 文法约为 2.0 毫秒；\n- 正则表达式约为 0.3 毫秒；\n- 从 4KB 字符串中提取子字符串约束约为 0.2 毫秒。\n\n以上数字针对单条序列，但由于每条序列都在独立进程中处理，因此如果核心数多于序列数（这通常是情况），这些时间不会发生变化。此外，这些时间还包括调用 Wasm 中实现的 Python 解释器，然后再回到由 Rust 生成的用于约束本身的 Wasm 代码的开销。所有这些开销都远低于 20–50 毫秒的预算，因此完全不会影响生成时间。\n\n在采样关键路径上也存在一些额外开销。当并行执行 10 条序列时，每步采样的开销约为 0.3 毫秒（与所使用的约束无关）。而当并行执行 40 条序列时，这个开销会上升至约 0.7 毫秒（不过目前尚未完全优化）。\n\nWebAssembly 的设计目标之一就是尽可能减少与原生代码相比的开销。根据我们的经验，经过 **高度优化** 的 Rust 代码在 [Wasmtime](https:\u002F\u002Fwasmtime.dev\u002F) 中运行时，其速度仅比原生代码慢不到两倍。这比 JavaScript 或 Python 快 10 到 100 倍。\n\n所有测量均在配备 nVidia A100 GPU（80GB VRAM）的 AMD EPYC 7V13 服务器上完成。\n\n# 灵活性\n\nAICI 运行时提供的低级别接口支持以下功能：\n\n- 在每个生成的标记之前、期间和之后与 LLM 推理引擎进行交互\n- 将解码限制在一组特定的标记上\n- 将 KV 缓存回溯到先前的状态\n- 一次性快进多个标记（如果这些标记已知）\n- 将生成过程分支为多个路径\n- 通过共享变量在不同分支之间进行通信\n- 提供用于在标记和字节字符串之间相互转换的实用函数\n\n该接口可以被任何编译为 Wasm 的语言所使用。\n\n本仓库提供了一个 Rust 库，使得在 Rust 中实现控制器变得简单，并且提供了针对特定约束条件的高效实现（如正则表达式、Yacc 文法、子字符串）。我们还提供了 Python 和 JavaScript 解释器，允许将这些约束条件组合在一起。所有这些都可以轻松扩展。\n\n# 致谢\n\n- [Flash Attention 核心](rllm\u002Ftch-cuda\u002Fkernels\u002Fflash_attn\u002F) 源自 [flash-attention 仓库](https:\u002F\u002Fgithub.com\u002FDao-AILab\u002Fflash-attention)；参见 [BSD 许可证](rllm\u002Ftch-cuda\u002Fkernels\u002Fflash_attn\u002FLICENSE)\n- [分页注意力核心](rllm\u002Ftch-cuda\u002Fkernels\u002Fvllm\u002F) 源自 [vLLM 仓库](https:\u002F\u002Fgithub.com\u002Fvllm-project\u002Fvllm)；参见 [Apache 许可证](rllm\u002Ftch-cuda\u002Fkernels\u002Fvllm\u002FLICENSE)\n- [OpenAI API 定义](rllm\u002Frllm-base\u002Fsrc\u002Fserver\u002Fopenai\u002F) 源自并修改自 [candle-vllm](https:\u002F\u002Fgithub.com\u002FEricLBuehler\u002Fcandle-vllm)；参见 [MIT 许可证](rllm\u002Frllm-base\u002Fsrc\u002Fserver\u002Fopenai\u002FLICENSE)\n- [cache_engine.rs](rllm\u002Frllm-cuda\u002Fsrc\u002Fllm\u002Fpaged\u002Fcache_engine.rs)、[config.rs](rllm\u002Frllm-base\u002Fsrc\u002Fconfig.rs) 和 [scheduler.rs](rllm\u002Frllm-base\u002Fsrc\u002Fscheduler.rs) 均松散地基于 [vLLM](https:\u002F\u002Fgithub.com\u002Fvllm-project\u002Fvllm)\n- [llama.rs](rllm\u002Frllm-cuda\u002Fsrc\u002Fllm\u002Fllama.rs)、[phi.rs](rllm\u002Frllm-cuda\u002Fsrc\u002Fllm\u002Fphi.rs) 和 [logits.rs](rllm\u002Frllm-base\u002Fsrc\u002Flogits.rs) 基于 [candle-transformers](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fcandle\u002Ftree\u002Fmain\u002Fcandle-transformers)\n- 特定的 [Python 库](.\u002Fcontrollers\u002Fpyctrl\u002FLib\u002F) 文件源自 [RustPython](https:\u002F\u002Fgithub.com\u002FRustPython\u002FRustPython)（因为我们仅使用其中的一部分）\n- 示例 ANSI C 文法 [c.y](controllers\u002Faici_abi\u002Fgrammars\u002Fc.y) 基于 Jeff Lee（1985 年）提供的 https:\u002F\u002Fwww.lysator.liu.se\u002Fc\u002FANSI-C-grammar-y.html\n\n# 引用本软件包\n\n如果您认为 AI 控制器接口及其在 LLM 推理栈中定义新层的理念有用，请使用以下引用方式来引用本软件包：\n\n* Michal Moskal, Madan Musuvathi, Emre Kıcıman. AI 控制器接口, (2024), GitHub 仓库. https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Faici\n\nBibtex:\n\n```bibtex\n@misc{Moskal2024,\n  author = {Moskal, Michal and Musuvathi, Madan and {K\\i c\\i man}, Emre},\n  title = {{AI 控制器接口}},\n  year = {2024},\n  publisher = {{GitHub}},\n  journal = {{GitHub} 仓库},\n  howpublished = {\\url{https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Faici\u002F}}\n}\n```\n\n# 贡献\n\n本项目欢迎各种贡献和建议。大多数贡献都需要您签署一份贡献者许可协议（CLA），声明您有权并将您的贡献授予我们使用。有关详细信息，请访问 https:\u002F\u002Fcla.opensource.microsoft.com。\n\n当您提交拉取请求时，CLA 机器人会自动判断您是否需要提供 CLA，并相应地标记 PR（例如状态检查或评论）。请按照机器人提供的指示操作即可。对于使用我们 CLA 的所有仓库，您只需完成一次此步骤。\n\n本项目已采用 [微软开源行为准则](https:\u002F\u002Fopensource.microsoft.com\u002Fcodeofconduct\u002F)。更多信息请参阅 [行为准则常见问题解答](https:\u002F\u002Fopensource.microsoft.com\u002Fcodeofconduct\u002Ffaq\u002F) 或发送邮件至 [opencode@microsoft.com](mailto:opencode@microsoft.com) 咨询更多问题或意见。\n\n# 商标\n\n本项目可能包含其他项目、产品或服务的商标或标识。未经授权使用微软商标或标识必须遵守并遵循 [微软商标与品牌指南](https:\u002F\u002Fwww.microsoft.com\u002Fen-us\u002Flegal\u002Fintellectualproperty\u002Ftrademarks\u002Fusage\u002Fgeneral)。在本项目的修改版本中使用微软商标或标识不得造成混淆或暗示微软的赞助关系。任何第三方商标或标识的使用均应遵守其各自的政策。","# AICI 快速上手指南\n\nAICI (Artificial Intelligence Controller Interface) 是一个用于构建控制器的接口，能够实时约束和指导大语言模型 (LLM) 的输出。它通过轻量级的 WebAssembly (Wasm) 模块运行，支持与 LLM 推理引擎紧密集成，实现受限解码、动态提示编辑等功能。\n\n> **注意**：如果您仅需受限解码功能，推荐使用 AICI 的演进版本 **[LLGuidance](https:\u002F\u002Fgithub.com\u002Fguidance-ai\u002Fllguidance)**。\n\n## 环境准备\n\n### 系统要求\n- **操作系统**：Linux、macOS 或 Windows (需使用 WSL2)。\n  - *Windows 用户*：请使用 WSL2 或项目提供的 devcontainer。原生 Windows 支持尚在开发中。\n  - *macOS 用户*：确保已安装 XCode 命令行工具 (`xcode-select --install`)。\n- **硬件**：\n  - CPU：通用支持。\n  - GPU (可选)：若使用 `rllm-cuda` 后端，需 NVIDIA GPU (计算能力 8.0+, 如 A100, RTX 30x0 系列)。\n- **软件版本**：\n  - Python 3.11 或更高版本\n  - Rust (最新稳定版)\n\n### 前置依赖\n请使用系统包管理器安装以下基础构建工具：\n\n**Ubuntu \u002F WSL:**\n```bash\nsudo apt-get install --assume-yes --no-install-recommends \\\n    build-essential cmake ccache pkg-config libssl-dev libclang-dev clang llvm-dev git-lfs\n```\n\n**macOS (Homebrew):**\n```bash\nbrew install git cmake ccache\n```\n\n**安装 Rust 及 Wasm 目标组件:**\n```bash\n# 安装 Rustup 和 Cargo\ncurl --proto '=https' --tlsv1.2 -sSf https:\u002F\u002Fsh.rustup.rs | sh\n\n# 重新加载环境变量或打开新终端后，添加 wasm32-wasi 目标\nrustup target add wasm32-wasi\n\n# 如有需要，更新 Rust 工具链\nrustup update\n```\n\n**安装 Python 依赖:**\n```bash\npip install pytest pytest-forked ujson posix_ipc numpy requests\n```\n\n## 安装与启动\n\n本指南以 `rllm-llamacpp` 后端为例（兼容性最好，无需复杂 CUDA 配置）。\n\n### 1. 克隆仓库并启动服务\n克隆 AICI 仓库后，进入 `rllm-llamacpp` 目录并启动服务器。以下命令将自动下载并运行 `phi2` 模型（也可替换为其他模型名称或本地 `.gguf` 文件路径）。\n\n```bash\ncd rllm\u002Frllm-llamacpp\n.\u002Fserver.sh phi2\n```\n\n*注：运行 `.\u002Fserver.sh` 不带参数可查看可用模型列表。*\n\n### 2. 验证服务状态\n服务器启动后提供 HTTP 接口。访问 `http:\u002F\u002F127.0.0.1:4242\u002Fv1\u002Fmodels`，若看到如下 JSON 响应，说明模型加载成功：\n\n```json\n{\n  \"object\": \"list\",\n  \"data\": [\n    {\n      \"object\": \"model\",\n      \"id\": \"TheBloke\u002Fphi-2-GGUF\",\n      \"created\": 946810800,\n      \"owned_by\": \"owner\"\n    }\n  ]\n}\n```\n\n## 基本使用\n\nAICI 的核心是通过 **Controller** (控制器) 来控制 LLM 的生成过程。您可以编写 Python 或 JavaScript 脚本作为控制逻辑，无需复杂的提示词工程即可强制模型遵守格式。\n\n### 示例：生成固定格式的 5 项列表\n\n创建一个名为 `list-of-five.py` 的文件，内容如下。该脚本将强制模型生成一个包含 5 个项目的编号列表，并阻止模型添加额外的解释文字。\n\n```python\nimport pyaici.server as aici\n\nasync def main():\n    \n    # 定义基础提示词，无需指定数量或格式\n    prompt = \"What are the most popular types of vehicles?\\n\"\n\n    # 让模型先完成提示词部分\n    await aici.FixedTokens(prompt)\n\n    # 标记当前生成位置，用于后续提取结果\n    marker = aici.Label()\n\n    # 循环 5 次，强制生成 \"数字.\" 和 \"车辆名称\\n\"\n    for i in range(1, 6):\n      await aici.FixedTokens(f\"{i}.\")\n      await aici.gen_text(stop_at=\"\\n\")\n\n    await aici.FixedTokens(\"\\n\")\n\n    # 将生成的文本保存到变量\n    aici.set_var(\"result\", marker.text_since())\n\naici.start(main())\n```\n\n### 运行脚本\n\n在项目根目录下执行以下命令运行控制器脚本：\n\n```bash\n.\u002Faici.sh run list-of-five.py\n```\n\n**预期输出示例：**\n```text\nRunning with tagged AICI Controller: gh:microsoft\u002Faici\u002Fpyctrl\n[0]: FIXED 'What are the most popular types of vehicles?\\n'\n[0]: FIXED '1.'\n[0]: GEN ' Cars\\n'\n...\n[Response] What are the most popular types of vehicles?\n1. Cars\n2. Motorcycles\n3. Bicycles\n4. Trucks\n5. Boats\n\nStorage: {'result': '1. Cars\\n2. Motorcycles\\n3. Bicycles\\n4. Trucks\\n5. Boats\\n\\n'}\n```\n\n通过这种方式，您可以用代码精确控制 LLM 的输出结构，而无需依赖模型自身的指令遵循能力。","某金融科技公司正在开发一个自动化合规报告生成系统，需要确保大模型输出的每一段数据都严格符合监管格式且无幻觉。\n\n### 没有 aici 时\n- **格式校验滞后**：只能等模型生成完整文本后进行正则匹配，一旦发现日期或金额格式错误，必须丢弃重练，浪费大量算力和时间。\n- **逻辑控制困难**：难以在生成过程中动态干预，例如强制要求“若风险等级为高，则必须包含免责声明”，往往依赖脆弱的提示词工程，成功率不稳定。\n- **多语言开发受限**：团队擅长 Python 但底层推理引擎多为 C++，想要实现自定义解码逻辑需深入修改推理源码，门槛极高且难以维护。\n- **资源利用率低**：CPU 在等待 GPU 生成 token 时处于空闲状态，无法并行执行复杂的业务逻辑校验。\n\n### 使用 aici 后\n- **实时约束解码**：aici 将校验逻辑编译为 Wasm 模块，在 token 逐字生成时即时拦截非法字符，确保输出天然符合 JSON Schema 或特定正则，无需重试。\n- **动态流程编排**：控制器可在生成中途读取已出内容并动态修改后续提示，轻松实现“检测到高风险关键词即插入免责条款”的复杂业务规则。\n- **灵活的语言生态**：开发人员直接用熟悉的 Rust 或 Python 编写控制逻辑并编译为 Wasm，无需触碰底层推理引擎代码，大幅降低定制门槛。\n- **算力并行优化**：aici 利用 CPU 运行控制模块，与 GPU 的推理过程完美并行，在几乎零额外延迟的前提下实现了精细化的生成控制。\n\naici 通过将提示词升级为可执行的 Wasm 程序，让开发者能在毫秒级粒度上精准驾驭大模型输出，彻底解决了生成式 AI 在严肃场景中“不可控、难验证”的核心痛点。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmicrosoft_aici_0a178e49.png","microsoft","Microsoft","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fmicrosoft_4900709c.png","Open source projects and samples from Microsoft",null,"opensource@microsoft.com","OpenAtMicrosoft","https:\u002F\u002Fopensource.microsoft.com","https:\u002F\u002Fgithub.com\u002Fmicrosoft",[84,88,92,96,100,104,108,111,115,119],{"name":85,"color":86,"percentage":87},"Rust","#dea584",58.1,{"name":89,"color":90,"percentage":91},"Python","#3572A5",24.1,{"name":93,"color":94,"percentage":95},"Jupyter Notebook","#DA5B0B",4.5,{"name":97,"color":98,"percentage":99},"C","#555555",4.2,{"name":101,"color":102,"percentage":103},"Shell","#89e051",2.8,{"name":105,"color":106,"percentage":107},"TypeScript","#3178c6",2.5,{"name":109,"color":110,"percentage":32},"JavaScript","#f1e05a",{"name":112,"color":113,"percentage":114},"Yacc","#4B6C4B",0.9,{"name":116,"color":117,"percentage":118},"C++","#f34b7d",0.8,{"name":120,"color":121,"percentage":122},"Dockerfile","#384d54",0.2,2067,83,"2026-04-18T21:18:21","MIT",4,"Linux, macOS","非必需。若使用 rllm-cuda 后端，需要 NVIDIA GPU (计算能力 8.0+, 如 A100, RTX 30x0 系列); 若使用 rllm-llamacpp 后端则主要依赖 CPU。","未说明",{"notes":132,"python":133,"dependencies":134},"Windows 用户需使用 WSL2 或 devcontainer，原生支持尚在开发中。macOS 用户需安装 XCode 命令行工具。推荐使用项目提供的 devcontainer 以简化 CUDA 和 libtorch 的复杂配置。控制器逻辑通过 WebAssembly (Wasm) 运行，支持 Rust, C, C++, Python, JavaScript 等语言。","3.11+",[135,136,137,138,139,140,141,142],"Rust (wasm32-wasi target)","cmake","libtorch (用于 CUDA 后端)","pytest","ujson","posix_ipc","numpy","requests",[15,13,35,14],[145,146,147,148,149,150,151,152,153,154,155,156,157],"ai","rust","wasm","wasmtime","inference","language-model","llm","llm-framework","llm-inference","llm-serving","llmops","model-serving","transformer","2026-03-27T02:49:30.150509","2026-04-20T07:17:56.325129",[161,166,171,176,181,186],{"id":162,"question_zh":163,"answer_zh":164,"source_url":165},44159,"如何在 JavaScript 控制器 (jsctrl) 中实现 TokenSet.num_set\u002Frepr 功能？","该功能已在 Python 控制器 (pyctrl) 中实现，可参考以下提交记录作为实现依据：\n1. https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Faici\u002Fcommit\u002F171dfcf4b91e6c97f5e048df588d76fbd3b2a88c\n2. https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Faici\u002Fcommit\u002F97227956622a82f08e379514278bc3597db16a46\n3. https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Faici\u002Fcommit\u002F6390cbe4149b8a992a7e0df80ba9aebaebd9bebb\n此外，还需要添加 token_repr 功能。这是一个适合新手的入门任务，如果在实现过程中有疑问可以随时提出。","https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Faici\u002Fissues\u002F64",{"id":167,"question_zh":168,"answer_zh":169,"source_url":170},44160,"如何将非开源或非外部协作的仓库迁移到 GitHub inside Microsoft，或者如何选择退出迁移？","只有拥有仓库 `admin` 权限的用户才能响应此操作。需在 Issue 中评论以下命令之一：\n\n1. **选择加入迁移 (Opt-in)**:\n   格式：`@gimsvc optin --date \u003C目标迁移日期 mm-dd-yyyy>`\n   示例：`@gimsvc optin --date 03-15-2023`\n\n2. **选择退出迁移 (Opt-out)**:\n   格式：`@gimsvc optout --reason \u003C原因>`\n   原因选项包括：\n   - `staging`: 仓库将作为开源发布或转为公开 (public)\n   - `collaboration`: 用于与客户、合作伙伴等外部协作\n   - `delete`: 仓库不再需要，将被删除\n   - `other`: 其他未指定原因\n   示例：`@gimsvc optout --reason staging`\n\n如果不回应，仓库将被自动归档。如果选择退出，预计每 120 天会收到一次续期确认请求。","https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Faici\u002Fissues\u002F16",{"id":172,"question_zh":173,"answer_zh":174,"source_url":175},44161,"项目推荐的文件夹结构是怎样的？测试文件应该放在哪里？","建议的目录结构如下：\n- 顶层包含 README.md, aici.sh 以及合规性文件 (LICENSE, SECURITY.md 等)。\n- Rust 相关文件 (Cargo.toml 等) 放在顶层。\n- Python 相关文件放入 `py\u002F` 目录，其中 `pyaici` 包含 vLLM 和 HuggingFace 服务端代码。\n- 控制器代码放在 `controllers\u002F` (如 jsctrl, pyctrl)。\n- rLLM 相关代码放在 `rllm\u002F`。\n\n关于测试文件的存放：\n- 当前的 `tests` 文件夹专用于 pytest (目前仅包含 declctrl 测试)，建议将其移至 `py\u002Ftests\u002F`。\n- 脚本测试 (如 test-jsctrl.sh) 建议整合进 `.\u002Faici.sh` 或在各控制器目录下建立单独的 test.sh。\n- rLLM 的测试位于 `rllm-cuda\u002Ftest.sh`。\n- 也有建议考虑将 `py\u002F` 重命名为 `client\u002F`，或者将所有单元测试统一放到顶层的 `tests\u002F` 文件夹中以便管理。","https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Faici\u002Fissues\u002F56",{"id":177,"question_zh":178,"answer_zh":179,"source_url":180},44162,"在 vLLM 中如何实现 Fast-Forward (FF) 功能（生成多个零熵令牌）？","实现 Fast-Forward 的关键在于利用 `memory_efficient_attention_forward` 算子，具体步骤如下：\n1. 该算子支持不同的 KV 和 Q 尺寸，这是实现 FF 所需的。\n2. 需要传递 `BlockDiagonalCausalFromBottomRightMask` 而不是 `BlockDiagonalCausalMask`。\n3. 需要将分页的 KV 数据收集到连续张量中并传递给 `memory_efficient_attention_forward`。vLLM 中目前未使用的 `gather_cached_kv` 函数似乎可以完成此任务。\n4. 理想情况下，需要一个版本的 `memory_efficient_attention_forward` 能够直接从块中读取 KV 缓存。\n\n注意：xformers 中的该算子目前不支持增量使用（即提供部分 KV 缓存并计算剩余部分），内核需要支持像 vLLM 的 `single_query_cached_kv_attention` 那样将 KV 缓存分割成块。","https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Faici\u002Fissues\u002F17",{"id":182,"question_zh":183,"answer_zh":184,"source_url":185},44163,"为什么建议使用进程 (processes) 而不是线程 (threads) 来运行 Worker？跨平台通信机制如何解决？","使用进程而非线程的优势包括：\n- 获得操作系统级别的 Spectre 缓解措施。\n- 可以通过外部命令 (如 kill(2)) 限制时间，无需在 WASM 中使用 epochs，速度提升约 28%。\n- 可以对内存和时间进行更严格的限制（包括模块编译期间）。\n- 可以使用真实的 fork(2) 进行搜索分叉。\n\n关于跨平台进程间通信 (IPC) 的解决方案：\n由于 Windows 不支持 POSIX 共享内存 (SHM)，macOS 不支持 sem_init()，建议直接使用 **TCP sockets** 进行进程间通信，以确保证在不同操作系统上的兼容性。","https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Faici\u002Fissues\u002F15",{"id":187,"question_zh":188,"answer_zh":189,"source_url":190},44164,"什么样的 Issue 会被认为缺乏清晰度而被关闭？","如果 Issue 的描述过于宏大、模糊或超出项目当前范围，且发起人未能进一步阐述具体细节，维护者可能会以“缺乏清晰度 (lack of clarity)”为由关闭 Issue。\n例如，关于“文件格式感知的虚拟化 REPL 代理”或“提示驱动的虚拟化 REPL 代理”的提议，如果未说明具体的实现路径、应用场景或与现有架构的整合方式，仅列出宏大的功能列表（如支持所有已知格式、通过 GUI 交互等），会被视为范围不明。在这种情况下，维护者通常会请求发起人提供更多细节 (\"Could you elaborate?\")，若未得到回应则会关闭该 Issue。","https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Faici\u002Fissues\u002F60",[192,197,202,207,212,217,222,227,232,237,242,247],{"id":193,"version":194,"summary_zh":195,"released_at":196},351704,"v0.2.1","# AICI 控制器（wasm32-wasi-0.2.1）\n\n内容：\n```\n3367541 aici_declctrl.wasm\n3345005 aici_jsctrl.wasm\n14068426 aici_pyctrl.wasm\n316614 aici_uppercase.wasm\n632 tag.sh\n313698 yesno.wasm\n\n7acedc80579fb342c30d8767938000e346b48255d35e7830cf9fb88599661102  aici_declctrl.wasm\n1bb13e614b78525bfa920f7ee3816b260e6eb3160af948aed75cda02b125eb8f  aici_jsctrl.wasm\n9c902ca72729abb2b6f91bd06a3f2609a07db2ffc6b3e2d6841bfbb2df43bcb6  aici_pyctrl.wasm\n3330b7ce839164d031b2582a311063688ad2f0ca405906a30af3c6f301a950e8  aici_uppercase.wasm\n13e46f32f3e21e7ba390cae214798825f6e92c7eb1f92a528d2fee0c7a7f84b7  yesno.wasm\na8c4715017ade74a28a5a748917fce7692a8c256962c984453b1ec797c017ce3  tag.sh\n```\n\n## 打标签\n\n您可以使用 `tag.sh` 脚本上传并为模块打标签。\n\n```\n用法：.\u002Ftag.sh [--latest] [前缀]\n```\n\n`--latest` 选项还会为模块打上 `controller-latest` 标签，而 `前缀` 则会在打标签时被添加到模块名称的前面。\n\n这需要 `aici` 命令在系统路径中，并且已设置 `AICI_API_BASE` 变量。\n\n\n# AICI 运行时（linux-x86_64-0.2.1）\n\n内容：\n```\n14071480 aicirt\n\nf578fa4e4a0476edc05b98120ab710c6d2c2604f621c1155cd431e3615799b33  aicirt\n```\n\n# rLLM with llama.cpp（linux-x86_64-0.2.1）\n\n内容：\n```\n12712968 rllm-llamacpp\n\na1f8a695c0d75dbbe435a0c88bf50615c72b1a948c8613f0cc3a69dec0a43097  rllm-llamacpp\n```\n\n","2024-04-29T20:31:19",{"id":198,"version":199,"summary_zh":200,"released_at":201},351705,"v0.2.0","# AICI 控制器（wasm32-wasi-0.2.0）\n\n内容：\n```\n3367030 aici_declctrl.wasm\n3344841 aici_jsctrl.wasm\n14044816 aici_pyctrl.wasm\n316594 aici_uppercase.wasm\n632 tag.sh\n313671 yesno.wasm\n\n8073fcc72152da8de1e8f248c1ef71dd8adc595d3531234032ee683c2019b54c  aici_declctrl.wasm\ne97b56b75b8b0685665a99ed2edb17e8404cd474ddc8ee39ee1b718ba3775a23  aici_jsctrl.wasm\n52138cc27825534ccf2039b15f0c443230ad806e53e85a26b7ccef26ceec4c3b  aici_pyctrl.wasm\nb225f6200b02b00e4bacea385348c2312e4f96256b5399b2a6425adce23f5a01  aici_uppercase.wasm\nedcbceeebc9e0b75a18ad4819a97d2fa827c7347ae26a33707bd4671bad503cc  yesno.wasm\ncf99426b4f78a3133d38b8c6871ea7bf5a81d293bbf61a671e9b9cd8fdf5e805  tag.sh\n```\n\n## 打标签\n\n您可以使用 `tag.sh` 脚本上传并为模块打标签。\n\n```\n用法：.\u002Ftag.sh [--latest] [前缀]\n```\n\n`--latest` 选项还会为模块打上 `controller-latest` 标签，而 `前缀` 则会在打标签时被添加到模块名称的前面。\n\n这需要 `aici` 命令在系统路径中，并且已设置 `AICI_API_BASE` 变量。\n\n\n# AICI 运行时（linux-x86_64-0.2.0）\n\n内容：\n```\n14071480 aicirt\n\n4bc8d1062db610cfb117919b6d941cd476cdf3d31f5ea1f1f694bfdfde64fd39  aicirt\n```\n\n","2024-04-23T22:51:43",{"id":203,"version":204,"summary_zh":205,"released_at":206},351706,"v0.1.0","# AICI 控制器（wasm32-wasi-0.1.0）\n\n内容：\n```\n3370973 aici_declctrl.wasm\n3347948 aici_jsctrl.wasm\n14045558 aici_pyctrl.wasm\n316622 aici_uppercase.wasm\n632 tag.sh\n313113 yesno.wasm\n\n18901be2c3e171444f7dcf160179393c49a00e51a6a6dad01402d4ef09bfee5d  aici_declctrl.wasm\n029b9afc9abbbd8aeb752ddfdcc0e630d421f872f9d70b25fd8b14548075a6f1  aici_jsctrl.wasm\n7a1c0fa7356ef298e641ae332ed09421e1e740fcf53c2bd8817148cca9a59d0c  aici_pyctrl.wasm\n01f8a745cc243babdd2a9ddd0c7d667ea4707ac498e9c6ddc0da2e77b3c7dcbd  aici_uppercase.wasm\n145edbb2e65205e26404e348f422efb5c24b46117c66460398b1f68ed933a2bd  yesno.wasm\n7f812653e32b81a9d19dc570db0df74e4bf89def51484684183df82c94f0263d  tag.sh\n```\n\n## 打标签\n\n您可以使用 `tag.sh` 脚本上传并为模块打标签。\n\n```\n用法：.\u002Ftag.sh [--latest] [前缀]\n```\n\n`--latest` 选项还会为模块打上 `controller-latest` 标签，而 `前缀` 则会在打标签时被添加到模块名称的前面。\n\n这需要 `aici` 命令在 PATH 中，并且已设置 `AICI_API_BASE` 变量。\n\n\n# AICI 运行时（linux-x86_64-0.1.0）\n\n内容：\n```\n14010040 aicirt\n\n866a819bf46d8a9d439bbac66848c25e745291a86435d61348801f134b7fa8fc  aicirt\n```\n\n","2024-04-15T18:48:31",{"id":208,"version":209,"summary_zh":210,"released_at":211},351707,"v0.0.10","# AICI 控制器（wasm32-wasi-0.0.10）\n\n内容：\n```\n3723027 aici_declctrl.wasm\n3315302 aici_jsctrl.wasm\n14053698 aici_pyctrl.wasm\n199414 aici_uppercase.wasm\n633 tag.sh\n198645 yesno.wasm\n\nb54d095e72c98007915793ac95ad44ee6d69953c73e458ce8ec7085e16082645  aici_declctrl.wasm\n89690f0a48e38ea0c0d9c8ed7f5b7b37a65a70b60eb10edb41bacec988c86b90  aici_jsctrl.wasm\n5875d7d5932b061dc0ef4800aad4be76168bb87e16d5095ac8ddaa8aec7080d6  aici_pyctrl.wasm\nbd84eaf5da5cf9b940f925d64804ed06974d6ccd735a87a40f1b042289ea60ef  aici_uppercase.wasm\nf147b88fe62807b428beaf57c12ed3a7335bd8a57fb5c09a4c319bf1469e213f  yesno.wasm\n5aa1082d5f6a10f7669bbfd2e2ce5c0ad63f3cb44d8434be1e20ce052d6d6973  tag.sh\n```\n\n## 打标签\n\n您可以使用 `tag.sh` 脚本上传并为模块打标签。\n\n```\n用法：.\u002Ftag.sh [--latest] [前缀]\n```\n\n`--latest` 选项还会为模块打上 `controller-latest` 标签，而 `前缀` 则会在打标签时被添加到模块名称的前面。\n\n这需要 `aici` 命令在 PATH 中，并且已设置 `AICI_API_BASE` 变量。\n\n\n# AICI 运行时（linux-x86_64-0.0.10）\n\n内容：\n```\n14079672 aicirt\n\n42baac411f6231ba6120c7cd880aa8ad5b990d94cfdbf63370a626c877cb5bf6  aicirt\n```\n\n","2024-02-23T01:23:32",{"id":213,"version":214,"summary_zh":215,"released_at":216},351708,"v0.0.9","# AICI 控制器（wasm32-wasi-0.0.9）\n\n内容：\n```\n3723027 aici_declctrl.wasm\n3314831 aici_jsctrl.wasm\n14053401 aici_pyctrl.wasm\n199414 aici_uppercase.wasm\n632 tag.sh\n198645 yesno.wasm\n\nb54d095e72c98007915793ac95ad44ee6d69953c73e458ce8ec7085e16082645  aici_declctrl.wasm\nd9734fc9b500ac41688daad99bfc01ccc95071e20733776295d13fd0e76e10c8  aici_jsctrl.wasm\nb2f1d4b3b8f0836947d6c3601da7814358734d2de83e39a1652cd056495823da  aici_pyctrl.wasm\nbd84eaf5da5cf9b940f925d64804ed06974d6ccd735a87a40f1b042289ea60ef  aici_uppercase.wasm\nf147b88fe62807b428beaf57c12ed3a7335bd8a57fb5c09a4c319bf1469e213f  yesno.wasm\naab80664c47d88f0ded2507eaa1dd21d23ec492f7e83273de66a0152c87b5eb2  tag.sh\n```\n\n## 打标签\n\n您可以使用 `tag.sh` 脚本上传并为模块打标签。\n\n```\n用法：.\u002Ftag.sh [--latest] [前缀]\n```\n\n`--latest` 选项还会为模块打上 `controller-latest` 标签，而 `前缀` 则会在打标签时被添加到模块名称的前面。\n\n这需要 `aici` 命令在系统路径中，并且已设置 `AICI_API_BASE` 变量。\n\n\n# AICI 运行时（linux-x86_64-0.0.9）\n\n内容：\n```\n14079672 aicirt\n\n8f1734bc1a3c256edc45bfbd928362a5a3169cca2d026f4730ed6316be588570  aicirt\n```\n\n","2024-02-22T01:29:52",{"id":218,"version":219,"summary_zh":220,"released_at":221},351709,"v0.0.8","# AICI 控制器（wasm32-wasi-0.0.8）\n\n内容：\n```\n3712155 aici_declctrl.wasm\n3303407 aici_jsctrl.wasm\n13964221 aici_pyctrl.wasm\n183641 aici_uppercase.wasm\n632 tag.sh\n182701 yesno.wasm\n\n1fce044d579c0d944a482aa4734cb6da1239847070b40ee8a291d4ae2eb772f8  aici_declctrl.wasm\nf6aa7fc4421c9adbec93541be0dca77e4aac5d3c659a2f3a8eab29256c11bcc7  aici_jsctrl.wasm\n090442699aa331ab8ac03183b0fd7078124384aa303939b8d81ff63de9772f67  aici_pyctrl.wasm\n5dcf9ab3917bbdf74cb92257822f2facfcb8e43594379e68e294992fc16cba48  aici_uppercase.wasm\n900c3295dac997d4e7543a49095375521b7f69d116b43d706a6c28cef70a4137  yesno.wasm\ne8909cb4fa9cac9c2e491c33d1efd167d9cb412a2a4b3b18b385b8e2be751e14  tag.sh\n```\n\n## 打标签\n\n您可以使用 `tag.sh` 脚本上传并为模块打标签。\n\n```\n用法：.\u002Ftag.sh [--latest] [前缀]\n```\n\n`--latest` 选项还会为模块打上 `controller-latest` 标签，而 `前缀` 则会在打标签时被添加到模块名称的前面。\n\n这需要 `aici` 命令在系统路径中，并且已设置 `AICI_API_BASE` 变量。\n\n\n# AICI 运行时（linux-x86_64-0.0.8）\n\n内容：\n```\n13727344 aicirt\n\n2dc71753a4095e87dfb423c44148dfc8b71e61affcc03b835cdd86486a9057a7  aicirt\n```\n\n","2024-02-09T23:29:17",{"id":223,"version":224,"summary_zh":225,"released_at":226},351710,"v0.0.7","# AICI 控制器（wasm32-wasi-0.0.7）\n\n内容：\n```\n3711885 aici_declctrl.wasm\n3302762 aici_jsctrl.wasm\n13962100 aici_pyctrl.wasm\n183521 aici_uppercase.wasm\n632 tag.sh\n182622 yesno.wasm\n\nd3f74f43293cdf77e3f54fb27deccd25183e6b79b11cf70682047a1e62840f04  aici_declctrl.wasm\n6c057d8979c9a47a386182d140650481385bc91327451ca02e9ed6801e586c8c  aici_jsctrl.wasm\n36e82b659487a851983d7bc47370ff02c308695ed60d3990ee391f861f8859b2  aici_pyctrl.wasm\n9c1b5cef2a736c2ecd6f1588fdb255f37cde985b39d1432b840199189da5f189  aici_uppercase.wasm\n01e2963fd9b1f1069ee843ef61f65df1857753008c00e27b085493209331697f  yesno.wasm\nb380725c4c711b71cfb5e3d592d2e1a6e8d3e9af6460d691cf7d4e76a1efff5b  tag.sh\n```\n\n## 打标签\n\n您可以使用 `tag.sh` 脚本上传并为模块打标签。\n\n```\n用法：.\u002Ftag.sh [--latest] [前缀]\n```\n\n`--latest` 选项还会为模块打上 `controller-latest` 标签，而 `前缀` 则会在打标签时被添加到模块名称的前面。\n\n这需要 `aici` 命令在 PATH 中，并且已设置 `AICI_API_BASE` 变量。\n\n\n# AICI 运行时（linux-x86_64-0.0.7）\n\n内容：\n```\n29742688 aicirt\n\n220f13091f2d4710da9a836189d369727bcc905f87691027fe8396171ebb7eb0  aicirt\n```\n\n","2024-02-02T01:03:08",{"id":228,"version":229,"summary_zh":230,"released_at":231},351711,"v0.0.5","# AICI 控制器（wasm32-wasi-0.0.5）\n\n内容：\n```\n3711896 aici_declctrl.wasm\n3302259 aici_jsctrl.wasm\n13961932 aici_pyctrl.wasm\n183521 aici_uppercase.wasm\n632 tag.sh\n182622 yesno.wasm\n\n0c23087731740dd92d12897019f85e43e08bc48c00b6e343316f75e73568ba9a  aici_declctrl.wasm\naab90c5c6fcfd22fe9bd696ef7b490256dd42af3261579693c77b42c0633e29b  aici_jsctrl.wasm\n0da6bb8fb1143572aca49b321d753249644ff984b1f84cfdcd672ab78b799163  aici_pyctrl.wasm\n9c1b5cef2a736c2ecd6f1588fdb255f37cde985b39d1432b840199189da5f189  aici_uppercase.wasm\n01e2963fd9b1f1069ee843ef61f65df1857753008c00e27b085493209331697f  yesno.wasm\n17817089d42a5dc494aa1fb991acd9f3323db763074fb6c849345f9d83d03aa9  tag.sh\n```\n\n## 打标签\n\n您可以使用 `tag.sh` 脚本上传并为模块打标签。\n\n```\n用法：.\u002Ftag.sh [--latest] [前缀]\n```\n\n`--latest` 选项还会为模块打上 `controller-latest` 标签，而 `前缀` 则会在打标签时被添加到模块名称的前面。\n\n这需要 `aici` 命令在 PATH 中，并且已设置 `AICI_API_BASE` 变量。\n\n\n# AICI 运行时（linux-x86_64-0.0.5）\n\n内容：\n```\n29742688 aicirt\n\nd6255d04e89a32a08c13eadb73a035700b7196f82490e44b56799e4d3dae0e75  aicirt\n```\n\n","2024-01-31T00:18:28",{"id":233,"version":234,"summary_zh":235,"released_at":236},351712,"v0.0.4","# AICI 控制器（wasm32-wasi-0.0.4）\n\n内容：\n```\n3727851 aici_declctrl.wasm\n3324015 aici_jsctrl.wasm\n13981334 aici_pyctrl.wasm\n202414 aici_uppercase.wasm\n632 tag.sh\n196404 yesno.wasm\n\nd591f641ecf6930e05168d1a849ca86eb33816b1fb5942e1b57beb70382b21b4  aici_declctrl.wasm\n456ce7575ac48d39f63b83f18e8beb5936a7d89c0b61aeff6079d4fa78ec0f5f  aici_jsctrl.wasm\nf29a4ed06a143f0a68e2b666993ac04fd2b707229d9a206a678d5e0a42015d38  aici_pyctrl.wasm\n547c5f74f5c68c3cb0095a7f69e94b4aa9b17201bf12e9531d2065437524756c  aici_uppercase.wasm\n31e4096536d649f8245a49f2e2e8828f824a176e76fabfd0a6c0d7501b7eb02c  yesno.wasm\n6cfac6d5d758916631c495dad0e23f33549a32492d6acfe13b7fc802d4c274b2  tag.sh\n```\n\n## 打标签\n\n您可以使用 `tag.sh` 脚本上传并为模块打标签。\n\n```\n用法：.\u002Ftag.sh [--latest] [前缀]\n```\n\n`--latest` 选项还会为模块打上 `controller-latest` 标签，而 `前缀` 则会在打标签时被添加到模块名称的前面。\n\n这需要 `aici` 命令在系统路径中，并且已设置 `AICI_API_BASE` 变量。\n\n\n# AICI 运行时（linux-x86_64-0.0.4）\n\n内容：\n```\n28038752 aicirt\n\n5f925fdadcaebc0f73f25437cffa70823408008118a026ddf0f68cee9a00ae9e  aicirt\n```\n\n","2024-01-26T19:02:10",{"id":238,"version":239,"summary_zh":240,"released_at":241},351713,"v0.0.3","# AICI 控制器（wasm32-wasi-0.0.3）\n\n内容：\n```\n3727402 aici_declctrl.wasm\n3323663 aici_jsctrl.wasm\n13981950 aici_pyctrl.wasm\n202418 aici_uppercase.wasm\n632 tag.sh\n196407 yesno.wasm\n\n8d8160322c800ae8d113e12fe4e93ba16cf7481ff88d7a6badfbf98615b88c95  aici_declctrl.wasm\n5505cf5fd83fcfc4498643cc1c37b1bcd37c49d48d8d2b4856957a35d16c89e9  aici_jsctrl.wasm\n41bc81f0ce56f2add9c18e914e30919e6b608c1eaec593585bcebd61cc1ba744  aici_pyctrl.wasm\n3e44d1bfb11a08bfbd59118af4c1cd90d8b08edb3f9d311aaffdf81cbdabe5df  aici_uppercase.wasm\n6be1f7ea830371a41c69b22a2ed73623d22afda1b9708e14151d93dbf70dd6b5  yesno.wasm\neb7cd4267b8346e9aa09890c027866b4d781ce63125d8cbf184be43ff6047ad8  tag.sh\n```\n\n## 打标签\n\n您可以使用 `tag.sh` 脚本上传并为模块打标签。\n\n```\n用法：.\u002Ftag.sh [--latest] [前缀]\n```\n\n`--latest` 选项还会为模块打上 `controller-latest` 标签，而 `前缀` 则会在打标签时被添加到模块名称的前面。\n\n这需要 `aici` 命令在系统路径中，并且已设置 `AICI_API_BASE` 变量。\n\n\n# AICI 运行时（linux-x86_64-0.0.3）\n\n内容：\n```\n28026464 aicirt\n\n7c9a0a3e128a3ddb33fa6a205990968b84dea62bb2c423432d5ac78ea191e528  aicirt\n```\n\n","2024-01-19T01:28:20",{"id":243,"version":244,"summary_zh":245,"released_at":246},351714,"v0.0.2","# AICI Controllers (wasm32-wasi-0.0.2)\n\nContents:\n```\n3727387 aici_declctrl.wasm\n3323228 aici_jsctrl.wasm\n13981844 aici_pyctrl.wasm\n202418 aici_uppercase.wasm\n632 tag.sh\n196407 yesno.wasm\n\n234b37553deb4de44079ecb4131a4b3fa3db782b82e57d7aabf2c7f2c3fe1afb  aici_declctrl.wasm\ndcc93eeb897efec6bec34d0564af11308f8c5bd1f34d96b11e2814cf26ce34f0  aici_jsctrl.wasm\n342722c32676d59600ea8007e56da4810096f443b99952612896c160e0f83119  aici_pyctrl.wasm\n4540093c625d04f60daa42255840eee0722ff3e1e13beb2ec1f73840013a0131  aici_uppercase.wasm\na3428091108da4cac3005c36357b45dba9271dd6e081252394533c3993908e0d  yesno.wasm\na24178931081b50f771dad5aa2984c5a1b57fd361d34a6df2ab639d65089fd9d  tag.sh\n```\n\n## Tagging\n\nYou can upload and tag the modules using the `tag.sh` script.\n\n```\nUsage: .\u002Ftag.sh [--latest] [prefix]\n```\n\nThe `--latest` option will also tag the module with the `controller-latest` tag,\nwhile the `prefix` will be prepended to the module name when tagging.\n\nThis requires `aici` command to be in path and AICI_API_BASE variable set.\n\n\n# AICI Runtime (linux-x86_64-0.0.2)\n\nContents:\n```\n28124768 aicirt\n\nf58bd2b8b891aa0963d0c5f8da86c2ca8a5423ad398dafdfa2f88e0caaa5042b  aicirt\n```\n\n","2024-01-12T22:02:19",{"id":248,"version":249,"summary_zh":250,"released_at":251},351715,"v0.0.1","# AICI Controllers (wasm32-wasi-0.0.1)\n\nContents:\n```\n3727387 aici_declctrl.wasm\n3323373 aici_jsctrl.wasm\n13981844 aici_pyctrl.wasm\n202418 aici_uppercase.wasm\n196407 yesno.wasm\n\n234b37553deb4de44079ecb4131a4b3fa3db782b82e57d7aabf2c7f2c3fe1afb  aici_declctrl.wasm\nfa947de9d465ddff5cd1dcbc3968c4e6378add9da38e772d40f7d14048294898  aici_jsctrl.wasm\na3f7ce1e3902df75a9762a9c3724704f34d0922cfa574a0f4734e30665a4a565  aici_pyctrl.wasm\n4540093c625d04f60daa42255840eee0722ff3e1e13beb2ec1f73840013a0131  aici_uppercase.wasm\na3428091108da4cac3005c36357b45dba9271dd6e081252394533c3993908e0d  yesno.wasm\n```\n\n# AICI Runtime (linux-x86_64-0.0.1)\n\nContents:\n```\n28124768 aicirt\n\nf58bd2b8b891aa0963d0c5f8da86c2ca8a5423ad398dafdfa2f88e0caaa5042b  aicirt\n```\n\n","2024-01-12T20:58:20"]