[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-rksm--org-ai":3,"tool-rksm--org-ai":64},[4,17,27,35,43,56],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":16},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,3,"2026-04-05T11:01:52",[13,14,15],"开发框架","图像","Agent","ready",{"id":18,"name":19,"github_repo":20,"description_zh":21,"stars":22,"difficulty_score":23,"last_commit_at":24,"category_tags":25,"status":16},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",138956,2,"2026-04-05T11:33:21",[13,15,26],"语言模型",{"id":28,"name":29,"github_repo":30,"description_zh":31,"stars":32,"difficulty_score":23,"last_commit_at":33,"category_tags":34,"status":16},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 
绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",107662,"2026-04-03T11:11:01",[13,14,15],{"id":36,"name":37,"github_repo":38,"description_zh":39,"stars":40,"difficulty_score":23,"last_commit_at":41,"category_tags":42,"status":16},3704,"NextChat","ChatGPTNextWeb\u002FNextChat","NextChat 是一款轻量且极速的 AI 助手，旨在为用户提供流畅、跨平台的大模型交互体验。它完美解决了用户在多设备间切换时难以保持对话连续性，以及面对众多 AI 模型不知如何统一管理的痛点。无论是日常办公、学习辅助还是创意激发，NextChat 都能让用户随时随地通过网页、iOS、Android、Windows、MacOS 或 Linux 端无缝接入智能服务。\n\n这款工具非常适合普通用户、学生、职场人士以及需要私有化部署的企业团队使用。对于开发者而言，它也提供了便捷的自托管方案，支持一键部署到 Vercel 或 Zeabur 等平台。\n\nNextChat 的核心亮点在于其广泛的模型兼容性，原生支持 Claude、DeepSeek、GPT-4 及 Gemini Pro 等主流大模型，让用户在一个界面即可自由切换不同 AI 能力。此外，它还率先支持 MCP（Model Context Protocol）协议，增强了上下文处理能力。针对企业用户，NextChat 提供专业版解决方案，具备品牌定制、细粒度权限控制、内部知识库整合及安全审计等功能，满足公司对数据隐私和个性化管理的高标准要求。",87618,"2026-04-05T07:20:52",[13,26],{"id":44,"name":45,"github_repo":46,"description_zh":47,"stars":48,"difficulty_score":23,"last_commit_at":49,"category_tags":50,"status":16},2268,"ML-For-Beginners","microsoft\u002FML-For-Beginners","ML-For-Beginners 是由微软推出的一套系统化机器学习入门课程，旨在帮助零基础用户轻松掌握经典机器学习知识。这套课程将学习路径规划为 12 周，包含 26 节精炼课程和 52 道配套测验，内容涵盖从基础概念到实际应用的完整流程，有效解决了初学者面对庞大知识体系时无从下手、缺乏结构化指导的痛点。\n\n无论是希望转型的开发者、需要补充算法背景的研究人员，还是对人工智能充满好奇的普通爱好者，都能从中受益。课程不仅提供了清晰的理论讲解，还强调动手实践，让用户在循序渐进中建立扎实的技能基础。其独特的亮点在于强大的多语言支持，通过自动化机制提供了包括简体中文在内的 50 多种语言版本，极大地降低了全球不同背景用户的学习门槛。此外，项目采用开源协作模式，社区活跃且内容持续更新，确保学习者能获取前沿且准确的技术资讯。如果你正寻找一条清晰、友好且专业的机器学习入门之路，ML-For-Beginners 将是理想的起点。",84991,"2026-04-05T10:45:23",[14,51,52,53,15,54,26,13,55],"数据工具","视频","插件","其他","音频",{"id":57,"name":58,"github_repo":59,"description_zh":60,"stars":61,"difficulty_score":10,"last_commit_at":62,"category_tags":63,"status":16},3128,"ragflow","infiniflow\u002Fragflow","RAGFlow 
是一款领先的开源检索增强生成（RAG）引擎，旨在为大语言模型构建更精准、可靠的上下文层。它巧妙地将前沿的 RAG 技术与智能体（Agent）能力相结合，不仅支持从各类文档中高效提取知识，还能让模型基于这些知识进行逻辑推理和任务执行。\n\n在大模型应用中，幻觉问题和知识滞后是常见痛点。RAGFlow 通过深度解析复杂文档结构（如表格、图表及混合排版），显著提升了信息检索的准确度，从而有效减少模型“胡编乱造”的现象，确保回答既有据可依又具备时效性。其内置的智能体机制更进一步，使系统不仅能回答问题，还能自主规划步骤解决复杂问题。\n\n这款工具特别适合开发者、企业技术团队以及 AI 研究人员使用。无论是希望快速搭建私有知识库问答系统，还是致力于探索大模型在垂直领域落地的创新者，都能从中受益。RAGFlow 提供了可视化的工作流编排界面和灵活的 API 接口，既降低了非算法背景用户的上手门槛，也满足了专业开发者对系统深度定制的需求。作为基于 Apache 2.0 协议开源的项目，它正成为连接通用大模型与行业专有知识之间的重要桥梁。",77062,"2026-04-04T04:44:48",[15,14,13,26,54],{"id":65,"github_repo":66,"name":67,"description_en":68,"description_zh":69,"ai_summary_zh":69,"readme_en":70,"readme_zh":71,"quickstart_zh":72,"use_case_zh":73,"hero_image_url":74,"owner_login":75,"owner_name":76,"owner_avatar_url":77,"owner_bio":78,"owner_company":79,"owner_location":79,"owner_email":80,"owner_twitter":79,"owner_website":81,"owner_url":82,"languages":83,"stars":92,"forks":93,"last_commit_at":94,"license":95,"difficulty_score":10,"env_os":96,"env_gpu":97,"env_ram":98,"env_deps":99,"category_tags":110,"github_topics":111,"view_count":23,"oss_zip_url":79,"oss_zip_packed_at":79,"status":16,"created_at":118,"updated_at":119,"faqs":120,"releases":150},1275,"rksm\u002Forg-ai","org-ai","Emacs as your personal AI assistant. Use LLMs such as ChatGPT or LLaMA for text generation or DALL-E and Stable Diffusion for image generation. 
Also supports speech input \u002F output.","org-ai 是一个为 Emacs 用户打造的个人 AI 助手，让 Emacs 成为你的智能协作者。它支持使用 ChatGPT、LLaMA 等大语言模型生成文字，或通过 DALL-E 和 Stable Diffusion 生成图像，还能实现语音输入输出，极大提升了 Emacs 的交互能力。\n\n它解决了在 Emacs 中直接调用 AI 模型进行内容创作、图像生成和语音交互的问题，让用户无需切换环境即可完成复杂任务。无论是撰写文档、生成创意图像，还是通过语音与 AI 对话，都能在熟悉的 Emacs 环境中高效完成。\n\n适合经常使用 Emacs 的开发者、研究人员、设计师以及喜欢深度定制工作流的用户。对于需要频繁调用 AI 模型辅助写作、设计或数据分析的人来说，org-ai 提供了无缝集成的体验。\n\n其独特之处在于将 AI 功能深度嵌入 org-mode 编辑器中，支持代码块式交互、语音支持及多种 AI 模型灵活配置，使 AI 协作更加自然流畅。","# org-ai [![MELPA](https:\u002F\u002Fmelpa.org\u002Fpackages\u002Forg-ai-badge.svg)](https:\u002F\u002Fmelpa.org\u002F#\u002Forg-ai)\n\n[![org-ai video](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frksm_org-ai_readme_d85a18e1c7a8.png)](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=fvBDxiFPG6I)\n\nMinor mode for Emacs org-mode that provides access to generative AI models. Currently supported are\n- OpenAI API (ChatGPT, DALL-E, other text models), optionally run against Azure API instead of OpenAI\n- Stable Diffusion through [stable-diffusion-webui](https:\u002F\u002Fgithub.com\u002FAUTOMATIC1111\u002Fstable-diffusion-webui)\n\nInside an org-mode buffer you can\n- use ChatGPT to generate text, having full control over system and user prompts ([demo](#chatgpt-in-org-mode))\n- Speech input and output! Talk with your AI!\n- generate images and image variations with a text prompt using Stable Diffusion or DALL-E ([demo 1](#dall-e-in-org-mode), [demo 2](#image-variations))\n- org-ai everywhere: Various commands usable outside org-mode for prompting using the selected text or multiple files.\n\n_Note: In order to use the OpenAI API you'll need an [OpenAI account](https:\u002F\u002Fplatform.openai.com\u002F) and you need to get an API token. 
As far as I can tell, the current usage limits for the free tier get you pretty far._\n\n------------------------------\n\n## Table of Contents\n\n- [Demos](#demos)\n    - [ChatGPT in org-mode](#chatgpt-in-org-mode)\n    - [DALL-E in org-mode](#dall-e-in-org-mode)\n    - [Image variations](#image-variations)\n- [Features and Usage](#features-and-usage)\n    - [`#+begin_ai...#+end_ai` special blocks](#begin_aiend_ai-special-blocks)\n        - [Syntax highlighting in ai blocks](#syntax-highlighting-in-ai-blocks)\n        - [Jump to the end of the block after completion](#jump-to-the-end-of-the-block-after-completion)\n        - [Auto-fill paragraphs on insertion](#auto-fill-paragraphs-on-insertion)\n        - [Block Options](#block-options)\n            - [For ChatGPT](#for-chatgpt)\n            - [For DALL-E](#for-dall-e)\n            - [Other text models](#other-text-models)\n    - [Image variation](#image-variation)\n    - [Global Commands](#global-commands)\n        - [org-ai-on-project](#org-ai-on-project)\n    - [Noweb Support](#noweb-support)\n- [Installation](#installation)\n    - [Melpa](#melpa)\n    - [Straight.el](#straightel)\n    - [Manual](#manual)\n    - [OpenAI API key](#openai-api-key)\n        - [Using other services than OpenAI](#using-other-services-than-openai)\n            - [Azure](#azure)\n            - [perplexity.ai](#perplexityai)\n            - [Anthropic \u002F Claude](#anthropic--claude)\n    - [Setting up speech input \u002F output](#setting-up-speech-input--output)\n        - [Whisper](#whisper)\n            - [macOS specific steps](#macos-specific-steps)\n                - [macOS alternative: Siri dictation](#macos-alternative-siri-dictation)\n            - [Windows specific steps](#windows-specific-steps)\n        - [espeak \u002F greader](#espeak--greader)\n    - [Setting up Stable Diffusion](#setting-up-stable-diffusion)\n    - [Using local LLMs with 
oobabooga\u002Ftext-generation-webui](#using-local-llms-with-oobaboogatext-generation-webui)\n- [FAQ](#faq)\n- [Sponsoring](#sponsoring)\n\n## Demos\n\n### ChatGPT in org-mode\n\n```org\n#+begin_ai\nIs Emacs the greatest editor?\n#+end_ai\n```\n\n![chat-gpt in org-mode](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frksm_org-ai_readme_b257165d92ca.gif)\n\nYou can continue to type and press `C-c C-c` to create a conversation. `C-g` will interrupt a running request.\n\n\n### DALL-E in org-mode\n\nUse the `:image` keyword to generate an image. This uses DALL·E-3 by default.\n\n```org\n#+begin_ai :image :size 1024x1024\nHyper realistic sci-fi rendering of super complicated technical machine.\n#+end_ai\n```\n\n![dall-e in org-mode](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frksm_org-ai_readme_019876e0e636.gif)\n\nYou can use the following keywords to control the image generation:\n- `:size \u003Cwidth>x\u003Cheight>` - the size of the image to generate (default: 1024x1024)\n- `:model \u003Cmodel>` - the model to use (default: `\"dall-e-3\"`)\n- `:quality \u003Cquality>` - the quality of the image (choices: `hd`, `standard`)\n- `:style \u003Cstyle>` - the style to use (choices: `vivid`, `natural`)\n- `:n \u003Ccount> - the number of images to generate (default: 1)\n\n(For more information about those settings see [this OpenAI blog post](https:\u002F\u002Fcookbook.openai.com\u002Farticles\u002Fwhat_is_new_with_dalle_3).\n\nYou can customize the defaults for those variables with `customize-variable` or by setting them in your config:\n\n```elisp\n(setq org-ai-image-model \"dall-e-3\")\n(setq org-ai-image-default-size \"1792x1024\")\n(setq org-ai-image-default-count 2)\n(setq org-ai-image-default-style 'vivid)\n(setq org-ai-image-default-quality 'hd)\n(setq org-ai-image-directory (expand-file-name \"org-ai-images\u002F\" org-directory))\n```\n\n\n### Image variations\n\n![dall-e image generation in 
org-mode](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frksm_org-ai_readme_5f850f83616b.gif)\n\n\n\n## Features and Usage\n### `#+begin_ai...#+end_ai` special blocks\n\nSimilar to org-babel, these blocks demarcate input (and for ChatGPT also output) for the AI model. You can use it for AI chat, text completion and text -> image generation. See [options](#block-options) below for more information.\n\nCreate a block like\n\n```org\n#+begin_ai\nIs Emacs the greatest editor?\n#+end_ai\n```\n\nand press `C-c C-c`. The Chat input will appear inline and once the response is complete, you can enter your reply and so on. See [the demo](#chatgpt-in-org-mode) below. You can press `C-g` while the ai request is running to cancel it.\n\nYou can also modify the _system_ prompt and other parameters used. The system prompt is injected before the user's input and \"primes\" the model to answer in a certain style. For example you can do:\n\n```org\n#+begin_ai :max-tokens 250\n[SYS]: Act as if you are a powerful medival king.\n[ME]: What will you eat today?\n#+end_ai\n```\n\nThis will result in an API payload like\n\n```json\n{\n  \"messages\": [\n    {\n      \"role\": \"system\",\n      \"content\": \"Act as if you are a powerful medival king.\"\n    },\n    {\n      \"role\": \"user\",\n      \"content\": \"What will you eat today?\"\n    }\n  ],\n  \"model\": \"gpt-4o-mini\",\n  \"stream\": true,\n  \"max_tokens\": 250,\n  \"temperature\": 1.2\n}\n```\n\nFor some prompt ideas see for example [Awesome ChatGPT Prompts](https:\u002F\u002Fgithub.com\u002Ff\u002Fawesome-chatgpt-prompts).\n\nWhen generating images using the `:image` flag, images will appear underneath the ai block inline. Images will be stored (together with their prompt) inside `org-ai-image-directory` which defaults to `~\u002Forg\u002Forg-ai-images\u002F`.\n\nYou can also use speech input to transcribe the input. Press `C-c r` for `org-ai-talk-capture-in-org` to start recording. 
Note that this will require you to set up [speech recognition](#setting-up-speech-input--output) (see below). Speech output can be enabled with `org-ai-talk-output-enable`.\n\nInside an `#+begin_ai...#+end_ai` you can modify and select the parts of the chat with these commands:\n- Press `C-c \u003Cbackspace>` (`org-ai-kill-region-at-point`) to remove the chat part under point.\n- `org-ai-mark-region-at-point` will mark the region at point.\n- `org-ai-mark-last-region` will mark the last chat part.\n\n#### Syntax highlighting in ai blocks\n\nTo apply syntax highlighting to your `#+begin_ai ...` blocks just add a language major-mode name after `_ai`. E.g. `#+begin_ai markdown`. For markdown in particular, to then also correctly highlight code in backticks, you can set `(setq markdown-fontify-code-blocks-natively t)`. Make sure that you also have the [markdown-mode package](https:\u002F\u002Fmelpa.org\u002F#\u002Fmarkdown-mode) installed. Thanks @tavisrudd for this trick!\n\n#### Jump to the end of the block after completion\n\nThis behavior is enabled by default so that the interaction is more similar to a chat. It can be annoying when long output is present and the buffer scrolls while you are reading. So you can disable this with:\n\n```elisp\n(setq org-ai-jump-to-end-of-block nil)\n```\n\n#### Auto-fill paragraphs on insertion\n\nSet `(setq org-ai-auto-fill t)` to \"fill\" (automatically wrap lines according to `fill-column`) the inserted text. Basically like `auto-fill-mode` but for the AI.\n\n#### Block Options\n\nThe `#+begin_ai...#+end_ai` block can take the following options.\n\n##### For ChatGPT\nBy default, the content of ai blocks is interpreted as messages for ChatGPT. Text following `[ME]:` is associated with the user, text following `[AI]:` is associated as the model's response. 
Optionally you can start the block with a `[SYS]: \u003Cbehavior>` input to prime the model (see `org-ai-default-chat-system-prompt` below).\n\n- `:max-tokens number` - number of maximum tokens to generate (default: nil, use OpenAI's default)\n- `:temperature number` - temperature of the model (default: 1)\n- `:top-p number` - top_p of the model (default: 1)\n- `:frequency-penalty number` - frequency penalty of the model (default: 0)\n- `:presence-penalty` - presence penalty of the model (default: 0)\n- `:sys-everywhere` - repeat the system prompt for every user message (default: nil)\n\nIf you have a lot of different threads of conversation regarding the same topic and settings (system prompt, temperature, etc) and you don't want to repeat all the options, you can set org file scope properties or create a org heading with property drawer, such that all `#+begin_ai...#+end_ai` blocks under that heading will inherit the settings.\n\nExamples:\n```org\n* Emacs (multiple conversations re emacs continue in this subtree)\n:PROPERTIES:\n:SYS: You are a emacs expert. You can help me by answering my questions. You can also ask me questions to clarify my intention.\n:temperature: 0.5\n:model: gpt-4o-mini\n:END:\n\n** Web programming via elisp\n#+begin_ai\nHow to call a REST API and parse its JSON response?\n#+end_ai\n\n** Other emacs tasks\n#+begin_ai...#+end_ai\n\n* Python (multiple conversations re python continue in this subtree)\n:PROPERTIES:\n:SYS: You are a python programmer. Respond to the task with detailed step by step instructions and code.\n:temperature: 0.1\n:model: gpt-4\n:END:\n\n** Learning QUIC\n#+begin_ai\nHow to setup a webserver with http3 support?\n#+end_ai\n\n** Other python tasks\n#+begin_ai...#+end_ai\n```\n\nThe following custom variables can be used to configure the chat:\n\n- `org-ai-default-chat-model` (default: `\"gpt-4o-mini\"`)\n- `org-ai-default-max-tokens` How long the response should be. Currently cannot exceed 4096. 
If this value is too small an answer might be cut off (default: nil)\n- `org-ai-default-chat-system-prompt` How to \"prime\" the model. This is a prompt that is injected before the user's input. (default: `\"You are a helpful assistant inside Emacs.\"`)\n- `org-ai-default-inject-sys-prompt-for-all-messages` Wether to repeat the system prompt for every user message. Sometimes the model \"forgets\" how it was primed. This can help remind it. (default: `nil`)\n\n##### For DALL-E\n\nWhen you add an `:image` option to the ai block, the prompt will be used for image generation.\n\n- `:image` - generate an image instead of text\n- `:size` - size of the image to generate (default: 256x256, can be 512x512 or 1024x1024)\n- `:n` - the number of images to generate (default: 1)\n\nThe following custom variables can be used to configure the image generation:\n- `org-ai-image-directory` - where to store the generated images (default: `~\u002Forg\u002Forg-ai-images\u002F`)\n\n##### For Stable Diffusion\n\nSimilar to DALL-E but use\n\n```\n#+begin_ai :sd-image\n\u003CPROMPT>\n#+end_ai\n```\n\nYou can run img2img by labeling your org-mode image with #+name and\nreferencing it with :image-ref from your org-ai block.\n\n```\n#+begin_ai :sd-image :image-ref label1\nforest, Gogh style\n#+end_ai\n```\n\nM-x org-ai-sd-clip guesses the previous image's prompt on org-mode\nby the CLIP interrogator and saves it in the kill ring.\n\nM-x org-ai-sd-deepdanbooru guesses the previous image's prompt on\norg-mode by the DeepDanbooru interrogator and saves it in the kill\nring.\n\n##### For local models\nFor requesting completions from a local model served with [oobabooga\u002Ftext-generation-webui](https:\u002F\u002Fgithub.com\u002Foobabooga\u002Ftext-generation-webui), go through the setup steps described [below](#using-local-llms-with-oobaboogatext-generation-webui)\n\nThen start an API server:\n\n``` sh\ncd ~\u002F.emacs.d\u002Forg-ai\u002Ftext-generation-webui\nconda activate org-ai\npython 
server.py --api --model SOME-MODEL\n```\n\nWhen you add a `:local` key to an org-ai block and request completions with `C-c C-c`, the block will be sent to the local API server instead of the OpenAI API. For example:\n\n```\n#+begin_ai :local\n...\n#+end_ai\n```\n\nThis will send a request to `org-ai-oobabooga-websocket-url` and stream the response into the org buffer.\n\n##### Other text models\n\nThe older completion models can also be prompted by adding the `:completion` option to the ai block.\n\n- `:completion` - instead of using the chatgpt model, use the completion model\n- `:model` - which model to use, see https:\u002F\u002Fplatform.openai.com\u002Fdocs\u002Fmodels for a list of models\n\nFor the detailed meaning of those parameters see the [OpenAI API documentation](https:\u002F\u002Fplatform.openai.com\u002Fdocs\u002Fapi-reference\u002Fchat).\n\nThe following custom variables can be used to configure the text generation:\n\n- `org-ai-default-completion-model` (default: `\"text-davinci-003\"`)\n\n\n\n### Image variation\n\nYou can also use an existing image as input to generate more similar looking images. The `org-ai-image-variation` command will prompt for a file path to an image, a size and a count and will then generate as many images and insert links to them inside the current `org-mode` buffer. Images will be stored inside `org-ai-image-directory`. See the [demo](#image-variations) above.\n\n[For more information see the OpenAI documentation](https:\u002F\u002Fplatform.openai.com\u002Fdocs\u002Fguides\u002Fimages\u002Fvariations). The input image needs to be square and its size needs to be less than 4MB. And you currently need curl available as a command line tool[^1].\n\n[^1]: __Note:__ Currently the image variation implementation requires a command line curl to be installed. 
Reason for that is that the OpenAI API expects multipart\u002Fform-data requests and the emacs built-in `url-retrieve` does not support that (At least I haven't figured out how). Switching to `request.el` might be a better alternative. If you're interested in contributing, PRs are very welcome!\n\n\n\n### Global Commands\n\n`org-ai` can be used outside of `org-mode` buffers as well. When you enable `org-ai-global-mode`, the prefix `C-c M-a` will be bound to a number of commands:\n\n| command                          | keybinding  | description                                                                                                                                                                                                                                                                                                                                                    |\n|:---------------------------------|:------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n| `org-ai-on-region`               | `C-c M-a r` | Ask a question about the selected text or tell the AI to do something with it. The response will be opened in an org-mode buffer so that you can continue the conversation. Setting the variable `org-ai-on-region-file` (e.g. `(setq org-ai-on-region-file (expand-file-name \"org-ai-on-region.org\" org-directory))`) will associate a file with that buffer. |\n| `org-ai-summarize`               | `C-c M-a s` | Summarize the selected text.                                                                                                                                                                                                                         
                                                                                                          |\n| `org-ai-refactor-code`           | `C-c M-a c` | Tell the AI how to change the selected code, a diff buffer will appear with the changes.                                                                                                                                                                                                                                                                       |\n| `org-ai-on-project`              | `C-c M-a p` | Run prompts and modify \u002F refactor multiple files at once. Will use [projectile](https:\u002F\u002Fgithub.com\u002Fbbatsov\u002Fprojectile) if available, falls back to the current directory if not.                                                                                                                                                                               |\n| `org-ai-prompt`                  | `C-c M-a P` | Prompt the user for a text and then print the AI's response in current buffer.                                                                                                                                                                                                                                                                                 |\n| `org-ai-switch-chat-model`       | `C-c M-a m` | Interactively change `org-ai-default-chat-model`                                                                                                                                                                                                                                                                                                               |\n| `org-ai-open-account-usage-page` | `C-c M-a $` | Opens https:\u002F\u002Fplatform.openai.com\u002Faccount\u002Fusage to see how much money you have burned.                                                                                         
                                                                                                                                                                                |\n| `org-ai-open-request-buffer`     | `C-c M-a !` | Opens the `url` request buffer. If something doesn't work it can be helpful to take a look.                                                                                                                                                                                                                                                                    |\n| `org-ai-talk-input-toggle`       | `C-c M-a t` | Generally enable speech input for the different prompt commands.                                                                                                                                                                                                                                                                                               |\n| `org-ai-talk-output-toggle`      | `C-c M-a T` | Generally enable speech output.                                                                                                                                                                                                                                                                                                                                |\n\n#### org-ai-on-project\n\nUsing the org-ai-on-project buffer allows you to run commands on files in a project, alternatively also just on selected text in those files. You can e.g. select the readme of a project and ask \"what is it all about?\" or have code explained to you. You can also ask for code changes, which will generate a diff. 
If you know someone who thinks only VS Code with Copilot enabled can do that, point them here.\n\nRunning the `org-ai-on-project` command will open a separate buffer that allows you to choose multiple files (and optionally select a sub-region inside a file) and then run a prompt on it.\n\n![org-ai-on-project](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frksm_org-ai_readme_34bb2f1b63f5.png)\n\nIf you deactivate \"modify code\", the effect is similar to running `org-ai-on-region` just that the file contents all appear in the prompt.\n\nWith \"modify code\" activated, you can ask the AI to modify or refactor the code. By default (\"Request diffs\" deactivated), we will prompt to generate the new code for all selected files\u002Fregions and you can then see a diff per file and decide to apply it or not. With \"Request diffs\" active, the AI will be asked to directly create a unified diff that can then be applied.\n\n\n### Noweb Support\n\nGiven a named source block\n```\n#+name: sayhi\n#+begin_src shell\necho \"Hello there\"\n#+end_src\n```\nWe can try to reference it by name, but it doesn't work.\n```\n#+begin_ai\n[SYS]: You are a mimic. Whenever I say something, repeat back what I say to you. Say exactly what I said, do not add anything.\n\n[ME]: \u003C\u003Csayhi()>>\n\n\n[AI]: \u003C\u003Csayhi()>>\n\n[ME]:\n#+end_ai\n```\nWith `:noweb yes`\n\n```\n#+begin_ai :noweb yes\n[SYS]: You are a mimic. Whenever I say something, repeat back what I say to you. 
Say exactly what I said, do not add anything.\n\n[ME]: \u003C\u003Csayhi()>>\n\n\n[AI]: Hello there.\n\n[ME]:\n#+end_ai\n```\n\nYou can also trigger noweb expansion with an `org-ai-noweb: yes` heading proprty anywhere in the parent headings (header args takes precedence).\n\nTo see what your document will expand to when sent to the api, run `org-ai-expand-block`.\n\n#### Run arbitrary lisp inline\n\nThis is a hack but it works really well.\n\nCreate a block\n\n```\n#+name: identity\n#+begin_src emacs-lisp :var x=\"fill me in\"\n(format \"%s\" x)\n#+end_src\n```\n\nWe can invoke it and let noweb parameters (which support lisp) evaluate as code\n\n```\n#+begin_ai :noweb yes\nTell me some 3, simple ways to improve this dockerfile\n\n\u003C\u003Cidentity(x=(quelpa-slurp-file \"~\u002Fcode\u002Fibr-api\u002FDockerfile\"))>>\n\n\n\n[AI]: 1. Use a more specific version of Python, such as \"python:3.9.6-buster\" instead of \"python:3.9-buster\", to ensure compatibility with future updates.\n\n2. Add a cleanup step after installing poetry to remove any unnecessary files or dependencies, thus reducing the size of the final image.\n\n3. Use multi-stage builds to separate the build environment from the production environment, thus reducing the size of the final image and increasing security. For example, the first stage can be used to install dependencies and build the code, while the second stage can contain only the final artifacts and be used for deployment.\n\n[ME]:\n#+end_ai\n```\n\n\n## Installation\n\n### Melpa\n\norg-ai is on Melpa: https:\u002F\u002Fmelpa.org\u002F#\u002Forg-ai. If you have added Melpa to your package archives with\n\n```elisp\n(require 'package)\n(add-to-list 'package-archives '(\"melpa\" . 
\"http:\u002F\u002Fmelpa.org\u002Fpackages\u002F\") t)\n(package-initialize)\n```\n\nyou can install it with:\n\n```elisp\n(use-package org-ai\n  :ensure t\n  :commands (org-ai-mode\n             org-ai-global-mode)\n  :init\n  (add-hook 'org-mode-hook #'org-ai-mode) ; enable org-ai in org-mode\n  (org-ai-global-mode) ; installs global keybindings on C-c M-a\n  :config\n  (setq org-ai-default-chat-model \"gpt-4\") ; if you are on the gpt-4 beta:\n  (org-ai-install-yasnippets)) ; if you are using yasnippet and want `ai` snippets\n\n```\n\n### Straight.el\n\n```elisp\n(straight-use-package\n '(org-ai :type git :host github :repo \"rksm\u002Forg-ai\"\n          :local-repo \"org-ai\"\n          :files (\"*.el\" \"README.md\" \"snippets\")))\n```\n\n### Manual\n\nCheckout this repository.\n\n```sh\ngit clone\nhttps:\u002F\u002Fgithub.com\u002Frksm\u002Forg-ai\n```\n\nThen, if you use `use-package`:\n\n```elisp\n(use-package org-ai\n  :ensure t\n  :load-path (lambda () \"path\u002Fto\u002Forg-ai\"))\n  ;; ...rest as above...\n\n```\n\nor just with `require`:\n\n```elisp\n(package-install 'websocket)\n(add-to-list 'load-path \"path\u002Fto\u002Forg-ai\")\n(require 'org)\n(require 'org-ai)\n(add-hook 'org-mode-hook #'org-ai-mode)\n(org-ai-global-mode)\n(setq org-ai-default-chat-model \"gpt-4\") ; if you are on the gpt-4 beta:\n(org-ai-install-yasnippets) ; if you are using yasnippet and want `ai` snippets\n```\n\n### OpenAI API key\n\nYou can either directly set your api token in your config:\n\n```elisp\n(setq org-ai-openai-api-token \"\u003CENTER YOUR API TOKEN HERE>\")\n\n```\n\nAlternatively, `org-ai` supports `auth-source` for retrieving your API key. You can store a secret in the format\n\n```\nmachine api.openai.com login org-ai password \u003Cyour-api-key>\n```\n\nin your `~\u002Fauthinfo.gpg` file. If this is present, org-ai will use this mechanism to retrieve the token when a request is made. 
If you do not want `org-ai` to try to retrieve the key from `auth-source`, you can set `org-ai-use-auth-source` to `nil` before loading `org-ai`.\n\n#### Using other services than OpenAI\n\n##### Azure\n\nYou can switch to Azure by customizing these variables, either interactively with `M-x customize-variable` or by adding them to your config:\n\n```elisp\n(setq org-ai-service 'azure-openai\n      org-ai-azure-openai-api-base \"https:\u002F\u002Fyour-instance.openai.azure.com\"\n      org-ai-azure-openai-deployment \"azure-openai-deployment-name\"\n      org-ai-azure-openai-api-version \"2023-07-01-preview\")\n```\n\nTo store the API credentials, follow the authinfo instructions above but use `org-ai-azure-openai-api-base` as the machine name.\n\n##### perplexity.ai\n\nFor a list of available models see the [perplexity.ai documentation](https:\u002F\u002Fdocs.perplexity.ai\u002Fdocs\u002Fmodel-cards).\n\nEither switch the default service in your config:\n\n```elisp\n(setq org-ai-service 'perplexity.ai)\n(setq org-ai-default-chat-model \"llama-3-sonar-large-32k-online\")\n```\n\nor per block:\n\n```org\n#+begin_ai :service perplexity.ai :model llama-3-sonar-large-32k-online\n[ME]: Tell me fun facts about Emacs.\n#+end_ai\n```\n\nFor the authentication have an entry like `machine api.perplexity.ai login org-ai password pplx-***` in your `authinfo.gpg` or set `org-ai-openai-api-token`.\n\n__Note:__ Currently the perplexity.ai does not give access to references\u002Flinks via the API so Emacs will not be able to display references. They have a beta program for that running and I sure hope that this will be available generally soon.\n\n##### Anthropic \u002F Claude\n\nSimilar to the above. E.g. 
\n\n```org\n#+begin_ai :service anthropic :model claude-3-opus-20240229\n[ME]: Tell me fun facts about Emacs.\n#+end_ai\n```\n\nAnthropic models are [here](https:\u002F\u002Fdocs.anthropic.com\u002Fclaude\u002Fdocs\u002Fmodels-overview).\nThere is currently only one API version that is set via `org-ai-anthropic-api-version`. If other versions come out you can find them [here](https:\u002F\u002Fdocs.anthropic.com\u002Fclaude\u002Freference\u002Fversions).\n\nFor the API token use `machine api.anthropic.com login org-ai password sk-ant-***` in your `authinfo.gpg`.\n\n### Setting up speech input \u002F output\n\n#### Whisper\n\nThese setup steps are optional. If you don't want to use speech input \u002F output, you can skip this section.\n\n_Note: My personal config for org-ai can be found in [this gist](https:\u002F\u002Fgist.github.com\u002Frksm\u002F04be012be07671cd5e1dc6ec5b077e34). It contains a working whisper setup._\n\nThis has been tested on macOS and Linux. Someone with a Windows computer, please test this and let me know what needs to be done to make it work (Thank You!).\n\nThe speech input uses [whisper.el](https:\u002F\u002Fgithub.com\u002Fnatrys\u002Fwhisper.el) and `ffmpeg`. You need to clone the repo directly or use [straight.el](https:\u002F\u002Fgithub.com\u002Fradian-software\u002Fstraight.el) to install it.\n\n1. Install ffmpeg (e.g. `brew install ffmpeg` on macOS or `sudo apt install ffmpeg` on Linux).\n2. Clone whisper.el: `git clone https:\u002F\u002Fgithub.com\u002Fnatrys\u002Fwhisper.el path\u002Fto\u002Fwhisper.el`\n\nYou should now be able to load it inside Emacs:\n\n```elisp\n(use-package whisper\n  :load-path \"path\u002Fto\u002Fwhisper.el\"\n  :bind (\"M-s-r\" . 
whisper-run))\n```\n\nNow also load:\n\n```elisp\n(use-package greader :ensure)\n(require 'whisper)\n(require 'org-ai-talk)\n\n;; macOS speech settings, optional\n(setq org-ai-talk-say-words-per-minute 210)\n(setq org-ai-talk-say-voice \"Karen\")\n```\n\n##### macOS specific steps\n\nOn macOS you will need to do two more things:\n1. Allow Emacs to record audio\n2. Tell whisper.el what microphone to use\n\n###### 1. Allow Emacs to record audio\nYou can use the [tccutil helper](https:\u002F\u002Fgithub.com\u002FDocSystem\u002Ftccutil):\n\n```sh\ngit clone https:\u002F\u002Fgithub.com\u002FDocSystem\u002Ftccutil\ncd tccutil\nsudo python .\u002Ftccutil.py -p \u002FApplications\u002FEmacs.app -e --microphone\n```\n\nWhen you now run `ffmpeg -f avfoundation -i :0 output.mp3` from within an Emacs shell, there should be no `abort trap: 6` error.\n\n(As an alternative to tccutil.py see the method mentioned in [this issue](https:\u002F\u002Fgithub.com\u002Frksm\u002Forg-ai\u002Fissues\u002F86).)\n\n###### 2. Tell whisper.el what microphone to use\n\nYou can use the output of `ffmpeg -f avfoundation -list_devices true -i \"\"` to list the audio input devices and then tell whisper.el about it: `(setq whisper--ffmpeg-input-device \":0\")`. `:0` is the microphone index, see the output of the command above to use another one.\n\nI've created an Emacs helper that lets you select the microphone interactively. 
See [this gist](https:\u002F\u002Fgist.github.com\u002Frksm\u002F04be012be07671cd5e1dc6ec5b077e34#file-init-org-ai-el-L6).\n\nMy full speech enabled config then looks like:\n\n```elisp\n(use-package whisper\n  :load-path (lambda () (expand-file-name \"lisp\u002Fother-libs\u002Fwhisper.el\" user-emacs-directory))\n  :config\n  (setq whisper-model \"base\"\n        whisper-language \"en\"\n        whisper-translate nil)\n  (when *is-a-mac*\n    (rk\u002Fselect-default-audio-device \"Macbook Pro Microphone\")\n    (when rk\u002Fdefault-audio-device)\n    (setq whisper--ffmpeg-input-device (format \":%s\" rk\u002Fdefault-audio-device))))\n```\n\n###### macOS alternative: Siri dictation\n\nOn macOS, instead of whisper, you can also use the built-in Siri dictation. To enable that, go to `Preferences -> Keyboard -> Dictation`, enable it and set up a shortcut. The default is ctrl-ctrl.\n\n##### Windows specific steps\n\nThe way (defun whisper--check-install-and-run) is implemented does not work on Win10 (see https:\u002F\u002Fgithub.com\u002Frksm\u002Forg-ai\u002Fissues\u002F66).\n\nA workaround is to install whisper.cpp and model manually and patch:\n\n``` elisp\n(defun whisper--check-install-and-run (buffer status)\n  (whisper--record-audio))\n```\n\n#### espeak \u002F greader\n\nSpeech output on non-macOS systems defaults to using the [greader](http:\u002F\u002Felpa.gnu.org\u002Fpackages\u002Fgreader.html) package which uses [espeak](https:\u002F\u002Fespeak.sourceforge.net\u002F) underneath to synthesize speech. You will need to install greader manually (e.g. via `M-x package-install`). From that point on it should \"just work\". You can test it by selecting some text and calling `M-x org-ai-talk-read-region`.\n\n### Setting up Stable Diffusion\n\nAn API for Stable Diffusion can be hosted with the [stable-diffusion-webui](https:\u002F\u002Fgithub.com\u002FAUTOMATIC1111\u002Fstable-diffusion-webui) project. 
Go through the [install steps for your platform](https:\u002F\u002Fgithub.com\u002FAUTOMATIC1111\u002Fstable-diffusion-webui#installation-and-running), then start an API-only server:\n\n```sh\ncd path\u002Fto\u002Fstable-diffusion-webui\n.\u002Fwebui.sh --nowebui\n```\n\nThis will start a server on http:\u002F\u002F127.0.0.1:7861 by default. In order to use it with org-ai, you need to set `org-ai-sd-endpoint-base`:\n\n```elisp\n(setq org-ai-sd-endpoint-base \"http:\u002F\u002Flocalhost:7861\u002Fsdapi\u002Fv1\u002F\")\n```\n\nIf you use a server hosted elsewhere, change that URL accordingly.\n\n### Using local LLMs with oobabooga\u002Ftext-generation-webui\nSince version 0.4 org-ai supports local models served with [oobabooga\u002Ftext-generation-webui](https:\u002F\u002Fgithub.com\u002Foobabooga\u002Ftext-generation-webui). See the [installation instructions](https:\u002F\u002Fgithub.com\u002Foobabooga\u002Ftext-generation-webui#installation) to set it up for your system.\n\nHere is a setup walk-through that was tested on Ubuntu 22.04. It assumes [miniconda or Anaconda](https:\u002F\u002Fdocs.conda.io\u002Fprojects\u002Fconda\u002Fen\u002Fstable\u002Fuser-guide\u002Finstall\u002Fdownload.html#anaconda-or-miniconda) as well as [git-lfs](https:\u002F\u002Fgit-lfs.com\u002F) to be installed.\n\n#### Step 1: Setup conda env and install pytorch\n\n```sh\nconda create -n org-ai python=3.10.9\nconda activate org-ai\npip3 install torch torchvision torchaudio\n```\n\n#### Step 2: Install oobabooga\u002Ftext-generation-webui\n\n```sh\nmkdir -p ~\u002F.emacs.d\u002Forg-ai\u002F\ncd ~\u002F.emacs.d\u002Forg-ai\u002F\ngit clone https:\u002F\u002Fgithub.com\u002Foobabooga\u002Ftext-generation-webui\ncd text-generation-webui\npip install -r requirements.txt\n```\n\n#### Step 3: Install a language model\n\noobabooga\u002Ftext-generation-webui supports [a number of language models](https:\u002F\u002Fgithub.com\u002Foobabooga\u002Ftext-generation-webui#downloading-models). 
Normally, you would install them from [huggingface](https:\u002F\u002Fhuggingface.co\u002Fmodels?pipeline_tag=text-generation&sort=downloads). For example, to install the `CodeLlama-7b-Instruct` model:\n\n```sh\ncd ~\u002F.emacs.d\u002Forg-ai\u002Ftext-generation-webui\u002Fmodels\ngit clone git@hf.co:codellama\u002FCodeLlama-7b-Instruct-hf\n```\n\n#### Step 4: Start the API server\n\n```sh\ncd ~\u002F.emacs.d\u002Forg-ai\u002Ftext-generation-webui\nconda activate org-ai\npython server.py --api --model CodeLlama-7b-Instruct-hf\n```\n\nDepending on your hardware and the model used you might need to adjust the server parameters, e.g. use `--load-in-8bit` to reduce memory usage or `--cpu` if you don't have a suitable GPU.\n\nYou should now be able to use the local model with org-ai by adding the `:local` option to the `#+begin_ai` block:\n\n```\n#+begin_ai :local\nHello CodeLlama!\n#+end_ai\n```\n\n## FAQ\n\n### Is this OpenAI specific?\nNo, OpenAI is the easiest to set up (you only need an API key) but you can use local models as well. See how to use Stable Diffusion and local LLMs with oobabooga\u002Ftext-generation-webui above. Anthropic Claude and perplexity.ai are also supported. Please open an issue or PR for other services you'd like to see supported. I can be slow to respond but will add support if there is enough interest.\n\n### Are there similar projects around?\n\nThe gptel package provides an alternative interface to the OpenAI ChatGPT API: https:\u002F\u002Fgithub.com\u002Fkarthink\u002Fgptel\n\n\n## Sponsoring\n\nIf you find this project useful please consider [sponsoring](https:\u002F\u002Fgithub.com\u002Fsponsors\u002Frksm). 
Thank you!\n","# org-ai [![MELPA](https:\u002F\u002Fmelpa.org\u002Fpackages\u002Forg-ai-badge.svg)](https:\u002F\u002Fmelpa.org\u002F#\u002Forg-ai)\n\n[![org-ai 视频](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frksm_org-ai_readme_d85a18e1c7a8.png)](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=fvBDxiFPG6I)\n\nEmacs org-mode 的一个次要模式，提供对生成式 AI 模型的访问。目前支持以下模型：\n- OpenAI API（ChatGPT、DALL-E 以及其他文本模型），可选择使用 Azure API 而非 OpenAI\n- 通过 [stable-diffusion-webui](https:\u002F\u002Fgithub.com\u002FAUTOMATIC1111\u002Fstable-diffusion-webui) 使用 Stable Diffusion\n\n在 org-mode 缓冲区中，您可以：\n- 使用 ChatGPT 生成文本，并完全控制系统和用户提示（[演示](#chatgpt-in-org-mode)）\n- 支持语音输入与输出！与您的 AI 对话！\n- 使用 Stable Diffusion 或 DALL-E 根据文本提示生成图像及图像变体（[演示 1](#dall-e-in-org-mode)，[演示 2](#image-variations)）\n- org-ai 无处不在：各种命令可在 org-mode 外部使用，基于选定文本或多个文件进行提示。\n\n_注意：要使用 OpenAI API，您需要拥有一个 [OpenAI 账户](https:\u002F\u002Fplatform.openai.com\u002F) 并获取 API 密钥。据我所知，免费套餐的当前使用限制已经足够满足大部分需求。_\n\n------------------------------\n\n## 目录\n\n- [演示](#demos)\n    - [ChatGPT 在 org-mode 中](#chatgpt-in-org-mode)\n    - [DALL-E 在 org-mode 中](#dall-e-in-org-mode)\n    - [图像变体](#image-variations)\n- [功能与使用](#features-and-usage)\n    - [`#+begin_ai...#+end_ai` 特殊块](#begin_aiend_ai-special-blocks)\n        - [ai 块中的语法高亮](#syntax-highlighting-in-ai-blocks)\n        - [完成后跳转到块末尾](#jump-to-the-end-of-the-block-after-completion)\n        - [插入时自动填充段落](#auto-fill-paragraphs-on-insertion)\n        - [块选项](#block-options)\n            - [适用于 ChatGPT](#for-chatgpt)\n            - [适用于 DALL-E](#for-dall-e)\n            - [其他文本模型](#other-text-models)\n    - [图像变体](#image-variation)\n    - [全局命令](#global-commands)\n        - [org-ai-on-project](#org-ai-on-project)\n    - [Noweb 支持](#noweb-support)\n- [安装](#installation)\n    - [Melpa](#melpa)\n    - [Straight.el](#straightel)\n    - [手动](#manual)\n    - [OpenAI API 密钥](#openai-api-key)\n        - [使用 OpenAI 以外的服务](#using-other-services-than-openai)\n            - [Azure](#azure)\n            
- [perplexity.ai](#perplexityai)\n            - [Anthropic \u002F Claude](#anthropic--claude)\n    - [设置语音输入\u002F输出](#setting-up-speech-input--output)\n        - [Whisper](#whisper)\n            - [macOS 特定步骤](#macos-specific-steps)\n                - [macOS 替代方案：Siri 听写](#macos-alternative-siri-dictation)\n            - [Windows 特定步骤](#windows-specific-steps)\n        - [espeak \u002F greader](#espeak--greader)\n    - [设置 Stable Diffusion](#setting-up-stable-diffusion)\n    - [使用本地 LLM 与 oobabooga\u002Ftext-generation-webui](#using-local-llms-with-oobaboogatext-generation-webui)\n- [常见问题](#faq)\n- [赞助](#sponsoring)\n\n## 演示\n\n### ChatGPT 在 org-mode 中\n\n```org\n#+begin_ai\nEmacs 是最棒的编辑器吗？\n#+end_ai\n```\n\n![chat-gpt 在 org-mode 中](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frksm_org-ai_readme_b257165d92ca.gif)\n\n您可以继续输入并按 `C-c C-c` 创建对话。`C-g` 可中断正在运行的请求。\n\n\n### DALL-E 在 org-mode 中\n\n使用 `:image` 关键字生成图像。默认情况下使用 DALL·E-3。\n\n```org\n#+begin_ai :image :size 1024x1024\n超逼真的科幻渲染，展示一台超级复杂的科技机器。\n#+end_ai\n```\n\n![dall-e 在 org-mode 中](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frksm_org-ai_readme_019876e0e636.gif)\n\n您可以使用以下关键字控制图像生成：\n- `:size \u003Cwidth>x\u003Cheight>` - 生成图像的尺寸（默认：1024x1024）\n- `:model \u003Cmodel>` - 使用的模型（默认：“dall-e-3”）\n- `:quality \u003Cquality>` - 图像质量（可选：`hd`、`standard`）\n- `:style \u003Cstyle>` - 使用的风格（可选：`vivid`、`natural`）\n- `:n \u003Ccount>` - 生成的图像数量（默认：1）\n\n（有关这些设置的更多信息，请参阅 [这篇 OpenAI 博客文章](https:\u002F\u002Fcookbook.openai.com\u002Farticles\u002Fwhat_is_new_with_dalle_3)。\n\n您可以通过 `customize-variable` 或在配置中设置这些变量的默认值来自定义它们：\n\n```elisp\n(setq org-ai-image-model \"dall-e-3\")\n(setq org-ai-image-default-size \"1792x1024\")\n(setq org-ai-image-default-count 2)\n(setq org-ai-image-default-style 'vivid)\n(setq org-ai-image-default-quality 'hd)\n(setq org-ai-image-directory (expand-file-name \"org-ai-images\u002F\" org-directory))\n```\n\n\n### 图像变体\n\n![dall-e 图像生成在 org-mode 
中](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frksm_org-ai_readme_5f850f83616b.gif)\n\n\n\n## 功能与使用\n### `#+begin_ai...#+end_ai` 特殊块\n\n类似于 org-babel，这些块用于标记 AI 模型的输入（对于 ChatGPT 还包括输出）。您可以将其用于 AI 聊天、文本补全以及文本转图像。更多相关信息请参见下方的 [块选项](#block-options)。\n\n创建一个类似如下的块：\n\n```org\n#+begin_ai\nEmacs 是最棒的编辑器吗？\n#+end_ai\n```\n\n然后按 `C-c C-c`。聊天输入会以行内方式出现，一旦回复完成，您可以继续输入回复，依此类推。请参见下方的 [演示](#chatgpt-in-org-mode)。当 AI 请求正在运行时，您可以按 `C-g` 取消它。\n\n您还可以修改使用的 _系统_ 提示及其他参数。系统提示会在用户输入之前注入，从而“引导”模型以特定风格作答。例如，您可以这样做：\n\n```org\n#+begin_ai :max-tokens 250\n[SYS]: 假装自己是一位强大的中世纪国王。\n[ME]: 今天您要吃什么呢？\n#+end_ai\n```\n\n这将生成如下 API 负载：\n\n```json\n{\n  \"messages\": [\n    {\n      \"role\": \"system\",\n      \"content\": \"假装自己是一位强大的中世纪国王。\"\n    },\n    {\n      \"role\": \"user\",\n      \"content\": \"今天您要吃什么呢？\"\n    }\n  ],\n  \"model\": \"gpt-4o-mini\",\n  \"stream\": true,\n  \"max_tokens\": 250,\n  \"temperature\": 1.2\n}\n```\n\n有关一些提示创意，可以参考例如 [Awesome ChatGPT Prompts](https:\u002F\u002Fgithub.com\u002Ff\u002Fawesome-chatgpt-prompts)。\n\n使用 `:image` 标志生成图像时，图像会以内联方式显示在 ai 块下方。图像及其提示将存储在 `org-ai-image-directory` 中，默认路径为 `~\u002Forg\u002Forg-ai-images\u002F`。\n\n您也可以使用语音输入来转录输入内容。按 `C-c r` 以启动 `org-ai-talk-capture-in-org` 录音。请注意，这需要您先设置 [语音识别](#setting-up-speech-input--output)（见下文）。语音输出可通过 `org-ai-talk-output-enable` 开启。\n\n在 `#+begin_ai...#+end_ai` 内，您可以使用以下命令修改和选择聊天部分：\n- 按 `C-c \u003Cbackspace>`（`org-ai-kill-region-at-point`）删除光标下的聊天部分。\n- `org-ai-mark-region-at-point` 将标记光标处的区域。\n- `org-ai-mark-last-region` 将标记上一次的聊天部分。\n\n#### ai 块中的语法高亮\n\n要为你的 `#+begin_ai ...` 块应用语法高亮，只需在 `_ai` 后面加上一种语言的主要模式名称。例如：`#+begin_ai markdown`。对于 Markdown 特别是，若还想正确高亮反引号中的代码，可以设置 `(setq markdown-fontify-code-blocks-natively t)`。请确保你也已安装了 [markdown-mode 包](https:\u002F\u002Fmelpa.org\u002F#\u002Fmarkdown-mode)。感谢 @tavisrudd 提供的这个小技巧！\n\n#### 完成后跳转到块的末尾\n默认情况下启用此行为，以便交互更像聊天。但当输出较长且缓冲区在你阅读时自动滚动时，这可能会让人感到烦扰。因此，你可以通过以下方式禁用它：\n\n```elisp\n(setq org-ai-jump-to-end-of-block nil)\n```\n\n#### 插入时自动填充段落\n将 
`(setq org-ai-auto-fill t)` 设置为“填充”（根据 `fill-column` 自动换行）插入的文本。基本上就像 `auto-fill-mode`，只不过针对的是 AI。\n\n#### 块选项\n`#+begin_ai...#+end_ai` 块可以接受以下选项。\n\n##### 针对 ChatGPT\n默认情况下，AI 块的内容会被解释为 ChatGPT 的消息。紧跟 `[ME]:` 的文本被视为用户输入，而紧跟 `[AI]:` 的文本则被视为模型的回复。你也可以选择在块开头添加一个 `[SYS]: \u003C行为>` 输入来初始化模型（参见下文的 `org-ai-default-chat-system-prompt`）。\n\n- `:max-tokens number` - 最大生成的 token 数量（默认：nil，使用 OpenAI 的默认值）\n- `:temperature number` - 模型的温度（默认：1）\n- `:top-p number` - 模型的 top_p 参数（默认：1）\n- `:frequency-penalty number` - 模型的频率惩罚（默认：0）\n- `:presence-penalty` - 模型的存在惩罚（默认：0）\n- `:sys-everywhere` - 将系统提示重复应用于每条用户消息（默认：nil）\n\n如果你有许多关于同一主题和设置（系统提示、温度等）的不同对话线程，并且不想每次都重复所有选项，可以在 Org 文件范围内设置属性，或创建一个带有属性抽屉的 Org 标题，这样该标题下的所有 `#+begin_ai...#+end_ai` 块都会继承这些设置。\n\n示例：\n```org\n* Emacs（多个关于 Emacs 的对话延续在这个子树中）\n:PROPERTIES:\n:SYS: 你是 Emacs 专家。你可以通过回答我的问题来帮助我。你也可以向我提问以澄清我的意图。\n:temperature: 0.5\n:model: gpt-4o-mini\n:END:\n\n** 通过 elisp 进行 Web 编程\n#+begin_ai\n如何调用 REST API 并解析其 JSON 响应？\n#+end_ai\n\n** 其他 Emacs 任务\n#+begin_ai...#+end_ai\n\n* Python（多个关于 Python 的对话延续在这个子树中）\n:PROPERTIES:\n:SYS: 你是 Python 程序员。请以详细的分步说明和代码来回答任务。\n:temperature: 0.1\n:model: gpt-4\n:END:\n\n** 学习 QUIC\n#+begin_ai\n如何搭建支持 http3 的 Web 服务器？\n#+end_ai\n\n** 其他 Python 任务\n#+begin_ai...#+end_ai\n```\n\n以下自定义变量可用于配置聊天：\n\n- `org-ai-default-chat-model`（默认：“gpt-4o-mini”）\n- `org-ai-default-max-tokens`——响应应有多长。目前不能超过 4096。如果这个值太小，答案可能会被截断（默认：nil）\n- `org-ai-default-chat-system-prompt`——如何“初始化”模型。这是一个在用户输入前注入的提示。（默认：“你是 Emacs 内部的贴心助手。”）\n- `org-ai-default-inject-sys-prompt-for-all-messages`——是否在每条用户消息前重复系统提示。有时模型会“忘记”它是如何被初始化的，这有助于提醒它。（默认：`nil`）\n\n##### 针对 DALL-E\n当你在 AI 块中添加 `:image` 选项时，提示将用于图像生成。\n\n- `:image`——生成图像而非文本\n- `:size`——生成图像的尺寸（默认：256x256，也可为 512x512 或 1024x1024）\n- `:n`——生成的图像数量（默认：1）\n\n以下自定义变量可用于配置图像生成：\n- `org-ai-image-directory`——存放生成图像的目录（默认：`~\u002Forg\u002Forg-ai-images\u002F`）\n\n##### 针对 Stable Diffusion\n与 DALL-E 类似，但使用\n\n```\n#+begin_ai :sd-image\n\u003CPROMPT>\n#+end_ai\n```\n\n你可以通过在 Org 模式图像上标注 
#+name，并在 Org-AI 块中引用 :image-ref 来运行 img2img。\n\n```\n#+begin_ai :sd-image :image-ref label1\n森林，梵高风格\n#+end_ai\n```\n\nM-x org-ai-sd-clip 会通过 CLIP 查询器猜测 Org 模式中前一张图像的提示，并将其保存到剪贴板中。\n\nM-x org-ai-sd-deepdanbooru 则通过 DeepDanbooru 查询器猜测 Org 模式中前一张图像的提示，并将其保存到剪贴板中。\n\n##### 针对本地模型\n要从由 [oobabooga\u002Ftext-generation-webui](https:\u002F\u002Fgithub.com\u002Foobabooga\u002Ftext-generation-webui) 提供服务的本地模型请求补全，需按照下面介绍的步骤进行设置[见#使用 oobabooga text-generation-webui 的本地 LLM]。\n\n然后启动一个 API 服务器：\n\n``` sh\ncd ~\u002F.emacs.d\u002Forg-ai\u002Ftext-generation-webui\nconda activate org-ai\npython server.py --api --model SOME-MODEL\n```\n\n当你在 Org-AI 块中添加 `:local` 键并使用 `C-c C-c` 请求补全时，该块将被发送到本地 API 服务器，而不是 OpenAI API。例如：\n\n```\n#+begin_ai :local\n...\n#+end_ai\n```\n\n这会向 `org-ai-oobabooga-websocket-url` 发送请求，并将响应流式传输到 Org 缓冲区。\n\n##### 其他文本模型\n较旧的补全模型也可以通过在 AI 块中添加 `:completion` 选项来触发。\n\n- `:completion`——不使用 ChatGPT 模型，而是使用补全模型\n- `:model`——使用哪个模型，可参见 https:\u002F\u002Fplatform.openai.com\u002Fdocs\u002Fmodels 查看模型列表\n\n有关这些参数的详细含义，请参阅 [OpenAI API 文档](https:\u002F\u002Fplatform.openai.com\u002Fdocs\u002Fapi-reference\u002Fchat)。\n\n以下自定义变量可用于配置文本生成：\n\n- `org-ai-default-completion-model`（默认：“text-davinci-003”）\n\n### 图像变体\n\n您也可以使用一张现有图像作为输入，以生成更多外观相似的图像。`org-ai-image-variation` 命令会提示您输入一张图像的文件路径、指定图像的尺寸与数量，随后生成相应数量的图像，并在当前 `org-mode` 缓冲区中插入指向这些图像的链接。生成的图像将存储在 `org-ai-image-directory` 目录下。请参阅下方的[演示](#image-variations)。\n\n[有关更多信息，请参阅 OpenAI 文档](https:\u002F\u002Fplatform.openai.com\u002Fdocs\u002Fguides\u002Fimages\u002Fvariations)。输入图像必须为正方形，且文件大小不得超过 4MB。此外，目前需要在命令行工具中安装 `curl`[^1]。\n\n[^1]: __注：__ 当前的图像变体实现要求系统中已安装命令行版 `curl`。原因是 OpenAI API 需要发送 multipart\u002Fform-data 格式的请求，而 Emacs 自带的 `url-retrieve` 函数并不支持这种格式（至少我尚未找到解决办法）。改用 `request.el` 或许是更好的替代方案。如果您有意参与贡献，欢迎提交 Pull Request！\n\n### 全局命令\n\n`org-ai` 也可以在 `org-mode` 缓冲区之外使用。启用 `org-ai-global-mode` 后，前缀键 `C-c M-a` 将绑定到一系列命令：\n\n| 命令                          | 键绑定  | 描述                                                      
                                                                                                                                                                                                                                                                                              |\n|:---------------------------------|:------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n| `org-ai-on-region`               | `C-c M-a r` | 对选中文本提问或指示 AI 对其执行某项操作。响应结果将在一个 org-mode 缓冲区中打开，以便您继续对话。通过设置变量 `org-ai-on-region-file`（例如 `(setq org-ai-on-region-file (expand-file-name \"org-ai-on-region.org\" org-directory))`），可将该缓冲区与特定文件关联。 |\n| `org-ai-summarize`               | `C-c M-a s` | 概括选中文本内容。                                                                                                                                                                                                                                                                                                                                   |\n| `org-ai-refactor-code`           | `C-c M-a c` | 指示 AI 如何修改选中的代码，随后将弹出一个差异缓冲区显示修改内容。                                                                                                                                                                                                                                                                       |\n| `org-ai-on-project`              | `C-c M-a p` | 批量运行提示并同时修改\u002F重构多个文件。若已安装 [projectile](https:\u002F\u002Fgithub.com\u002Fbbatsov\u002Fprojectile)，则优先使用；否则回退到当前目录。                                                                                                                                                       
                        |\n| `org-ai-prompt`                  | `C-c M-a P` | 提示用户输入一段文本，然后在当前缓冲区中打印 AI 的回复。                                                                                                                                                                                                                                                                                 |\n| `org-ai-switch-chat-model`       | `C-c M-a m` | 交互式地切换 `org-ai-default-chat-model`。                                                                                                                                                                                                                                                                                                               |\n| `org-ai-open-account-usage-page` | `C-c M-a $` | 打开 https:\u002F\u002Fplatform.openai.com\u002Faccount\u002Fusage 页面，查看您的费用消耗情况。                                                                                                                                                                                                                                                                         |\n| `org-ai-open-request-buffer`     | `C-c M-a !` | 打开 `url` 请求缓冲区。如果某些功能无法正常工作，查看该缓冲区可能会有所帮助。                                                                                                                                                                                                                                                                    |\n| `org-ai-talk-input-toggle`       | `C-c M-a t` | 通常为各类提示命令启用语音输入功能。                                                                                                                                                                                                                                                                                               |\n| `org-ai-talk-output-toggle`      | `C-c M-a T` | 通常启用语音输出功能。                                                 
                                                                                                                                                                                                                                                                               |\n\n#### org-ai-on-project\n\n使用 `org-ai-on-project` 缓冲区，您可以在项目中的文件上运行命令，或者仅对这些文件中的选中文本执行操作。例如，您可以选中某个项目的自述文件并询问“它到底是关于什么的？”或让 AI 解释一段代码。您还可以请求对代码进行修改，AI 将生成相应的差异补丁。如果您认识有人认为只有启用了 Copilot 的 VS Code 才能做到这一点，请把他们引到这里。\n\n运行 `org-ai-on-project` 命令后，会打开一个独立的缓冲区，允许您选择多个文件（并可选地在文件内选择一个子区域），然后对其运行提示。\n\n![org-ai-on-project](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frksm_org-ai_readme_34bb2f1b63f5.png)\n\n如果取消勾选“修改代码”，效果类似于运行 `org-ai-on-region`，只是所有文件内容都会出现在提示中。\n\n当启用“修改代码”时，您可以要求 AI 修改或重构代码。默认情况下（“请求差异”未启用），我们会提示 AI 为所有选定的文件\u002F区域生成新代码，随后您可以逐个查看差异并决定是否应用。而当“请求差异”启用时，AI 将直接生成一个统一的差异补丁，您可以直接应用该补丁。\n\n### Noweb 支持\n\n给定一个具名源代码块：\n```\n#+name: sayhi\n#+begin_src shell\necho \"Hello there\"\n#+end_src\n```\n我们可以尝试按名称引用它，但行不通。\n```\n#+begin_ai\n[SYS]: 你是一个模仿者。每当我讲一句话时，你就原封不动地重复我说的话。一字不差地照我说的说，不要添加任何内容。\n\n[ME]: \u003C\u003Csayhi()>>\n\n\n[AI]: \u003C\u003Csayhi()>>\n\n[ME]:\n#+end_ai\n```\n在启用 `:noweb yes` 的情况下：\n```\n#+begin_ai :noweb yes\n[SYS]: 你是一个模仿者。每当我讲一句话时，你就原封不动地重复我说的话。一字不差地照我说的说，不要添加任何内容。\n\n[ME]: \u003C\u003Csayhi()>>\n\n\n[AI]: Hello there.\n\n[ME]:\n#+end_ai\n```\n\n此外，你还可以通过在父级标题中的任意位置设置 `org-ai-noweb: yes` 标题属性来触发 Noweb 展开。\n要查看你的文档在发送到 API 时将展开为何种形式，运行 `org-ai-expand-block`。\n\n#### 运行任意 Lisp 内联代码\n\n这虽然算个“小技巧”，但效果非常好。\n\n创建一个代码块：\n```\n#+name: identity\n#+begin_src emacs-lisp :var x=\"fill me in\"\n(format \"%s\" x)\n#+end_src\n```\n\n我们可以调用它，并让 Noweb 参数（支持 Lisp）作为代码进行求值：\n```\n#+begin_ai :noweb yes\n告诉我改进这个 Dockerfile 的三种简单方法\n\n\u003C\u003Cidentity(x=(quelpa-slurp-file \"~\u002Fcode\u002Fibr-api\u002FDockerfile\"))>>\n\n\n\n[AI]: 1. 使用更具体的 Python 版本，比如“python:3.9.6-buster”而不是“python:3.9-buster”，以确保与未来更新的兼容性。\n\n2. 在安装 Poetry 后添加清理步骤，删除所有不必要的文件或依赖项，从而减小最终镜像的大小。\n\n3. 
使用多阶段构建，将构建环境与生产环境分离，这样既能减小最终镜像的大小，又能提高安全性。例如，第一阶段可以用来安装依赖并构建代码，而第二阶段只包含最终产物，用于部署。\n\n[ME]:\n#+end_ai\n```\n\n\n## 安装\n\n### Melpa\n\norg-ai 已经上架 Melpa：https:\u002F\u002Fmelpa.org\u002F#\u002Forg-ai。如果你已经将 Melpa 添加到你的包仓库中，方法如下：\n```elisp\n(require 'package)\n(add-to-list 'package-archives '(\"melpa\" . \"http:\u002F\u002Fmelpa.org\u002Fpackages\u002F\") t)\n(package-initialize)\n```\n那么你可以通过以下命令安装：\n```elisp\n(use-package org-ai\n  :ensure t\n  :commands (org-ai-mode\n             org-ai-global-mode)\n  :init\n  (add-hook 'org-mode-hook #'org-ai-mode) ; 在 Org 模式下启用 org-ai\n  (org-ai-global-mode) ; 安装全局快捷键 C-c M-a\n  :config\n  (setq org-ai-default-chat-model \"gpt-4\") ; 如果你使用的是 GPT-4 测试版：\n  (org-ai-install-yasnippets)) ; 如果你使用 yasnippet 并希望获得 `ai` 代码片段\n```\n\n### Straight.el\n\n```elisp\n(straight-use-package\n '(org-ai :type git :host github :repo \"rksm\u002Forg-ai\"\n          :local-repo \"org-ai\"\n          :files (\"*.el\" \"README.md\" \"snippets\")))\n```\n\n### 手动安装\n\n克隆此仓库：\n```sh\ngit clone https:\u002F\u002Fgithub.com\u002Frksm\u002Forg-ai\n```\n\n然后，如果你使用 `use-package`：\n```elisp\n(use-package org-ai\n  :ensure t\n  :load-path (lambda () \"path\u002Fto\u002Forg-ai\"))\n  ;; …其余部分同上…\n\n```\n\n或者直接用 `require`：\n```elisp\n(package-install 'websocket)\n(add-to-list 'load-path \"path\u002Fto\u002Forg-ai\")\n(require 'org)\n(require 'org-ai)\n(add-hook 'org-mode-hook #'org-ai-mode)\n(org-ai-global-mode)\n(setq org-ai-default-chat-model \"gpt-4\") ; 如果你使用的是 GPT-4 测试版：\n(org-ai-install-yasnippets) ; 如果你使用 yasnippet 并希望获得 `ai` 代码片段\n```\n\n### OpenAI API 密钥\n\n你可以直接在配置中设置你的 API 密钥：\n```elisp\n(setq org-ai-openai-api-token \"\u003CENTER YOUR API TOKEN HERE>\")\n\n```\n\n或者，org-ai 支持通过 `auth-source` 获取你的 API 密钥。你可以在 `~\u002F.authinfo.gpg` 文件中以如下格式存储密钥：\n```\nmachine api.openai.com login org-ai password \u003Cyour-api-key>\n```\n如果该文件存在，org-ai 就会在发起请求时自动使用这一机制获取密钥。如果你不希望 org-ai 从 `auth-source` 获取密钥，可以在加载 org-ai 之前将 `org-ai-use-auth-source` 设置为 `nil`。\n\n#### 
使用 OpenAI 以外的服务\n\n##### Azure\n\n你可以通过自定义这些变量切换到 Azure，既可以通过交互式方式使用 `M-x customize-variable`，也可以直接在配置中添加：\n```elisp\n(setq org-ai-service 'azure-openai\n      org-ai-azure-openai-api-base \"https:\u002F\u002Fyour-instance.openai.azure.com\"\n      org-ai-azure-openai-deployment \"azure-openai-deployment-name\"\n      org-ai-azure-openai-api-version \"2023-07-01-preview\")\n```\n要存储 API 凭证，按照上述 authinfo 的说明操作，只是机器名改为 `org-ai-azure-openai-api-base`。\n\n##### perplexity.ai\n\n可用模型列表请参见 [perplexity.ai 文档](https:\u002F\u002Fdocs.perplexity.ai\u002Fdocs\u002Fmodel-cards)。\n\n你可以在配置中切换默认服务：\n```elisp\n(setq org-ai-service 'perplexity.ai)\n(setq org-ai-default-chat-model \"llama-3-sonar-large-32k-online\")\n```\n或者按每个代码块单独设置：\n```org\n#+begin_ai :service perplexity.ai :model llama-3-sonar-large-32k-online\n[ME]: 告诉我一些关于 Emacs 的有趣事实。\n#+end_ai\n```\n认证方面，在 `authinfo.gpg` 中添加类似条目 `machine api.perplexity.ai login org-ai password pplx-***`，或者直接设置 `org-ai-openai-api-token`。\n\n__注意：__ 目前 perplexity.ai 的 API 不支持返回参考文献\u002F链接，因此 Emacs 无法显示参考信息。他们正在开展一项测试计划，希望能尽快全面开放这项功能。\n\n##### Anthropic \u002F Claude\n\n与上述类似。例如：\n```org\n#+begin_ai :service anthropic :model claude-3-opus-20240229\n[ME]: 告诉我一些关于 Emacs 的有趣事实。\n#+end_ai\n```\nAnthropic 的模型请参见 [Anthropic 文档](https:\u002F\u002Fdocs.anthropic.com\u002Fclaude\u002Fdocs\u002Fmodels-overview)。\n目前只有一个 API 版本可通过 `org-ai-anthropic-api-version` 设置。如果有其他版本推出，可在 [Anthropic 文档](https:\u002F\u002Fdocs.anthropic.com\u002Fclaude\u002Freference\u002Fversions) 中找到。\n\n至于 API 密钥，可在 `authinfo.gpg` 中添加类似条目 `machine api.anthropic.com login org-ai password sk-ant-***`。\n\n### 设置语音输入\u002F输出\n\n#### Whisper\n\n这些设置步骤是可选的。如果您不想使用语音输入\u002F输出，可以跳过本节。\n\n_注：我的 org-ai 个人配置可以在[这个 gist](https:\u002F\u002Fgist.github.com\u002Frksm\u002F04be012be07671cd5e1dc6ec5b077e34)中找到。其中包含一个可用的 Whisper 设置。_\n\n此方法已在 macOS 和 Linux 上测试通过。如果有 Windows 用户，请帮忙测试并告知我需要做哪些调整才能使其正常工作（谢谢！）。\n\n语音输入使用了 [whisper.el](https:\u002F\u002Fgithub.com\u002Fnatrys\u002Fwhisper.el) 和 
`ffmpeg`。您需要直接克隆该仓库，或使用 [straight.el](https:\u002F\u002Fgithub.com\u002Fradian-software\u002Fstraight.el) 来安装它。\n\n1. 安装 ffmpeg（例如，在 macOS 上运行 `brew install ffmpeg`），或在 Linux 上运行 `sudo apt install ffmpeg`。\n2. 克隆 whisper.el：`git clone https:\u002F\u002Fgithub.com\u002Fnatrys\u002Fwhisper.el path\u002Fto\u002Fwhisper.el`\n\n现在您应该能够在 Emacs 中加载它：\n\n```elisp\n(use-package whisper\n  :load-path \"path\u002Fto\u002Fwhisper.el\"\n  :bind (\"M-s-r\" . whisper-run))\n```\n\n然后还要加载：\n\n```elisp\n(use-package greader :ensure)\n(require 'whisper)\n(require 'org-ai-talk)\n\n;; macOS 语音设置，可选\n(setq org-ai-talk-say-words-per-minute 210)\n(setq org-ai-talk-say-voice \"Karen\")\n```\n\n##### macOS 特定步骤\n\n在 macOS 上，您还需要完成两项额外操作：\n1. 允许 Emacs 录制音频\n2. 告诉 whisper.el 使用哪个麦克风\n\n###### 1. 允许 Emacs 录制音频\n您可以使用 [tccutil 工具](https:\u002F\u002Fgithub.com\u002FDocSystem\u002Ftccutil)：\n\n```sh\ngit clone https:\u002F\u002Fgithub.com\u002FDocSystem\u002Ftccutil\ncd tccutil\nsudo python .\u002Ftccutil.py -p \u002FApplications\u002FEmacs.app -e --microphone\n```\n\n现在，如果您在 Emacs 的 shell 中运行 `ffmpeg -f avfoundation -i :0 output.mp3`，就不应该再出现 `abort trap: 6` 错误了。\n\n（作为 tccutil.py 的替代方案，也可以参考[这个 issue](https:\u002F\u002Fgithub.com\u002Frksm\u002Forg-ai\u002Fissues\u002F86)中提到的方法。）\n\n###### 2. 
告诉 whisper.el 使用哪个麦克风\n\n您可以使用 `ffmpeg -f avfoundation -list_devices true -i \"\"` 的输出来列出音频输入设备，然后告诉 whisper.el 使用哪一个：`(setq whisper--ffmpeg-input-device \":0\")`。`:0` 是麦克风的索引，如果想使用其他设备，可以查看上述命令的输出。\n\n我还创建了一个 Emacs 辅助函数，可以让您以交互方式选择麦克风。请参见[这个 gist](https:\u002F\u002Fgist.github.com\u002Frksm\u002F04be012be07671cd5e1dc6ec5b077e34#file-init-org-ai-el-L6)。\n\n我的完整语音启用配置如下：\n\n```elisp\n(use-package whisper\n  :load-path (lambda () (expand-file-name \"lisp\u002Fother-libs\u002Fwhisper.el\" user-emacs-directory))\n  :config\n  (setq whisper-model \"base\"\n        whisper-language \"en\"\n        whisper-translate nil)\n  (when *is-a-mac*\n    (rk\u002Fselect-default-audio-device \"Macbook Pro Microphone\")\n    (when rk\u002Fdefault-audio-device)\n    (setq whisper--ffmpeg-input-device (format \":%s\" rk\u002Fdefault-audio-device))))\n```\n\n###### macOS 替代方案：Siri 听写\n\n在 macOS 上，除了 Whisper，您还可以使用内置的 Siri 听写功能。要启用它，前往“系统偏好设置 -> 键盘 -> 听写”，开启该功能并设置快捷键。默认是 ctrl-ctrl。\n\n##### Windows 特定步骤\n\n目前实现的 `(defun whisper--check-install-and-run)` 方法在 Win10 上无法正常工作（详见 https:\u002F\u002Fgithub.com\u002Frksm\u002Forg-ai\u002Fissues\u002F66）。\n\n一种解决办法是手动安装 whisper.cpp 和模型，并进行修补：\n\n```elisp\n(defun whisper--check-install-and-run (buffer status)\n  (whisper--record-audio))\n```\n\n#### espeak \u002F greader\n\n非 macOS 系统上的语音输出默认使用 [greader](http:\u002F\u002Felpa.gnu.org\u002Fpackages\u002Fgreader.html) 包，该包底层使用 [espeak](https:\u002F\u002Fespeak.sourceforge.net\u002F) 进行语音合成。您需要手动安装 greader（例如通过 `M-x package-install`）。此后应该就能“直接使用”。您可以通过选中文本并调用 `M-x org-ai-talk-read-region` 来测试。\n\n### 设置 Stable Diffusion\n\nStable Diffusion 的 API 可以通过 [stable-diffusion-webui](https:\u002F\u002Fgithub.com\u002FAUTOMATIC1111\u002Fstable-diffusion-webui) 项目来托管。按照[适用于您平台的安装步骤](https:\u002F\u002Fgithub.com\u002FAUTOMATIC1111\u002Fstable-diffusion-webui#installation-and-running)，然后启动一个仅 API 的服务器：\n\n```sh\ncd path\u002Fto\u002Fstable-diffusion-webui\n.\u002Fwebui.sh --nowebui\n```\n\n这将默认在 
http:\u002F\u002F127.0.0.1:7861 上启动一个服务器。为了在 org-ai 中使用它，您需要设置 `org-ai-sd-endpoint-base`：\n\n```elisp\n(setq org-ai-sd-endpoint-base \"http:\u002F\u002Flocalhost:7861\u002Fsdapi\u002Fv1\u002F\")\n```\n\n如果您使用的是其他地方托管的服务器，请相应地修改该 URL。\n\n### 使用本地 LLM 与 oobabooga\u002Ftext-generation-webui\n自版本 0.4 起，org-ai 支持使用 [oobabooga\u002Ftext-generation-webui](https:\u002F\u002Fgithub.com\u002Foobabooga\u002Ftext-generation-webui) 提供的本地模型。请参阅[安装说明](https:\u002F\u002Fgithub.com\u002Foobabooga\u002Ftext-generation-webui#installation)以根据您的系统进行设置。\n\n以下是在 Ubuntu 22.04 上测试过的设置流程。该流程假定已安装 [miniconda 或 Anaconda](https:\u002F\u002Fdocs.conda.io\u002Fprojects\u002Fconda\u002Fen\u002Fstable\u002Fuser-guide\u002Finstall\u002Fdownload.html#anaconda-or-miniconda) 以及 [git-lfs](https:\u002F\u002Fgit-lfs.com\u002F)。\n\n#### 第一步：设置 conda 环境并安装 pytorch\n\n```sh\nconda create -n org-ai python=3.10.9\nconda activate org-ai\npip3 install torch torchvision torchaudio\n```\n\n#### 第二步：安装 oobabooga\u002Ftext-generation-webui\n\n```sh\nmkdir -p ~\u002F.emacs.d\u002Forg-ai\u002F\ncd ~\u002F.emacs.d\u002Forg-ai\u002F\ngit clone https:\u002F\u002Fgithub.com\u002Foobabooga\u002Ftext-generation-webui\ncd text-generation-webui\npip install -r requirements.txt\n```\n\n#### 第三步：安装语言模型\n\noobabooga\u002Ftext-generation-webui 支持[多种语言模型](https:\u002F\u002Fgithub.com\u002Foobabooga\u002Ftext-generation-webui#downloading-models)。通常情况下，您会从[Hugging Face](https:\u002F\u002Fhuggingface.co\u002Fmodels?pipeline_tag=text-generation&sort=downloads)下载它们。例如，要安装 `CodeLlama-7b-Instruct` 模型：\n\n```sh\ncd ~\u002F.emacs.d\u002Forg-ai\u002Ftext-generation-webui\u002Fmodels\ngit clone git@hf.co:codellama\u002FCodeLlama-7b-Instruct-hf\n```\n\n#### 第四步：启动 API 服务器\n\n```sh\ncd ~\u002F.emacs.d\u002Forg-ai\u002Ftext-generation-webui\nconda activate org-ai\npython server.py --api --model CodeLlama-7b-Instruct-hf\n```\n\n根据您的硬件和所使用的模型，您可能需要调整服务器参数，例如使用 `--load-in-8bit` 来减少内存占用，或者在没有合适 GPU 的情况下使用 `--cpu`。\n\n现在您应该能够通过在 `#+begin_ai` 块中添加 
`:local` 选项，来在 org-ai 中使用本地模型：\n\n```\n#+begin_ai :local\nHello CodeLlama!\n#+end_ai\n```\n\n## 常见问题\n\n### 这是 OpenAI 特有的吗？\n不是，OpenAI 是最容易搭建的（你只需要一个 API 密钥），但你也可以使用本地模型。请参阅上文关于如何使用 Stable Diffusion 和本地 LLM 与 oobabooga\u002Ftext-generation-webui 的说明。Anthropic 的 Claude 和 perplexity.ai 也受支持。如果你希望支持其他服务，请提交一个问题或拉取请求。我响应可能会慢一些，但如果兴趣足够多，我会添加相应支持。\n\n### 周围是否有类似的项目？\n\ngptel 包为 OpenAI 的 ChatGPT API 提供了一个替代接口：https:\u002F\u002Fgithub.com\u002Fkarthink\u002Fgptel\n\n\n## 赞助\n\n如果你觉得这个项目有用，请考虑 [赞助](https:\u002F\u002Fgithub.com\u002Fsponsors\u002Frksm)。谢谢！","# org-ai 快速上手指南\n\n## 环境准备\n\n### 系统要求\n- Emacs（推荐使用最新版本）\n- 一个支持 Org-mode 的 Emacs 配置\n- 网络连接（用于访问 OpenAI API 或 Stable Diffusion WebUI）\n\n### 前置依赖\n- 安装好 `org-mode` 和 `org-babel`\n- 安装 `org-ai` 所需的依赖包，如 `markdown-mode`（用于语法高亮）\n- 如果使用语音输入\u002F输出功能，需要安装：\n  - Whisper（语音识别）\n  - espeak \u002F greader（语音合成）\n- 如果使用 Stable Diffusion，需要运行 [stable-diffusion-webui](https:\u002F\u002Fgithub.com\u002FAUTOMATIC1111\u002Fstable-diffusion-webui)\n\n> 📌 提示：如果在国内，建议使用国内镜像源加速安装过程。\n\n---\n\n## 安装步骤\n\n### 使用 MELPA 安装（推荐）\n\n在 Emacs 中执行以下命令：\n\n```elisp\nM-x package-install RET org-ai RET\n```\n\n### 使用 straight.el 安装\n\n将以下代码添加到你的 Emacs 配置中：\n\n```elisp\n(straight-use-package '(org-ai :type git :host github :repo \"rksm\u002Forg-ai\"))\n```\n\n然后执行：\n\n```elisp\nM-x straight-fetch RET\nM-x straight-build RET\n```\n\n### 手动安装\n\n从 GitHub 克隆项目：\n\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Frksm\u002Forg-ai.git ~\u002F.emacs.d\u002Forg-ai\n```\n\n然后将以下代码添加到你的 Emacs 配置文件中：\n\n```elisp\n(add-to-list 'load-path \"~\u002F.emacs.d\u002Forg-ai\")\n(require 'org-ai)\n```\n\n### 设置 OpenAI API 密钥\n\n获取 OpenAI 账户并创建 API Key：[OpenAI 平台](https:\u002F\u002Fplatform.openai.com\u002F)\n\n在 Emacs 配置中设置 API Key：\n\n```elisp\n(setq org-ai-openai-api-token \"your_openai_api_key_here\")\n```\n\n> 💡 可选：如果你希望使用 Azure、perplexity.ai 或 Anthropic \u002F Claude，可以参考 README 中的相关配置说明。\n\n### 设置语音输入\u002F输出（可选）\n\n#### Whisper（语音识别）\n\n- macOS 用户：使用 Apple 内置的 
Siri 语音输入（或安装其他工具）\n- Windows 用户：安装 Whisper 并配置路径\n\n#### espeak \u002F greader（语音合成）\n\n安装并配置 espeak 或 greader：\n\n```bash\nsudo apt-get install espeak  # Debian\u002FUbuntu\nbrew install espeak          # macOS\n```\n\n启用语音输出：\n\n```elisp\n(setq org-ai-talk-output-enable t)\n```\n\n### 设置 Stable Diffusion（可选）\n\n运行 [stable-diffusion-webui](https:\u002F\u002Fgithub.com\u002FAUTOMATIC1111\u002Fstable-diffusion-webui) 并确保其服务正常启动。\n\n---\n\n## 基本使用\n\n### 最简单的使用示例\n\n在 Org-mode 缓冲区中插入以下内容：\n\n```org\n#+begin_ai\nIs Emacs the greatest editor?\n#+end_ai\n```\n\n按下 `C-c C-c`，ChatGPT 将生成回答。你可以继续对话，按 `C-g` 中断请求。\n\n### 图像生成示例\n\n要使用 DALL-E 生成图像，添加 `:image` 关键字：\n\n```org\n#+begin_ai :image :size 1024x1024\nHyper realistic sci-fi rendering of super complicated technical machine.\n#+end_ai\n```\n\n按下 `C-c C-c`，DALL-E 将根据提示生成图像，并显示在当前块下方。\n\n### 使用 Stable Diffusion 生成图像\n\n```org\n#+begin_ai :sd-image\nA beautiful sunset over a mountain range\n#+end_ai\n```\n\n你也可以通过引用已有图像进行 img2img 操作：\n\n```org\n#+name: my-image\n[[file:image.png]]\n\n#+begin_ai :sd-image :image-ref my-image\nforest, Gogh style\n#+end_ai\n```\n\n---\n\n## 小贴士\n\n- 你可以通过 `customize-variable` 自定义默认参数，例如模型、尺寸等。\n- 在 `#+begin_ai...#+end_ai` 块中，可以使用 `[SYS]:` 添加系统提示，以引导 AI 的行为。\n- 启用语法高亮：在块中指定语言，如 `#+begin_ai markdown`。\n\n---\n\n完成以上步骤后，你就可以在 Org-mode 中快速使用 AI 功能了！","一个研究人员正在撰写一篇关于人工智能在医疗诊断中的应用的综述论文，需要频繁查阅大量文献、生成摘要、绘制示意图，并与AI进行交互式讨论。\n\n### 没有 org-ai 时\n\n- 需要频繁切换多个工具（如浏览器、文本编辑器、图像生成工具）来完成文献检索、内容撰写和图像生成，效率低下。\n- 无法直接在文档中调用AI模型生成内容，导致写作流程中断，影响思路连贯性。\n- 图像生成需要单独使用DALL-E或Stable Diffusion等工具，难以与文档内容实时结合。\n- 无法通过语音输入与AI交互，对于长时间写作容易产生疲劳感。\n\n### 使用 org-ai 后\n\n- 可以直接在Org-mode文档中嵌入AI调用块，实现文献查询、内容生成、图像绘制等功能的一体化操作，提升工作效率。\n- 支持在文档内直接调用ChatGPT生成文本内容，例如文献摘要、章节草稿等，保持写作流程的连贯性。\n- 通过`:image`关键字可直接在文档中生成与内容相关的示意图，无需跳转到其他工具，增强可视化表达。\n- 支持语音输入和输出，用户可以通过语音与AI对话，减少键盘输入负担，提高写作舒适度。\n\n核心价值：org-ai 
将AI能力无缝集成到日常写作流程中，显著提升了科研人员在文献整理、内容创作和视觉表达方面的效率与体验。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Frksm_org-ai_d85a18e1.png","rksm","Robert Krahn","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Frksm_07de2ba0.png","Hyper hyper!",null,"robert.krahn@gmail.com","https:\u002F\u002Frobert.kra.hn","https:\u002F\u002Fgithub.com\u002Frksm",[84,88],{"name":85,"color":86,"percentage":87},"Emacs Lisp","#c065db",99.7,{"name":89,"color":90,"percentage":91},"YASnippet","#32AB90",0.3,815,64,"2026-03-28T14:57:19","GPL-3.0","Linux, macOS, Windows","需要 NVIDIA GPU，显存 8GB+，CUDA 11.7+","16GB+",{"notes":100,"python":101,"dependencies":102},"建议使用 conda 管理环境，首次运行需下载约 5GB 模型文件。Stable Diffusion 需要额外安装 stable-diffusion-webui，并根据文档配置相关依赖。","3.8+",[103,104,105,106,107,108,109],"torch>=2.0","transformers>=4.30","accelerate","markdown-mode","espeak","greader","whisper",[15,26,14,13,53],[112,113,114,115,116,117],"ai","chatgpt","emacs","gpt","generative-models","llms","2026-03-27T02:49:30.150509","2026-04-06T05:37:46.517298",[121,126,131,136,141,146],{"id":122,"question_zh":123,"answer_zh":124,"source_url":125},5817,"如何解决模型参数未传递的问题？","确保 API 密钥中没有多余的换行符。可以使用以下代码片段来去除末尾的换行符：\r\n\r\n```elisp\r\n(defun db\u002Fopenai-api-key ()\r\n  (substring (shell-command-to-string \"gpg -q -d ~\u002F.openai-api.key.gpg\") 0 -1))\r\n```\r\n\r\n然后将 `org-ai-openai-api-token` 设置为该函数的返回值。","https:\u002F\u002Fgithub.com\u002Frksm\u002Forg-ai\u002Fissues\u002F8",{"id":127,"question_zh":128,"answer_zh":129,"source_url":130},5818,"如何解决中文输入输出乱码的问题？","请确保 API 密钥以 UTF-8 编码读取。如果问题仍然存在，可以尝试更新到最新版本，或者参考此提交的更改：https:\u002F\u002Fgithub.com\u002Frksm\u002Forg-ai\u002Fcommit\u002F70223e5017e1cd93b618f174fc7ce7c96b946c77。","https:\u002F\u002Fgithub.com\u002Frksm\u002Forg-ai\u002Fissues\u002F3",{"id":132,"question_zh":133,"answer_zh":134,"source_url":135},5819,"如何解决 `void-variable evil-org-ai-on-region` 错误？","在配置文件顶部添加 `(require 'evil)` 可以解决变量未定义的问题。如果遇到其他问题（如无限循环），请单独提交新的 Issue 
进行报告。","https:\u002F\u002Fgithub.com\u002Frksm\u002Forg-ai\u002Fissues\u002F133",{"id":137,"question_zh":138,"answer_zh":139,"source_url":140},5820,"如何支持 Code Llama 等本地模型？","可以通过自定义提示模板来支持本地模型。例如，修改 `org-ai-oobabooga-create-prompt-function` 来生成适合本地模型的提示格式。具体设置步骤可参考：https:\u002F\u002Fgithub.com\u002Frksm\u002Forg-ai#using-local-llms-with-oobaboogatext-generation-webui。","https:\u002F\u002Fgithub.com\u002Frksm\u002Forg-ai\u002Fissues\u002F78",{"id":142,"question_zh":143,"answer_zh":144,"source_url":145},5821,"如何保存和继续 org-ai-on-region 的对话？","可以通过设置 `org-ai-on-region-file` 变量，将对话保存到指定的 Org 文件中。例如：\r\n\r\n```elisp\r\n(setq org-ai-on-region-file (expand-file-name \"org-ai-on-region.org\" org-directory))\r\n```\r\n\r\n这样每次调用 `org-ai-on-region` 时，对话内容会自动追加到该文件中。","https:\u002F\u002Fgithub.com\u002Frksm\u002Forg-ai\u002Fissues\u002F29",{"id":147,"question_zh":148,"answer_zh":149,"source_url":125},5822,"如何通过 auth-sources 配置 OpenAI API 密钥？","可以使用 `auth-sources` 来管理 API 密钥，避免直接在配置中硬编码。官方文档提供了相关说明：https:\u002F\u002Fgithub.com\u002Frksm\u002Forg-ai#api-key-with-auth-source。",[151,154,157,160,163,166,169,172,175,178,181,184,187,190,193,196,199,202,205,208],{"id":152,"version":153,"summary_zh":79,"released_at":79},105472,"v0.5.5",{"id":155,"version":156,"summary_zh":79,"released_at":79},105473,"v0.5.4",{"id":158,"version":159,"summary_zh":79,"released_at":79},105474,"v0.5.3",{"id":161,"version":162,"summary_zh":79,"released_at":79},105475,"v0.5.2",{"id":164,"version":165,"summary_zh":79,"released_at":79},105476,"v0.5.1",{"id":167,"version":168,"summary_zh":79,"released_at":79},105477,"v0.5.0",{"id":170,"version":171,"summary_zh":79,"released_at":79},105478,"v0.4.9",{"id":173,"version":174,"summary_zh":79,"released_at":79},105479,"v0.4.8",{"id":176,"version":177,"summary_zh":79,"released_at":79},105480,"v0.4.7",{"id":179,"version":180,"summary_zh":79,"released_at":79},105481,"v0.4.6",{"id":182,"version":183,"summary_zh":79,"released_at":79},105482,"v0.4.5",{"id":185,"version":186,"s
ummary_zh":79,"released_at":79},105483,"v0.4.4",{"id":188,"version":189,"summary_zh":79,"released_at":79},105484,"v0.4.3",{"id":191,"version":192,"summary_zh":79,"released_at":79},105485,"v0.4.2",{"id":194,"version":195,"summary_zh":79,"released_at":79},105486,"v0.4.1",{"id":197,"version":198,"summary_zh":79,"released_at":79},105487,"v0.4.0",{"id":200,"version":201,"summary_zh":79,"released_at":79},105488,"v0.3.13",{"id":203,"version":204,"summary_zh":79,"released_at":79},105489,"v0.3.12",{"id":206,"version":207,"summary_zh":79,"released_at":79},105490,"v0.3.11",{"id":209,"version":210,"summary_zh":79,"released_at":79},105491,"v0.3.10"]