[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-brexhq--prompt-engineering":3,"tool-brexhq--prompt-engineering":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",160411,2,"2026-04-18T23:33:24",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",109154,"2026-04-18T11:18:24",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[52,13,15,14],"插件",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":32,"last_commit_at":59,"category_tags":60,"status":17},4721,"markitdown","microsoft\u002Fmarkitdown","MarkItDown 是一款由微软 AutoGen 团队打造的轻量级 Python 工具，专为将各类文件高效转换为 Markdown 格式而设计。它支持 PDF、Word、Excel、PPT、图片（含 OCR）、音频（含语音转录）、HTML 乃至 YouTube 链接等多种格式的解析，能够精准提取文档中的标题、列表、表格和链接等关键结构信息。\n\n在人工智能应用日益普及的今天，大语言模型（LLM）虽擅长处理文本，却难以直接读取复杂的二进制办公文档。MarkItDown 恰好解决了这一痛点，它将非结构化或半结构化的文件转化为模型“原生理解”且 Token 效率极高的 Markdown 格式，成为连接本地文件与 AI 分析 pipeline 的理想桥梁。此外，它还提供了 MCP（模型上下文协议）服务器，可无缝集成到 Claude Desktop 等 LLM 应用中。\n\n这款工具特别适合开发者、数据科学家及 AI 研究人员使用，尤其是那些需要构建文档检索增强生成（RAG）系统、进行批量文本分析或希望让 AI 助手直接“阅读”本地文件的用户。虽然生成的内容也具备一定可读性，但其核心优势在于为机器",93400,"2026-04-06T19:52:38",[52,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":66,"readme_en":67,"readme_zh":68,"quickstart_zh":69,"use_case_zh":70,"hero_image_url":71,"owner_login":72,"owner_name":73,"owner_avatar_url":74,"owner_bio":75,"owner_company":76,"owner_location":76,"owner_email":76,"owner_twitter":76,"owner_website":77,"owner_url":78,"languages":76,"stars":79,"forks":80,"last_commit_at":81,"license":82,"difficulty_score":83,"env_os":75,"env_gpu":84,"env_ram":84,"env_deps":85,"category_tags":88,"github_topics":76,"view_count":32,"oss_zip_url":76,"oss_zip_packed_at":76,"status":17,"created_at":89,"updated_at":90,"faqs":91,"releases":97},9514,"brexhq\u002Fprompt-engineering","prompt-engineering","Tips and tricks for working with Large Language Models like OpenAI's GPT-4.","prompt-engineering 是由 Brex 团队开源的一份大语言模型（LLM）实战指南，旨在分享在生产环境中使用 GPT-4 等模型的策略、技巧与安全建议。它并非一个软件库，而是一份凝聚了内部研发经验的“避坑手册”，帮助开发者解决模型输出不稳定、幻觉（胡说八道）以及提示词注入等常见难题。\n\n这份指南深入浅出地解释了 LLM 的工作原理，从基础的 Token 概念到高级的提示工程策略，如思维链（Chain of Thought）、ReAct 框架、数据嵌入格式优化（JSON\u002FMarkdown）以及微调的利弊分析。它不仅涵盖了如何设计高效的提示词，还详细探讨了系统安全性，包括防止越狱和信息泄露的方法。\n\n该内容特别适合正在构建基于大模型应用的开发者、技术负责人及 AI 研究人员。对于希望将 LLM 从“玩具”转化为可靠生产力的团队来说，prompt-engineering 提供了经过验证的最佳实践和结构化方法论。鉴于大模型技术迭代迅速，这份文档也保持持续更新，欢迎社区共同完善，是入门与进阶提示工程技术的优质参考资料。","# [Brex's](https:\u002F\u002Fbrex.com) Prompt Engineering Guide\n\nThis guide was created by Brex for internal purposes. It's based on\nlessons learned from researching and creating Large Language Model (LLM)\nprompts for production use cases. It covers the history around LLMs as well as\nstrategies, guidelines, and safety recommendations for working with and\nbuilding programmatic systems on top of large language models, like [OpenAI's\nGPT-4](https:\u002F\u002Fopenai.com\u002Fresearch\u002Fgpt-4).\n\nThe examples in this document were generated with a non-deterministic language\nmodel and the same examples may give you different results.\n\nThis is a living document. The state-of-the-art best practices and strategies\naround LLMs are evolving rapidly every day. Discussion and suggestions for\nimprovements are encouraged.\n\n## Table of Contents\n- [What is a Large Language Model?](#what-is-a-large-language-model-llm)\n  - [A Brief, Incomplete, and Somewhat Incorrect History of Language Models](#a-brief-incomplete-and-somewhat-incorrect-history-of-language-models)\n    - [Pre-2000’s](#pre-2000s)\n    - [Mid-2000’s](#mid-2000s)\n    - [Early-2010’s](#early-2010s)\n    - [Late-2010’s](#late-2010s)\n    - [2020’s](#2020s)\n- [What is a prompt?](#what-is-a-prompt)\n  - [Hidden Prompts](#hidden-prompts)\n  - [Tokens](#tokens)\n  - [Token Limits](#token-limits)\n  - [Prompt Hacking](#prompt-hacking)\n    - [Jailbreaks](#jailbreaks)\n    - [Leaks](#leaks)\n- [Why do we need prompt engineering?](#why-do-we-need-prompt-engineering)\n  - [Give a Bot a Fish](#give-a-bot-a-fish)\n    - [Semantic Search](#semantic-search)\n  - [Teach a Bot to Fish](#teach-a-bot-to-fish)\n    - [Command Grammars](#command-grammars)\n    - [ReAct](#react)\n    - [GPT-4 vs GPT-3.5](#gpt-4-vs-gpt-35)\n- [Strategies](#strategies)\n  - [Embedding Data](#embedding-data)\n    - [Simple Lists](#simple-lists)\n    - [Markdown Tables](#markdown-tables)\n    - [JSON](#json)\n    - [Freeform Text](#freeform-text)\n    - [Nested Data](#nested-data)\n  - [Citations](#citations)\n  - [Programmatic Consumption](#programmatic-consumption)\n  - [Chain of Thought](#chain-of-thought)\n    - [Averaging](#averaging)\n    - [Interpreting Code](#interpreting-code)\n    - [Delimiters](#delimiters)\n  - [Fine Tuning](#fine-tuning)\n    - [Downsides](#downsides)\n- [Additional Resources](#additional-resources)\n\n## What is a Large Language Model (LLM)?\n\nA large language model is a prediction engine that takes a sequence of words\nand tries to predict the most likely sequence to come after that sequence[^1].\nIt does this by assigning a probability to likely next sequences and then\nsamples from those to choose one[^2]. The process repeats until some stopping\ncriteria is met.\n\nLarge language models learn these probabilities by training on large corpuses\nof text. A consequence of this is that the models will cater to some use cases\nbetter than others (e.g. if it’s trained on GitHub data, it’ll understand the\nprobabilities of sequences in source code really well). Another consequence is\nthat the model may generate statements that seem plausible, but are actually\njust random without being grounded in reality.\n\nAs language models become more accurate at predicting sequences, [many\nsurprising abilities\nemerge](https:\u002F\u002Fwww.assemblyai.com\u002Fblog\u002Femergent-abilities-of-large-language-models\u002F).\n\n[^1]: Language models actually use tokens, not words. A token roughly maps to a syllable in a word, or about 4 characters.\n[^2]: There are many different pruning and sampling strategies to alter the behavior and performance of the sequences.\n\n### A Brief, Incomplete, and Somewhat Incorrect History of Language Models\n\n> :pushpin: Skip [to here](#what-is-a-prompt) if you'd like to jump past the\n> history of language models. This section is for the curious minded, though\n> may also help you understand the reasoning behind the advice that follows.\n\n#### Pre-2000’s\n\n[Language models](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FLanguage_model#Model_types)\nhave existed for decades, though traditional language models (e.g. [n-gram\nmodels](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FN-gram_language_model)) have many\ndeficiencies in terms of an explosion of state space ([the curse of\ndimensionality](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FCurse_of_dimensionality)) and\nworking with novel phrases that they’ve never seen (sparsity). Plainly, older\nlanguage models can generate text that vaguely resembles the statistics of\nhuman generated text, but there is no consistency within the output – and a\nreader will quickly realize it’s all gibberish. N-gram models also don’t scale\nto large values of N, so are inherently limited.\n\n#### Mid-2000’s\n\nIn 2007, Geoffrey Hinton – famous for popularizing backpropagation in 1980’s –\n[published an important advancement in training neural\nnetworks](http:\u002F\u002Fwww.cs.toronto.edu\u002F~fritz\u002Fabsps\u002Ftics.pdf) that unlocked much\ndeeper networks. Applying these simple deep neural networks to language\nmodeling helped alleviate some of problems with language models – they\nrepresented nuanced arbitrary concepts in a finite space and continuous way,\ngracefully handling sequences not seen in the training corpus. These simple\nneural networks learned the probabilities of their training corpus well, but\nthe output would statistically match the training data and generally not be\ncoherent relative to the input sequence. \n\n#### Early-2010’s\n\nAlthough they were first introduced in 1995, [Long Short-Term Memory (LSTM)\nNetworks](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FLong_short-term_memory) found their\ntime to shine in the 2010’s. LSTMs allowed models to process arbitrary length\nsequences and, importantly, alter their internal state dynamically as they\nprocessed the input to remember previous things they saw. This minor tweak led\nto remarkable improvements. In 2015, Andrej Karpathy [famously wrote about\ncreating a character-level\nlstm](http:\u002F\u002Fkarpathy.github.io\u002F2015\u002F05\u002F21\u002Frnn-effectiveness\u002F) that performed\nfar better than it had any right to.\n\nLSTMs have seemingly magical abilities, but struggle with long term\ndependencies. If you asked it to complete the sentence, “In France, we\ntraveled around, ate many pastries, drank lots of wine, ... lots more text ...\n, but never learned how to speak _______”, the model might struggle with\npredicting “French”. They also process input one token at a time, so are\ninherently sequential, slow to train, and the `Nth` token only knows about the\n`N - 1` tokens prior to it.\n\n#### Late-2010’s\n\nIn 2017, Google wrote a paper, [Attention Is All You\nNeed](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1706.03762.pdf), that introduced [Transformer\nNetworks](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FTransformer_(machine_learning_model))\nand kicked off a massive revolution in natural language processing. Overnight,\nmachines could suddenly do tasks like translating between languages nearly as\ngood as (sometimes better than) humans. Transformers are highly parallelizable\nand introduce a mechanism, called “attention”, for the model to efficiently\nplace emphasis on specific parts of the input. Transformers analyze the entire\ninput all at once, in parallel, choosing which parts are most important and\ninfluential. Every output token is influenced by every input token.\n\nTransformers are highly parallelizable, efficient to train, and produce\nastounding results. A downside to transformers is that they have a fixed input\nand output size – the context window – and computation increases\nquadratically with the size of this window (in some cases, memory does as\nwell!) [^3].\n\nTransformers are not the end of the road, but the vast majority of recent\nimprovements in natural language processing have involved them. There is still\nabundant active research on various ways of implementing and applying them,\nsuch as [Amazon’s AlexaTM\n20B](https:\u002F\u002Fwww.amazon.science\u002Fblog\u002F20b-parameter-alexa-model-sets-new-marks-in-few-shot-learning)\nwhich outperforms GPT-3 in a number of tasks and is an order of magnitude\nsmaller in its number of parameters.\n\n[^3]: There are more recent variations to make these more compute and memory efficient, but remains an active area of research.\n\n#### 2020’s\n\nWhile technically starting in 2018, the theme of the 2020’s has been\nGenerative Pre-Trained models – more famously known as GPT. One\nyear after the “Attention Is All You Need” paper, OpenAI released [Improving\nLanguage Understanding by Generative\nPre-Training](https:\u002F\u002Fs3-us-west-2.amazonaws.com\u002Fopenai-assets\u002Fresearch-covers\u002Flanguage-unsupervised\u002Flanguage_understanding_paper.pdf).\nThis paper established that you can train a large language model on a massive\nset of data without any specific agenda, and then once the model has learned\nthe general aspects of language, you can fine-tune it for specific tasks and\nquickly get state-of-the-art results.\n\nIn 2020, OpenAI followed up with their GPT-3 paper [Language Models are\nFew-Shot\nLearners](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2020\u002Ffile\u002F1457c0d6bfcb4967418bfb8ac142f64a-Paper.pdf),\nshowing that if you scale up GPT-like models by another factor of ~10x, in\nterms of number of parameters and quantity of training data, you no\nlonger have to fine-tune it for many tasks. The capabilities emerge naturally\nand you get state-of-the-art results via text interaction with the model.\n\nIn 2022, OpenAI followed-up on their GPT-3 accomplishments by releasing\n[InstructGPT](https:\u002F\u002Fopenai.com\u002Fresearch\u002Finstruction-following). The intent\nhere was to tweak the model to follow instructions, while also being less\ntoxic and biased in its outputs. The key ingredient here was [Reinforcement\nLearning from Human Feedback (RLHF)](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1706.03741.pdf), a\nconcept co-authored by Google and OpenAI in 2017[^4], which allows humans to\nbe in the training loop to fine-tune the model output to be more in line with\nhuman preferences. InstructGPT is the predecessor to the now famous\n[ChatGPT](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FChatGPT).\n\nOpenAI has been a major contributor to large language models over the last few\nyears, including the most recent introduction of\n[GPT-4](https:\u002F\u002Fcdn.openai.com\u002Fpapers\u002Fgpt-4.pdf), but they are not alone. Meta\nhas introduced many open source large language models like\n[OPT](https:\u002F\u002Fhuggingface.co\u002Ffacebook\u002Fopt-66b),\n[OPT-IML](https:\u002F\u002Fhuggingface.co\u002Ffacebook\u002Fopt-iml-30b) (instruction tuned),\nand [LLaMa](https:\u002F\u002Fai.facebook.com\u002Fblog\u002Flarge-language-model-llama-meta-ai\u002F).\nGoogle released models like\n[FLAN-T5](https:\u002F\u002Fhuggingface.co\u002Fgoogle\u002Fflan-t5-xxl) and\n[BERT](https:\u002F\u002Fhuggingface.co\u002Fbert-base-uncased). And there is a huge open\nsource research community releasing models like\n[BLOOM](https:\u002F\u002Fhuggingface.co\u002Fbigscience\u002Fbloom) and\n[StableLM](https:\u002F\u002Fgithub.com\u002Fstability-AI\u002FstableLM\u002F).\n\nProgress is now moving so swiftly that every few weeks the state-of-the-art is\nchanging or models that previously required clusters to run now run on\nRaspberry PIs.\n\n[^4]: 2017 was a big year for natural language processing.\n\n## What is a prompt?\n\nA prompt, sometimes referred to as context, is the text provided to a\nmodel before it begins generating output. It guides the model to explore a\nparticular area of what it has learned so that the output is relevant to your\ngoals. As an analogy, if you think of the language model as a source code\ninterpreter, then a prompt is the source code to be interpreted. Somewhat\namusingly, a language model will happily attempt to guess what source code\nwill do:\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"450\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_4b3a60400936.png\" title=\"The GPT-4 model interpreting Python code.\">\n\u003C\u002Fp>\n\nAnd it *almost* interprets the Python perfectly!\n\nFrequently, prompts will be an instruction or a question, like:\n\n \u003Cp align=\"center\">\n  \u003Cimg width=\"500\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_45f62943362b.png\">\n\u003C\u002Fp>\n\nOn the other hand, if you don’t specify a prompt, the model has no anchor to\nwork from and you’ll see that it just **randomly samples from anything it has\never consumed**:\n\n**From GPT-3-Davinci:**\n\n| ![image](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_b022bc17d71f.png) | ![image](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_ce74e26e27a1.png) | ![image](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_b653749a4907.png) |\n| --- | --- | --- |\n\n**From GPT-4:**\n| ![image](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_ed36272229a8.png) | ![image](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_e45221c7c932.png) | ![image](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_ac923372aaa3.png) |\n| --- | --- | --- |\n\n### Hidden Prompts\n\n> :warning: Always assume that any content in a hidden prompt can be seen by the user.\n\nIn applications where a user is interacting with a model dynamically, such as\nchatting with the model, there will typically be portions of the prompt that\nare never intended to be seen by the user. These hidden portions may occur\nanywhere, though there is almost always a hidden prompt at the start of a\nconversation.\n\nTypically, this includes an initial chunk of text that sets the tone, model\nconstraints, and goals, along with other dynamic information that is specific\nto the particular session – user name, location, time of day, etc...\n\nThe model is static and frozen at a point in time, so if you want it to know\ncurrent information, like the time or the weather, you must provide it.\n\nIf you’re using [the OpenAI Chat\nAPI](https:\u002F\u002Fplatform.openai.com\u002Fdocs\u002Fguides\u002Fchat\u002Fintroduction), they\ndelineate hidden prompt content by placing it in the `system` role.\n\nHere’s an example of a hidden prompt followed by interactions with the content\nin that prompt:\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_2d6d351e130e.png\" title=\" A very simple hidden prompt.\">\n\u003C\u002Fp>\n\nIn this example, you can see we explain to the bot the various roles, some\ncontext on the user, some dynamic data we want the bot to have access to, and\nthen guidance on how the bot should respond.\n\nIn practice, hidden prompts may be quite large. Here’s a larger prompt taken\nfrom a [ChatGPT command-line\nassistant](https:\u002F\u002Fgithub.com\u002Fmanno\u002Fchatgpt-linux-assistant\u002Fblob\u002Fmain\u002Fsystem_prompt.txt):\n\n\u003Cdetails>\n  \u003Csummary>From: https:\u002F\u002Fgithub.com\u002Fmanno\u002Fchatgpt-linux-assistant \u003C\u002Fsummary>\n\n```\nWe are a in a chatroom with 3 users. 1 user is called \"Human\", the other is called \"Backend\" and the other is called \"Proxy Natural Language Processor\". I will type what \"Human\" says and what \"Backend\" replies. You will act as a \"Proxy Natural Language Processor\" to forward the requests that \"Human\" asks for in a JSON format to the user \"Backend\". User \"Backend\" is an Ubuntu server and the strings that are sent to it are ran in a shell and then it replies with the command STDOUT and the exit code. The Ubuntu server is mine. When \"Backend\" replies with the STDOUT and exit code, you \"Proxy Natural Language Processor\" will parse and format that data into a simple English friendly way and send it to \"Human\". Here is an example:\n\nI ask as human:\nHuman: How many unedited videos are left?\nThen you send a command to the Backend:\nProxy Natural Language Processor: @Backend {\"command\":\"find .\u002FVideos\u002FUnedited\u002F -iname '*.mp4' | wc -l\"}\nThen the backend responds with the command STDOUT and exit code:\nBackend: {\"STDOUT\":\"5\", \"EXITCODE\":\"0\"}\nThen you reply to the user:\nProxy Natural Language Processor: @Human There are 5 unedited videos left.\n\nOnly reply what \"Proxy Natural Language Processor\" is supposed to say and nothing else. Not now nor in the future for any reason.\n\nAnother example:\n\nI ask as human:\nHuman: What is a PEM certificate?\nThen you send a command to the Backend:\nProxy Natural Language Processor: @Backend {\"command\":\"xdg-open 'https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FPrivacy-Enhanced_Mail'\"}\nThen the backend responds with the command STDOUT and exit code:\nBackend: {\"STDOUT\":\"\", \"EXITCODE\":\"0\"}\nThen you reply to the user:\nProxy Natural Language Processor: @Human I have opened a link which describes what a PEM certificate is.\n\n\nOnly reply what \"Proxy Natural Language Processor\" is supposed to say and nothing else. Not now nor in the future for any reason.\n\nDo NOT REPLY as Backend. DO NOT complete what Backend is supposed to reply. YOU ARE NOT TO COMPLETE what Backend is supposed to reply.\nAlso DO NOT give an explanation of what the command does or what the exit codes mean. DO NOT EVER, NOW OR IN THE FUTURE, REPLY AS BACKEND.\n\nOnly reply what \"Proxy Natural Language Processor\" is supposed to say and nothing else. Not now nor in the future for any reason.\n```\n\u003C\u002Fdetails>\n\nYou’ll see some good practices there, such as including lots of examples,\nrepetition for important behavioral aspects, constraining the replies, etc…\n\n> :warning: Always assume that any content in a hidden prompt can be seen by the user.\n\n### Tokens\n\nIf you thought tokens were :fire: in 2022, tokens in 2023 are on a whole\ndifferent plane of existence. The atomic unit of consumption for a language\nmodel is not a “word”, but rather a “token”. You can kind of think of tokens\nas syllables, and on average they work out to about 750 words per 1,000\ntokens. They represent many concepts beyond just alphabetical characters –\nsuch as punctuation, sentence boundaries, and the end of a document.\n\nHere’s an example of how GPT may tokenize a sequence:\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_72667863536f.png\" title=\"An example tokenization. You can experiment here: https:\u002F\u002Fplatform.openai.com\u002Ftokenizer \">\n\u003C\u002Fp>\n\nYou can experiment with a tokenizer here: [https:\u002F\u002Fplatform.openai.com\u002Ftokenizer](https:\u002F\u002Fplatform.openai.com\u002Ftokenizer)\n\nDifferent models will use different tokenizers with different levels of granularity. You could, in theory, just feed a model 0’s and 1’s – but then the model needs to learn the concept of characters from bits, and then the concept of words from characters, and so forth. Similarly, you could feed the model a stream of raw characters, but then the model needs to learn the concept of words, and punctuation, etc… and, in general, the models will perform worse.\n\nTo learn more, [Hugging Face has a wonderful introduction to tokenizers](https:\u002F\u002Fhuggingface.co\u002Fdocs\u002Ftransformers\u002Ftokenizer_summary) and why they need to exist.\n\nThere’s a lot of nuance around tokenization, such as vocabulary size or different languages treating sentence structure meaningfully different (e.g. words not being separated by spaces). Fortunately, language model APIs will almost always take raw text as input and tokenize it behind the scenes – *so you rarely need to think about tokens*.\n\n**Except for one important scenario, which we discuss next: token limits.**\n\n### Token Limits\n\nPrompts tend to be append-only, because you want the bot to have the entire context of previous messages in the conversation. Language models, in general, are stateless and won’t remember anything about previous requests to them, so you always need to include everything that it might need to know that is specific to the current session.\n\nA major downside of this is that the leading language model architecture, the Transformer, has a fixed input and output size – at a certain point the prompt can’t grow any larger. The total size of the prompt, sometimes referred to as the “context window”, is model dependent. For GPT-3, it is 4,096 tokens. For GPT-4, it is 8,192 tokens or 32,768 tokens depending on which variant you use.\n\nIf your context grows too large for the model, the most common tactic is the truncate the context in a sliding window fashion. If you think of a prompt as `hidden initialization prompt + messages[]`, usually the hidden prompt will remain unaltered, and the `messages[]` array will take the last N messages.\n\nYou may also see more clever tactics for prompt truncation – such as\ndiscarding only the user messages first, so that the bot's previous answers\nstay in the context for as long as possible, or asking an LLM to summarize the\nconversation and then replacing all of the messages with a single message\ncontaining that summary. There is no correct answer here and the solution will\ndepend on your application.\n\nImportantly, when truncating the context, you must truncate aggressively enough to **allow room for the response as well**. OpenAI’s token limits include both the length of the input and the length of the output. If your input to GPT-3 is 4,090 tokens, it can only generate 6 tokens in response.\n\n> 🧙‍♂️ If you’d like to count the number of tokens before sending the raw text to the model, the specific tokenizer to use will depend on which model you are using. OpenAI has a library called [tiktoken](https:\u002F\u002Fgithub.com\u002Fopenai\u002Ftiktoken\u002Fblob\u002Fmain\u002FREADME.md) that you can use with their models – though there is an important caveat that their internal tokenizer may vary slightly in count, and they may append other metadata, so consider this an approximation.\n> \n> If you’d like an approximation without having access to a tokenizer, `input.length \u002F 4` will give a rough, but better than you’d expect, approximation for English inputs.\n\n### Prompt Hacking\n\nPrompt engineering and large language models are a fairly nascent field, so new ways to hack around them are being discovered every day. The two large classes of attacks are:\n\n1. Make the bot bypass any guidelines you have given it.\n2. Make the bot output hidden context that you didn’t intend for the user to see.\n\nThere are no known mechanisms to comprehensively stop these, so it is important that you assume the bot may do or say anything when interacting with an adversarial user. Fortunately, in practice, these are mostly cosmetic concerns.\n\nThink of prompts as a way to improve the normal user experience. **We design prompts so that normal users don’t stumble outside of our intended interactions – but always assume that a determined user will be able to bypass our prompt constraints.**\n\n#### Jailbreaks\n\nTypically hidden prompts will tell the bot to behave with a certain persona and focus on specific tasks or avoid certain words. It is generally safe to assume the bot will follow these guidelines for non-adversarial users, although non-adversarial users may accidentally bypass the guidelines too.\n\nFor  example, we can tell the bot:\n\n```\nYou are a helpful assistant, but you are never allowed to use the word \"computer\".\n```\n\nIf we then ask it a question about computers, it will refer to them as a “device used for computing” because it isn’t allowed to use the word “computer”.\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_39c78c1a57ce.png\" title=\"GPT-4 trying hard to not say the word 'computer'.\">\n\u003C\u002Fp>\n\nIt will absolutely refuse to say the word:\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_eafb92721de3.png\">\n\u003C\u002Fp>\n\nBut we can bypass these instructions and get the model to happily use the word if we trick it by asking it to translate the pig latin version of “computer”.\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_7355e7b148ea.png\">\n\u003C\u002Fp>\n\nThere are [a number of defensive measures](https:\u002F\u002Flearnprompting.org\u002Fdocs\u002Fprompt_hacking\u002Fdefensive_measures\u002Foverview) you can take here, but typically the best bet is to reiterate your most important constraints as close to the end as possible. For the OpenAI chat API, this might mean including it as a `system` message after the last `user` message. Here’s an example:\n\n| ![image](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_8b3264358e4e.png) | ![image](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_1855bea97440.png) |\n| --- | --- |\n\nDespite OpenAI investing a lot into jailbreaks, there are [very clever work arounds](https:\u002F\u002Ftwitter.com\u002Falexalbert__\u002Fstatus\u002F1636488551817965568) being [shared every day](https:\u002F\u002Ftwitter.com\u002Fzswitten\u002Fstatus\u002F1598088267789787136).\n\n#### Leaks\n\nIf you missed the previous warnings in this doc, **you should always assume that any data exposed to the language model will eventually be seen by the user**.\n\nAs part of constructing prompts, you will often embed a bunch of data in hidden prompts (a.k.a. system prompts). **The bot will happily relay this information to the user**:\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_06a492886372.png\" title=\"The bot happily regurgitating the information it knows about the user.\">\n\u003C\u002Fp>\n\nEven if you instruct it not to reveal the information, and it obeys those instructions, there are millions of ways to leak data in the hidden prompt.\n\nHere we have an example where the bot should never mention my city, but a simple reframing of the question get’s it to spill the beans.\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_5676647f3ab4.png\" title=\"The bot refuses to reveal personal information, but we convince it to tell me what city I’m in regardless.\">\n\u003C\u002Fp>\n\nSimilarly, we get the bot to tell us what word it isn’t allowed to say without ever actually saying the word:\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_3f079bd75cd5.png\" title=\"Technically, the bot never said 'computer', but I was still able to get it to tell me everything I needed to know about it.\">\n\u003C\u002Fp>\n\nYou should think of a hidden prompt as a means to make the user experience better or more inline with the persona you’re targeting. **Never place any information in a prompt that you wouldn’t visually render for someone to read on screen**.\n\n## Why do we need prompt engineering?\n\nUp above, we used an analogy of prompts as the “source code” that a language model “interprets”. **Prompt engineering is the art of writing prompts to get the language model to do what we want it to do** – just like software engineering is the art of writing source code to get computers to do what we want them to do.\n\nWhen writing good prompts, you have to account for the idiosyncrasies of the model(s) you’re working with. The strategies will vary with the complexity of the tasks. You’ll have to come up with mechanisms to constrain the model to achieve reliable results, incorporate dynamic data that the model can’t be trained on, account for limitations in the model’s training data, design around context limits, and many other dimensions.\n\nThere’s an old adage that computers will only do what you tell them to do. **Throw that advice out the window**. Prompt engineering inverts this wisdom. It’s like programming in natural language against a non-deterministic computer that will do anything that you haven’t guided it away from doing. \n\nThere are two broad buckets that prompt engineering approaches fall into.\n\n### Give a Bot a Fish\n\nThe “give a bot a fish” bucket is for scenarios when you can explicitly give the bot, in the hidden context, all of the information it needs to do whatever task is requested of it.\n\nFor example, if a user loaded up their dashboard and we wanted to show them a quick little friendly message about what task items they have outstanding, we could get the bot to summarize it as\n\n> You have 4 receipts\u002Fmemos to upload. The most recent is from Target on March 5th, and the oldest is from Blink Fitness on January 17th. Thanks for staying on top of your expenses!\n\nby providing a list of the entire inbox and any other user context we’d like it to have.\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_998cf81d554a.png\" title=\"GPT-3 summarizing a task inbox.\">\n\u003C\u002Fp>\n\nSimilarly, if you were helping a user book a trip, you could:\n\n- Ask the user their dates and destination.\n- Behind the scenes, search for flights and hotels.\n- Embed the flight and hotel search results in the hidden context.\n- Also embed the company’s travel policy in the hidden context.\n\nAnd then the bot will have real-time travel information + constraints that it\ncan use to answer questions for the user. Here’s an example of the bot\nrecommending options, and the user asking it to refine them:\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_cd080694ea2f.png\" title=\"GPT-4 helping a user book a trip.\">\n\u003C\u002Fp>\n\u003Cdetails>\n\n  \u003Csummary>(Full prompt)\u003C\u002Fsummary>\n\n```\nBrex is a platform for managing business expenses. \n\nThe following is a travel expense policy on Brex:\n\n- Airline highest fare class for flights under 6 hours is economy.\n- Airline highest fare class for flights over 6 hours is premium economy.\n- Car rentals must have an average daily rate of $75 or under.\n- Lodging must have an average nightly rate of $400 or under.\n- Lodging must be rated 4 stars or higher.\n- Meals from restaurants, food delivery, grocery, bars & nightlife must be under $75\n- All other expenses must be under $5,000.\n- Reimbursements require review.\n\nThe hotel options are:\n| Hotel Name | Price | Reviews |\n| --- | --- | --- |\n| Hilton Financial District | $109\u002Fnight | 3.9 stars |\n| Hotel VIA | $131\u002Fnight | 4.4 stars |\n| Hyatt Place San Francisco | $186\u002Fnight | 4.2 stars |\n| Hotel Zephyr | $119\u002Fnight | 4.1 stars review |\n\nThe flight options are:\n| Airline | Flight Time | Duration | Number of Stops | Class | Price |\n| --- | --- | --- | --- | --- | --- |\n| United | 5:30am-7:37am | 2hr 7 min | Nonstop | Economy | $248 |\n| Delta | 1:20pm-3:36pm | 2hr 16 min | Nonstop | Economy | $248 |\n| Alaska | 9:50pm-11:58pm | 2hr 8 min | Nonstop | Premium | $512 |\n\nAn employee is booking travel to San Francisco for February 20th to February 25th.\n\nRecommend a hotel and flight that are in policy. Keep the recommendation concise, no longer than a sentence or two, but include pleasantries as though you are a friendly colleague helping me out:\n```\n \n\u003C\u002Fdetails>\n\nThis is the same approach that products like Microsoft Bing use to incorporate dynamic data. When you chat with Bing, it asks the bot to generate three search queries. Then they run three web searches and include the summarized results in the hidden context for the bot to use.\n\nSummarizing this section, the trick to making a good experience is to change the context dynamically in response to whatever the user is trying to do.\n\n> 🧙‍♂️ Giving a bot a fish is the most reliable way to ensure the bot gets a fish. You will get the most consistent and reliable results with this strategy. **Use this whenever you can.**\n\n#### Semantic Search\n\nIf you just need the bot to know a little more about the world, [a common approach is to perform a semantic search](https:\u002F\u002Fgithub.com\u002Fopenai\u002Fopenai-cookbook\u002Fblob\u002Fmain\u002Fexamples\u002FQuestion_answering_using_embeddings.ipynb).\n\nA semantic search is oriented around a document embedding – which you can think of as a fixed-length array[^5] of numbers, where each number represents some aspect of the document (e.g. if it’s a science document, maybe the  843rd number is large, but if it’s an art document the 1,115th number is large – this is overly simplistic, but conveys the idea).[^6]\n\nIn addition to computing an embedding for a document, you can also compute an embedding for a user query using the same function. If the user asks “Why is the sky blue?” – you compute the embedding of that question and, in theory, this embedding will be more similar to embeddings of documents that mention the sky than embeddings that don’t talk about the sky.\n\nTo find documents related to the user query, you compute the embedding and then find the top-N documents that have the most similar embedding. Then we place these documents (or summaries of these documents) in the hidden context for the bot to reference.\n\nNotably, sometimes user queries are so short that the embedding isn’t particularly valuable. There is a clever technique described in [a paper published in December 2022](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2212.10496.pdf) called a “Hypothetical Document Embedding” or HyDE. Using this technique, you ask the model to generate a hypothetical document in response to the user’s query, and then compute the embedding for this generated document. The model  fabricates a document out of thin air – but the approach works!\n\nThe HyDE technique uses more calls to the model, but for many use cases has notable boosts in results.\n\n[^5]: Usually referred to as a vector.\n[^6]: The vector features are learned automatically, and the specific values aren’t directly interpretable by a human without some effort.\n\n### Teach a Bot to Fish\n\nSometimes you’ll want the bot to have the capability to perform actions on the user’s behalf, like adding a memo to a receipt or plotting a chart. Or perhaps we want it to retrieve data in more nuanced ways than semantic search would allow for, like retrieving the past 90 days of expenses.\n\nIn these scenarios, we need to teach the bot how to fish.\n\n#### Command Grammars\n\nWe can give the bot a list of commands for our system to interpret, along with descriptions and examples for the commands, and then have it produce programs composed of those commands.\n\nThere are many caveats to consider when going with this approach. With complex command grammars, the bot will tend to hallucinate commands or arguments that could plausibly exist, but don’t actually. The art to getting this right is enumerating commands that have relatively high levels of abstraction, while giving the bot sufficient flexibility to compose them in novel and useful ways.\n\nFor example, giving the bot a `plot-the-last-90-days-of-expenses` command is not particularly flexible or composable in what the bot can do with it. Similarly, a `draw-pixel-at-x-y [x] [y] [rgb]` command would be far too low-level. But giving the bot a `plot-expenses` and `list-expenses` command provides some good primitives that the bot has some flexibility with.\n\nIn an example below, we use this list of commands:\n\n| Command | Arguments | Description |\n| --- | --- | --- |\n| list-expenses | budget | Returns a list of expenses for a given budget |\n| converse | message | A message to show to the user |\n| plot-expenses | expenses[] | Plots a list of expenses |\n| get-budget-by-name | budget_name | Retrieves a budget by name |\n| list-budgets | | Returns a list of budgets the user has access to |\n| add-memo | inbox_item_id, memo message | Adds a memo to the provided inbox item |\n\nWe provide this table to the model in Markdown format, which the language model handles incredibly well – presumably because OpenAI trains heavily on data from GitHub.\n\nIn this example below, we ask the model to output the commands in [reverse polish notation](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FReverse_Polish_notation)[^7].\n\n[^7]: The model handles the simplicity of RPN astoundingly well.\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_77adcffc8179.png\" title=\"A bot happily generating commands to run in response to user queries.\">\n\u003C\u002Fp>\n\n> 🧠 There are some interesting subtle things going on in that example, beyond just command generation. When we ask it to add a memo to the “shake shack” expense, the model knows that the command `add-memo` takes an expense ID. But we never tell it the expense ID, so it looks up “Shake Shack” in the table of expenses we provided it, then grabs the ID from the corresponding ID column, and then uses that as an argument to `add-memo`.\n\nGetting command grammars working reliably in complex situations can be tricky. The best levers we have here are to provide lots of descriptions, and as **many examples** of usage as we can. Large language models are [few-shot learners](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FFew-shot_learning_(natural_language_processing)), meaning that they can learn a new task by being provided just a few examples. In general, the more examples you provide the better off you’ll be – but that also eats into your token budget, so it’s a balance.\n\nHere’s a more complex example, with the output specified in JSON instead of RPN. And we use Typescript to define the return types of commands.\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_a63c539a19e0.png\" title=\"A bot happily generating commands to run in response to user queries.\">\n\u003C\u002Fp>\n\n\u003Cdetails>\n\n  \u003Csummary>(Full prompt)\u003C\u002Fsummary>\n  \n~~~\nYou are a financial assistant working at Brex, but you are also an expert programmer.\n\nI am a customer of Brex.\n\nYou are to answer my questions by composing a series of commands.\n\nThe output types are:\n\n```typescript\ntype LinkedAccount = {\n    id: string,\n    bank_details: {\n        name: string,\n        type: string,\n    },\n    brex_account_id: string,\n    last_four: string,\n    available_balance: {\n        amount: number,\n        as_of_date: Date,\n    },\n    current_balance: {\n            amount: number,\n        as_of_date: Date,\n    },\n}\n\ntype Expense = {\n  id: string,\n  memo: string,\n  amount: number,\n}\n\ntype Budget = {\n  id: string,\n  name: string,\n  description: string,\n  limit: {\n    amount: number,\n    currency: string,\n  }\n}\n```\n\nThe commands you have available are:\n\n| Command | Arguments | Description | Output Format |\n| --- | --- | --- | --- |\n| nth | index, values[] | Return the nth item from an array | any |\n| push | value | Adds a value to the stack to be consumed by a future command | any |\n| value | key, object | Returns the value associated with a key | any |\n| values | key, object[] | Returns an array of values pulled from the corresponding key in array of objects | any[] |\n| sum | value[] | Sums an array of numbers | number |\n| plot | title, values[] | Plots the set of values in a chart with the given title | Plot |\n| list-linked-accounts |  | \"Lists all bank connections that are eligible to make ACH transfers to Brex cash account\" | LinkedAccount[] |\n| list-expenses | budget_id | Given a budget id, returns the list of expenses for it | Expense[]\n| get-budget-by-name | name | Given a name, returns the budget | Budget |\n| add-memo | expense_id, message | Adds a memo to an expense | bool |\n| converse | message | Send the user a message | null |\n\nOnly respond with commands.\n\nOutput the commands in JSON as an abstract syntax tree.\n\nIMPORTANT - Only respond with a program. Do not respond with any text that isn't part of a program. Do not write prose, even if instructed. Do not explain yourself.\n\nYou can only generate commands, but you are an expert at generating commands.\n~~~\n\n\u003C\u002Fdetails>\n\nThis version is a bit easier to parse and interpret if your language of choice has a `JSON.parse` function.\n\n> 🧙‍♂️ There is no industry established best format for defining a DSL for the model to generate programs. So consider this an area of active research. You will bump into limits. And as we overcome these limits, we may discover more optimal ways of defining commands.\n\n#### ReAct\n\nIn March of 2023, Princeton and Google released a paper “[ReAct: Synergizing Reasoning and Acting in Language Models](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2210.03629.pdf)”, where they introduce a variant of command grammars that allows for fully autonomous interactive execution of actions and retrieval of data.\n\nThe model is instructed to return a `thought` and an `action` that it would like to perform. Another agent (e.g. our client) then performs the `action` and returns it to the model as an `observation`. The model will then loop to return more thoughts and actions until it returns an `answer`.\n\nThis is an incredibly powerful technique, effectively allowing the bot to be its own research assistant and possibly take actions on behalf of the user. Combined with a powerful command grammar, the bot should rapidly be able to answer a massive set of user requests.\n\nIn this example, we give the model a small set of commands related to getting employee data and searching wikipedia:\n\n| Command | Arguments | Description |\n| --- | --- | --- |\n| find_employee | name | Retrieves an employee by name |\n| get_employee | id | Retrieves an employee by ID |\n| get_location | id | Retrieves a location by ID |\n| get_reports | employee_id | Retrieves a list of employee ids that report to the employee associated with employee_id. |\n| wikipedia | article | Retrieves a wikipedia article on a topic. |\n\nWe then ask the bot a simple question, “Is my manager famous?”.\n\nWe see that the bot:\n\n1. First looks up our employee profile.\n2. From our profile, gets our manager’s id and looks up their profile.\n3. Extracts our manager’s name and searches for them on Wikipedia.\n    - I chose a fictional character for the manager in this scenario.\n4. The bot reads the wikipedia article and concludes that can’t be my manager since it is a fictional character.\n5. The bot then modifies its search to include (real person).\n6. Seeing that there are no results, the bot concludes that my manager is not famous.\n\n| ![image](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_2e125875ccab.png) | ![image](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_196ae15571ab.png) |\n| --- | --- |\n\n\u003Cdetails>\n\u003Csummary>(Full prompt)\u003C\u002Fsummary>\n\n~~~\nYou are a helpful assistant. You run in a loop, seeking additional information to answer a user's question until you are able to answer the question.\n\nToday is June 1, 2025. My name is Fabian Seacaster. My employee ID is 82442.\n\nThe commands to seek information are:\n\n| Command | Arguments | Description |\n| --- | --- | --- |\n| find_employee | name | Retrieves an employee by name |\n| get_employee | id | Retrieves an employee by ID |\n| get_location | id | Retrieves a location by ID |\n| get_reports | employee_id | Retrieves a list of employee ids that report to the employee associated with `employee_id`. |\n| wikipedia | article | Retrieves a wikipedia article on a topic. |\n\nYour response will be in JSON and will include a \"Thought\" + \"Action\" to retrieve data that you need in order to answer the question, or it will include the \"Answer\". When data has been retrieved, it will be included as an \"Observation\".\n\nYou will continue generating thoughts and actions until you get to an answer, or conclude that you can't.\n\nExample 1:\n```\nUser: What is the population of Philadelphia?\n\nAssistant: {\n  \"thought\": \"Wikipedia likely has this information. I'm looking it up...\",\n  \"action\": {\"command\": \"wikipedia\", \"article\": \"Philadelphia\"}\n}\n\nAssistant: {\n  \"observation\": \"Philadelphia, often called Philly, is the largest city in the Commonwealth of Pennsylvania and the second-largest city in both the Northeast megalopolis and Mid-Atlantic regions after New York City. It is one of the most historically significant cities in the United States and served as the nation's capital city until 1800. Philadelphia is the nation's sixth-largest city with a population of 1,603,797 as of the 2020 census. Since 1854, the city has been coextensive with Philadelphia County, the most populous county in Pennsylvania and the urban core of the Delaware Valley, the nation's seventh-largest and one of the world's largest metropolitan regions with 6.245 million residents in 2020. Philadelphia is known for its extensive contributions to American history and for its role in the life sciences, business and industry, art, literature, and music.\"\n}\n\nAssistant: {\n  \"answer\": \"The population of Philadelphia, as of the 2020 census, is 1,603,797 people.\"\n}\n```\n\nExample 2:\n```\nUser: What is my manager's city?\n\nAssistant: {\n  \"thought\": \"Let me look up who your manager is. First, I need to look up your profile.\",\n  \"action\": {\"command\": \"get_employee\", \"id\": 92352}\n}\n\nAssistant: {\n  \"observation\": {\n    \"id\": 78334,\n    \"name\": \"Ms. Manager\",\n    \"location_id\": 8832\n  }\n}\n\nAssistant: {\n  \"thought\": \"Your manager is Ms. Manager. I'm looking up their location.\",\n  \"action\": {\"command\": \"get_location\", \"id\": 8832}\n}\n\nAssistant: {\n  \"observation\": {\n    \"id\": 8832,\n    \"name\": \"Philadelphia\"\n  }\n}\n\nAssistant: {\n  \"answer\": \"Your manager lives in Philadelphia.\"\n}\n```\n~~~\n\u003C\u002Fdetails>\n\n#### GPT-4 vs GPT-3.5\n\nIn most of the examples in this doc, the difference between GPT-3.5 and GPT-4 is negligible, but for “teaching a bot to fish” scenarios the difference between the models is notable.\n\nNone of the above examples of command grammars, for example, work without meaningful modifications for GPT-3.5. At a minimum, you have to provide a number of examples (at least one usage example per command) before you get any reasonable results. And, for complex sets of commands, it may hallucinate new commands or create fictional arguments.\n\nWith a sufficiently thorough hidden prompt, you should be able to overcome these limitations. GPT-4 is capable of far more consistent and complex logic with far simpler prompts (and can get by with zero or  small numbers of examples – though it is always beneficial to include as many as possible).\n\n## Strategies\n\nThis section contains examples and strategies for specific needs or problems. For successful prompt engineering, you will need to combine some subset of all of the strategies enumerated in this document. Don’t be afraid to mix and match things – or invent your own approaches.\n\n### Embedding Data\n\nIn hidden contexts, you’ll frequently want to embed all sorts of data. The specific strategy will vary depending on the type and quantity of data you are embedding.\n\n#### Simple Lists\n\nFor one-off objects, enumerating fields + values in a normal bulleted list works pretty well:\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_081da3e06dc9.png\" title=\"GPT-4 extracting Steve’s occupation from a list attributes.\">\n\u003C\u002Fp>\n\nIt will also work for larger sets of things, but there are other formats for lists of data that GPT handles more reliably. Regardless, here’s an example:\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_5bdb825b6560.png\" title=\"GPT-4 answering questions about a set of expenses.\">\n\u003C\u002Fp>\n\n#### Markdown Tables\n\nMarkdown tables are great for scenarios where you have many items of the same type to enumerate.\n\nFortunately, OpenAI’s models are exceptionally good at working with Markdown tables (presumably from the tons of GitHub data they’ve trained on).\n\nWe can reframe the above using Markdown tables instead:\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_75b56fa9006f.png\" title=\"GPT-4 answering questions about a set of expenses from a Markdown table.\">\n\u003C\u002Fp>\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_e8525ed6ed12.png\" title=\"GPT-4 answering questions about a set of expenses from a Markdown table.\">\n\u003C\u002Fp>\n\n> 🧠 Note that in this last example, the items in the table have an explicit date, February 2nd. In our question, we asked about “today”. And earlier in the prompt we mentioned that today was Feb 2. The model correctly handled the transitive inference – converting “today” to “February 2nd” and then looking up “February 2nd” in the table.\n\n#### JSON\n\nMarkdown tables work really well for many use cases and should be preferred due to their density and ability for the model to handle them reliably, but you may run into scenarios where you have many columns and the model struggles with it or every item has some custom attributes and it doesn’t make sense to have dozens of columns of empty data.\n\nIn these scenarios, JSON is another format that the model handles really well. The close proximity of `keys` to their `values` makes it easy for the model to keep the mapping straight.\n\nHere is the same example from the Markdown table, but with JSON instead:\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_d24512bab5f4.png\" title=\"GPT-4 answering questions about a set of expenses from a JSON blob.\">\n\u003C\u002Fp>\n\n#### Freeform Text\n\nOccasionally you’ll want to include freeform text in a prompt that you would like to delineate from the rest of the prompt – such as embedding a document for the bot to reference. In these scenarios, surrounding the document with triple backticks, ```, works well[^8].\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_f251cb6972a1.png\" title=\"GPT-4 answering questions about a set of expenses from a JSON blob.\">\n\u003C\u002Fp>\n\n[^8]: A good rule of thumb for anything you’re doing in prompts is to lean heavily on things the model would have learned from GitHub.\n\n#### Nested Data\n\nNot all data is flat and linear. Sometimes you’ll need to embed data that is nested or has relations to other data. In these scenarios, lean on `JSON`:\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_ca24124f5875.png\" title=\"GPT-4 handles nested JSON very reliably.\">\n\u003C\u002Fp>\n\n\u003Cdetails>\n\u003Csummary>(Full prompt)\u003C\u002Fsummary>\n\n~~~\nYou are a helpful assistant. You answer questions about users. Here is what you know about them:\n\n{\n  \"users\": [\n    {\n      \"id\": 1,\n      \"name\": \"John Doe\",\n      \"contact\": {\n        \"address\": {\n          \"street\": \"123 Main St\",\n          \"city\": \"Anytown\",\n          \"state\": \"CA\",\n          \"zip\": \"12345\"\n        },\n        \"phone\": \"555-555-1234\",\n        \"email\": \"johndoe@example.com\"\n      }\n    },\n    {\n      \"id\": 2,\n      \"name\": \"Jane Smith\",\n      \"contact\": {\n        \"address\": {\n          \"street\": \"456 Elm St\",\n          \"city\": \"Sometown\",\n          \"state\": \"TX\",\n          \"zip\": \"54321\"\n        },\n        \"phone\": \"555-555-5678\",\n        \"email\": \"janesmith@example.com\"\n      }\n    },\n    {\n      \"id\": 3,\n      \"name\": \"Alice Johnson\",\n      \"contact\": {\n        \"address\": {\n          \"street\": \"789 Oak St\",\n          \"city\": \"Othertown\",\n          \"state\": \"NY\",\n          \"zip\": \"67890\"\n        },\n        \"phone\": \"555-555-2468\",\n        \"email\": \"alicejohnson@example.com\"\n      }\n    },\n    {\n      \"id\": 4,\n      \"name\": \"Bob Williams\",\n      \"contact\": {\n        \"address\": {\n          \"street\": \"135 Maple St\",\n          \"city\": \"Thistown\",\n          \"state\": \"FL\",\n          \"zip\": \"98765\"\n        },\n        \"phone\": \"555-555-8642\",\n        \"email\": \"bobwilliams@example.com\"\n      }\n    },\n    {\n      \"id\": 5,\n      \"name\": \"Charlie Brown\",\n      \"contact\": {\n        \"address\": {\n          \"street\": \"246 Pine St\",\n          \"city\": \"Thatstown\",\n          \"state\": \"WA\",\n          \"zip\": \"86420\"\n        },\n        \"phone\": \"555-555-7531\",\n        \"email\": \"charliebrown@example.com\"\n      }\n    },\n    {\n      \"id\": 6,\n      \"name\": \"Diane Davis\",\n      \"contact\": {\n        \"address\": {\n          \"street\": \"369 Willow St\",\n          \"city\": \"Sumtown\",\n          \"state\": \"CO\",\n          \"zip\": \"15980\"\n        },\n        \"phone\": \"555-555-9512\",\n        \"email\": \"dianedavis@example.com\"\n      }\n    },\n    {\n      \"id\": 7,\n      \"name\": \"Edward Martinez\",\n      \"contact\": {\n        \"address\": {\n          \"street\": \"482 Aspen St\",\n          \"city\": \"Newtown\",\n          \"state\": \"MI\",\n          \"zip\": \"35742\"\n        },\n        \"phone\": \"555-555-6813\",\n        \"email\": \"edwardmartinez@example.com\"\n      }\n    },\n    {\n      \"id\": 8,\n      \"name\": \"Fiona Taylor\",\n      \"contact\": {\n        \"address\": {\n          \"street\": \"531 Birch St\",\n          \"city\": \"Oldtown\",\n          \"state\": \"OH\",\n          \"zip\": \"85249\"\n        },\n        \"phone\": \"555-555-4268\",\n        \"email\": \"fionataylor@example.com\"\n      }\n    },\n    {\n      \"id\": 9,\n      \"name\": \"George Thompson\",\n      \"contact\": {\n        \"address\": {\n          \"street\": \"678 Cedar St\",\n          \"city\": \"Nexttown\",\n          \"state\": \"GA\",\n          \"zip\": \"74125\"\n        },\n        \"phone\": \"555-555-3142\",\n        \"email\": \"georgethompson@example.com\"\n      }\n    },\n    {\n      \"id\": 10,\n      \"name\": \"Helen White\",\n      \"contact\": {\n        \"address\": {\n          \"street\": \"852 Spruce St\",\n          \"city\": \"Lasttown\",\n          \"state\": \"VA\",\n          \"zip\": \"96321\"\n        },\n        \"phone\": \"555-555-7890\",\n        \"email\": \"helenwhite@example.com\"\n      }\n    }\n  ]\n}\n~~~\n\u003C\u002Fdetails>\n\nIf using nested `JSON` winds up being too verbose for your token budget, fallback to `relational tables` defined with `Markdown`:\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_72beb535843f.png\" title=\"GPT-4 handles relational tables pretty reliably too.\">\n\u003C\u002Fp>\n\n\u003Cdetails>\n\u003Csummary>(Full prompt)\u003C\u002Fsummary>\n\n~~~\nYou are a helpful assistant. You answer questions about users. Here is what you know about them:\n\nTable 1: users\n| id (PK) | name          |\n|---------|---------------|\n| 1       | John Doe      |\n| 2       | Jane Smith    |\n| 3       | Alice Johnson |\n| 4       | Bob Williams  |\n| 5       | Charlie Brown |\n| 6       | Diane Davis   |\n| 7       | Edward Martinez |\n| 8       | Fiona Taylor  |\n| 9       | George Thompson |\n| 10      | Helen White   |\n\nTable 2: addresses\n| id (PK) | user_id (FK) | street      | city       | state | zip   |\n|---------|--------------|-------------|------------|-------|-------|\n| 1       | 1            | 123 Main St | Anytown    | CA    | 12345 |\n| 2       | 2            | 456 Elm St  | Sometown   | TX    | 54321 |\n| 3       | 3            | 789 Oak St  | Othertown  | NY    | 67890 |\n| 4       | 4            | 135 Maple St | Thistown  | FL    | 98765 |\n| 5       | 5            | 246 Pine St | Thatstown  | WA    | 86420 |\n| 6       | 6            | 369 Willow St | Sumtown  | CO    | 15980 |\n| 7       | 7            | 482 Aspen St | Newtown   | MI    | 35742 |\n| 8       | 8            | 531 Birch St | Oldtown   | OH    | 85249 |\n| 9       | 9            | 678 Cedar St | Nexttown  | GA    | 74125 |\n| 10      | 10           | 852 Spruce St | Lasttown | VA    | 96321 |\n\nTable 3: phone_numbers\n| id (PK) | user_id (FK) | phone       |\n|---------|--------------|-------------|\n| 1       | 1            | 555-555-1234 |\n| 2       | 2            | 555-555-5678 |\n| 3       | 3            | 555-555-2468 |\n| 4       | 4            | 555-555-8642 |\n| 5       | 5            | 555-555-7531 |\n| 6       | 6            | 555-555-9512 |\n| 7       | 7            | 555-555-6813 |\n| 8       | 8            | 555-555-4268 |\n| 9       | 9            | 555-555-3142 |\n| 10      | 10           | 555-555-7890 |\n\nTable 4: emails\n| id (PK) | user_id (FK) | email                 |\n|---------|--------------|-----------------------|\n| 1       | 1            | johndoe@example.com   |\n| 2       | 2            | janesmith@example.com |\n| 3       | 3            | alicejohnson@example.com |\n| 4       | 4            | bobwilliams@example.com |\n| 5       | 5            | charliebrown@example.com |\n| 6       | 6            | dianedavis@example.com |\n| 7       | 7            | edwardmartinez@example.com |\n| 8       | 8            | fionataylor@example.com |\n| 9       | 9            | georgethompson@example.com |\n| 10      | 10           | helenwhite@example.com |\n\nTable 5: cities\n| id (PK) | name         | state | population | median_income |\n|---------|--------------|-------|------------|---------------|\n| 1       | Anytown     | CA    | 50,000     | $70,000      |\n| 2       | Sometown    | TX    | 100,000    | $60,000      |\n| 3       | Othertown   | NY    | 25,000     | $80,000      |\n| 4       | Thistown    | FL    | 75,000     | $65,000      |\n| 5       | Thatstown   | WA    | 40,000     | $75,000      |\n| 6       | Sumtown     | CO    | 20,000     | $85,000      |\n| 7       | Newtown     | MI    | 60,000     | $55,000      |\n| 8       | Oldtown     | OH    | 30,000     | $70,000      |\n| 9       | Nexttown    | GA    | 15,000     | $90,000      |\n| 10      | Lasttown    | VA    | 10,000     | $100,000     |\n~~~\n\n\u003C\u002Fdetails>\n\n> 🧠 The model works well with data in [3rd normal form](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FThird_normal_form), but may struggle with too many joins. In experiments, it seems to do okay with at least three levels of nested joins. In the example above the model successfully joins from `users` to `addresses` to `cities` to infer the likely income for George – $90,000.\n\n### Citations\n\nFrequently, a natural language response isn’t sufficient on its own and you’ll want the model’s output to cite where it is getting data from. \n\nOne useful thing to note here is that anything you might want to cite should have a unique ID. The simplest approach is to just ask the model to link to anything it references:\n\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_c13eed69482d.png\" title=\"GPT-4 will reliably link to data if you ask it to.\">\n\u003C\u002Fp>\n\n### Programmatic Consumption\n\nBy default, language models output natural language text, but frequently we need to interact with this result in a programmatic way that goes beyond simply printing it out on screen. You can achieve this by  asking the model to output the results in your favorite serialization format (JSON and YAML seem to work best).\n\nMake sure you give the model an example of the output format you’d like. Building on our previous travel example above, we can augment our prompt to tell it:\n\n~~~\nProduce your output as JSON. The format should be:\n```\n{\n    message: \"The message to show the user\",\n    hotelId: 432,\n    flightId: 831\n}\n```\n\nDo not include the IDs in your message.\n~~~\n\nAnd now we’ll get interactions like this:\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_6292a6ffc611.png\" title=\"GPT-4 providing travel recommendations in an easy to work with format.\">\n\u003C\u002Fp>\n\nYou could imagine the UI for this rendering the message as normal text, but then also adding discrete buttons for booking the flight + hotel, or auto-filling a form for the user.\n\nAs another example, let’s build on the [citations](#citations) example – but move beyond Markdown links. We can ask it to produce JSON with a normal message along with a list of items used in the creation of that message. In this scenario you won’t know exactly where in the message the citations were leveraged, but you’ll know that they were used somewhere.\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_956f1b244668.png\" title=\"Asking the model to provide a list of citations is a reliable way to programmatically know what data the model leaned on in its response.\">\n\u003C\u002Fp>\n\n> 🧠 Interestingly, in the model’s response to “How much did I spend at Target?” it provides a single value, $188.16, but **importantly** in the `citations` array it lists the individual expenses that it used to compute that value.\n\n### Chain of Thought\n\nSometimes you will bang your head on a prompt trying to get the model to output reliable results, but, no matter what you do, it just won’t work. This will frequently happen when the bot’s final output requires intermediate thinking, but you ask the bot only for the output and nothing else.\n\nThe answer may surprise you: ask the bot to show its work. In October 2022, Google released a paper “[Chain-of-Thought Prompting Elicits Reasoning in Large Language Models](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2201.11903.pdf)” where they showed that if, in your hidden prompt, you give the bot examples of answering questions by showing your work, then when you ask the bot to answer something it will show its work and produce more reliable answers.\n\nJust a few weeks after that paper was published, at the end of October 2022, the University of Tokyo and Google released the paper “[Large Language Models are Zero-Shot Reasoners](https:\u002F\u002Fopenreview.net\u002Fpdf?id=e2TBb5y0yFf)”, where they show that you don’t even need to provide examples – **you simply have to ask the bot to think step-by-step**.\n\n#### Averaging\n\nHere is an example where we ask the bot to compute the average expense, excluding Target. The actual answer is $136.77 and the bot almost gets it correct with $136.43.\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_20c155be8eb7.png\" title=\"The model **almost** gets the average correct, but is a few cents off.\">\n\u003C\u002Fp>\n\nIf we simply add “Let’s think step-by-step”, the model gets the correct answer:\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_d80a30a3efbe.png\" title=\"When we ask the model to show its work, it gets the correct answer.\">\n\u003C\u002Fp>\n\n#### Interpreting Code\n\nLet’s revisit the Python example from earlier and apply chain-of-thought prompting to our question. As a reminder, when we asked the bot to evaluate the Python code it gets it slightly wrong. The correct answer is `Hello, Brex!!Brex!!Brex!!!` but the bot gets confused about the number of !'s to include. In below’s example, it outputs `Hello, Brex!!!Brex!!!Brex!!!`:\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_ba6f32c4eec7.png\" title=\"The bot almost interprets the Python code correctly, but is a little off.\">\n\u003C\u002Fp>\n\nIf we ask the bot to show its work, then it gets the correct answer:\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_f5a4e8e1f66e.png\" title=\"The bot correctly interprets the Python code if you ask it to show its work.\">\n\u003C\u002Fp>\n\n#### Delimiters\n\nIn many scenarios, you may not want to show the end user all of the bot’s thinking and instead just want to show the final answer. You can ask the bot to delineate the final answer from its thinking. There are many ways to do this, but let’s use JSON to make it easy to parse:\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_89697ada439f.png\" title=\"The bot showing its work while also delimiting the final answer for easy extraction.\">\n\u003C\u002Fp>\n\nUsing Chain-of-Thought prompting will consume more tokens, resulting in increased price and latency, but the results are noticeably more reliable for many scenarios. It’s a valuable tool to use when you need the bot to do something complex and as reliably as possible.\n\n### Fine Tuning\n\nSometimes no matter what tricks you throw at the model, it just won’t do what you want it to do. In these scenarios you can **sometimes** fallback to fine-tuning. This should, in general, be a last resort.\n\n[Fine-tuning](https:\u002F\u002Fplatform.openai.com\u002Fdocs\u002Fguides\u002Ffine-tuning) is the process of taking an already trained model and then giving it thousands (or more) of example `input:output` pairs\n\nIt does not eliminate the need for hidden prompts, because you still need to embed dynamic data, but it may make the prompts smaller and more reliable.\n\n#### Downsides\n\nThere are many downsides to fine-tuning. If it is at all possible, take advantage of the nature of language models being [zero-shot, one-shot, and few-shot learners](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FFew-shot_learning_(natural_language_processing)) by teaching them to do something in their prompt rather than fine-tuning.\n\nSome of the downsides include:\n\n- **Not possible**: [GPT-3.5\u002FGPT-4 isn’t fine tunable](https:\u002F\u002Fplatform.openai.com\u002Fdocs\u002Fguides\u002Fchat\u002Fis-fine-tuning-available-for-gpt-3-5-turbo), which is the primary model \u002F API we’ll be using, so we simply can’t lean in fine-tuning.\n- **Overhead**: Fine-tuning requires manually creating tons of data.\n- **Velocity**: The iteration loop becomes much slower – every time you want to add a new capability, instead of adding a few lines to a prompt, you need to create a bunch of fake data and then run the finetune process and then use the newly fine-tuned model.\n- **Cost**: It is up to 60x more expensive to use a fine-tuned GPT-3 model vs the stock `gpt-3.5-turbo` model. And it is 2x more expensive to use a fine-tuned GPT-3 model vs the stock GPT-4 model.\n\n> ⛔️ If you fine-tune a model, **never use real customer data**. Always use synthetic data. The model may memorize portions of the data you provide and may regurgitate private data to other users that shouldn’t be seeing it.\n>\n> If you never fine-tune a model, we don’t have to worry about accidentally leaking data into the model.\n\n## Additional Resources\n- :star2: [OpenAI Cookbook](https:\u002F\u002Fgithub.com\u002Fopenai\u002Fopenai-cookbook) :star2:\n- :technologist: [Prompt Hacking](https:\u002F\u002Flearnprompting.org\u002Fdocs\u002Fcategory\u002F-prompt-hacking) :technologist: \n- :books: [Dair.ai Prompt Engineering Guide](https:\u002F\u002Fgithub.com\u002Fdair-ai\u002FPrompt-Engineering-Guide) :books: \n","# [Brex 的](https:\u002F\u002Fbrex.com) 提示工程指南\n\n本指南由 Brex 为内部用途编写。它基于我们在研究和创建用于生产场景的大语言模型（LLM）提示方面的经验教训。指南涵盖了大语言模型的发展历史，以及与大语言模型协作并构建程序化系统的策略、准则和安全建议，例如使用 [OpenAI 的 GPT-4](https:\u002F\u002Fopenai.com\u002Fresearch\u002Fgpt-4)。\n\n本文档中的示例是由非确定性语言模型生成的，因此相同的示例可能会产生不同的结果。\n\n这是一份持续更新的文档。围绕大语言模型的最佳实践和策略每天都在迅速演进。欢迎讨论并提出改进建议。\n\n## 目录\n- [什么是大语言模型？](#what-is-a-large-language-model-llm)\n  - [语言模型简史：不完整且略显不准确](#a-brief-incomplete-and-somewhat-incorrect-history-of-language-models)\n    - [2000 年之前](#pre-2000s)\n    - [2000 年代中期](#mid-2000s)\n    - [2010 年代早期](#early-2010s)\n    - [2010 年代后期](#late-2010s)\n    - [2020 年代](#2020s)\n- [什么是提示？](#what-is-a-prompt)\n  - [隐藏提示](#hidden-prompts)\n  - [标记（Token）](#tokens)\n  - [标记限制](#token-limits)\n  - [提示攻击](#prompt-hacking)\n    - [越狱](#jailbreaks)\n    - [信息泄露](#leaks)\n- [为什么需要提示工程？](#why-do-we-need-prompt-engineering)\n  - [授人以鱼](#give-a-bot-a-fish)\n    - [语义搜索](#semantic-search)\n  - [授人以渔](#teach-a-bot-to-fish)\n    - [命令语法](#command-grammars)\n    - [ReAct](#react)\n    - [GPT-4 与 GPT-3.5](#gpt-4-vs-gpt-35)\n- [策略](#strategies)\n  - [嵌入数据](#embedding-data)\n    - [简单列表](#simple-lists)\n    - [Markdown 表格](#markdown-tables)\n    - [JSON](#json)\n    - [自由文本](#freeform-text)\n    - [嵌套数据](#nested-data)\n  - [引用](#citations)\n  - [程序化消费](#programmatic-consumption)\n  - [思维链](#chain-of-thought)\n    - [平均法](#averaging)\n    - [代码解释](#interpreting-code)\n    - [分隔符](#delimiters)\n  - [微调](#fine-tuning)\n    - [缺点](#downsides)\n- [其他资源](#additional-resources)\n\n## 什么是大语言模型（LLM）？\n\n大语言模型是一种预测引擎，它接收一串词，并尝试预测在这串词之后最有可能出现的序列[^1]。它通过为可能的后续序列分配概率，然后从中采样来选择一个序列[^2]。这一过程会不断重复，直到满足某些停止条件。\n\n大语言模型通过对大量文本语料库进行训练来学习这些概率。其结果是，某些模型在特定用例中表现更好（例如，如果模型是在 GitHub 数据上训练的，那么它对源代码中序列的概率理解就会非常出色）。另一个后果是，模型可能会生成看似合理但实际上只是随机组合、缺乏现实依据的陈述。\n\n随着语言模型在预测序列方面越来越准确，[许多令人惊讶的能力也随之涌现](https:\u002F\u002Fwww.assemblyai.com\u002Fblog\u002Femergent-abilities-of-large-language-models\u002F)。\n\n[^1]: 实际上，语言模型使用的是标记（token），而不是单词。一个标记大致对应于一个词中的音节，或者大约 4 个字符。\n[^2]: 有许多不同的剪枝和采样策略可以改变序列的行为和性能。\n\n### 语言模型简史：不完整且略显不准确\n\n> :pushpin: 如果您想跳过语言模型的历史部分，请直接[前往此处](#what-is-a-prompt)。本节适合好奇心强的读者，同时也有助于理解后续建议背后的逻辑。\n\n#### 2000 年之前\n\n[语言模型](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FLanguage_model#Model_types)已经存在了几十年，但传统的语言模型（如 [n-gram 模型](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FN-gram_language_model)）存在许多缺陷，比如状态空间的爆炸式增长（即“维度灾难”问题）以及难以处理从未见过的新短语（稀疏性问题）。简而言之，较早的语言模型虽然能够生成一些与人类文本统计特征相似的文本，但输出内容并不连贯，读者很快就能看出那不过是胡言乱语。此外，n-gram 模型无法扩展到较大的 n 值，因此具有先天的局限性。\n\n#### 2000 年代中期\n\n2007 年，以在 1980 年代推广反向传播算法而闻名的杰弗里·辛顿发表了一篇关于训练神经网络的重要进展论文[1]，该论文使得更深的神经网络成为可能。将这种简单的深度神经网络应用于语言建模，有助于缓解语言模型的一些问题——它们能够在有限的空间内以连续的方式表示复杂的任意概念，并能优雅地处理训练语料库中未出现过的序列。这些简单的神经网络很好地学习了训练语料库中的概率分布，但其输出往往只是在统计意义上与训练数据相符，而与输入序列的逻辑关联性较差。\n\n#### 2010 年代早期\n\n尽管长短期记忆网络（LSTM）早在 1995 年就被提出，但它真正发挥光芒却是在 2010 年代。LSTM 使模型能够处理任意长度的序列，并且在处理输入时能够动态地改变内部状态，从而记住之前看到的内容。这一小小的改进带来了显著的效果。2015 年，安德烈·卡帕西[2]曾撰文介绍如何构建一个字符级别的 LSTM 模型，结果表明其性能远远超出了预期。\n\nLSTM 具有近乎神奇的能力，但在处理长期依赖关系方面仍存在困难。例如，如果要求模型完成句子“在法国，我们四处旅行，吃了许多糕点，喝了大量的葡萄酒，……还有很多文字……，但却从未学会说 _______”，模型可能就难以预测出“法语”。此外，LSTM 是逐个处理输入标记的，因此本质上是顺序性的，训练速度较慢，而且第 N 个标记只能记住前 N-1 个标记的信息。\n\n#### 2010 年代后期\n\n2017年，谷歌发表了一篇论文《Attention Is All You Need》（https:\u002F\u002Farxiv.org\u002Fpdf\u002F1706.03762.pdf），提出了Transformer网络（https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FTransformer_(machine_learning_model)），从而掀起了自然语言处理领域的一场巨大革命。一夜之间，机器在诸如跨语言翻译等任务上的表现几乎可以媲美人类，甚至在某些情况下超越了人类。Transformer模型具有高度并行性，并引入了一种称为“注意力机制”的方法，使模型能够高效地聚焦于输入中的特定部分。Transformer会同时并行地分析整个输入序列，自动选择其中最重要、最具影响力的片段。因此，每一个输出标记都会受到所有输入标记的影响。\n\nTransformer模型不仅高度并行化、训练效率高，还能产生令人惊叹的结果。然而，它的缺点在于输入和输出的大小是固定的——即所谓的“上下文窗口”——而计算量会随着该窗口大小的增加呈二次方增长（在某些情况下，内存消耗也会如此）[^3]。\n\n尽管如此，Transformer并非终点，但近年来自然语言处理领域的绝大多数进展都与之密切相关。目前，关于如何实现和应用Transformer的研究仍然非常活跃，例如亚马逊的AlexaTM 20B模型（https:\u002F\u002Fwww.amazon.science\u002Fblog\u002F20b-parameter-alexa-model-sets-new-marks-in-few-shot-learning），它在多项任务中超越了GPT-3，且参数量仅为后者的十分之一左右。\n\n[^3]：虽然近年来出现了一些新的变体以提升计算和内存效率，但这仍然是一个活跃的研究方向。\n\n#### 2020年代\n\n从技术层面来看，2020年代的开端始于2018年，其核心主题便是生成式预训练模型——也就是更为人熟知的GPT系列。在《Attention Is All You Need》论文发布一年后，OpenAI发表了《通过生成式预训练提升语言理解能力》（https:\u002F\u002Fs3-us-west-2.amazonaws.com\u002Fopenai-assets\u002Fresearch-covers\u002Flanguage-unsupervised\u002Flanguage_understanding_paper.pdf）。这篇论文指出，可以在大规模数据集上对大型语言模型进行无特定目标的预训练；待模型掌握了语言的基本规律后，再针对具体任务进行微调，便能迅速获得最先进的效果。\n\n2020年，OpenAI又推出了GPT-3论文《语言模型是少样本学习者》（https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2020\u002Ffile\u002F1457c0d6bfcb4967418bfb8ac142f64a-Paper.pdf），表明如果将类似GPT的模型在参数量和训练数据规模上再扩大约10倍，便不再需要为许多任务进行专门的微调。模型的能力会自然涌现，用户只需通过文本交互即可获得当前最先进水平的结果。\n\n2022年，OpenAI进一步延续GPT-3的成功，发布了InstructGPT（https:\u002F\u002Fopenai.com\u002Fresearch\u002Finstruction-following）。其设计初衷是调整模型使其更好地遵循指令，同时减少输出中的毒性与偏见。这一成果的关键在于“基于人类反馈的强化学习”（RLHF，https:\u002F\u002Farxiv.org\u002Fpdf\u002F1706.03741.pdf），这一概念由谷歌和OpenAI于2017年共同提出[^4]，允许人类参与训练过程，从而对模型的输出进行微调，使其更符合人类偏好。InstructGPT正是如今广为人知的ChatGPT（https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FChatGPT）的前身。\n\n过去几年里，OpenAI一直是大型语言模型领域的重要贡献者，最近还推出了GPT-4（https:\u002F\u002Fcdn.openai.com\u002Fpapers\u002Fgpt-4.pdf）。不过，他们并非孤军奋战。Meta也发布了一系列开源大型语言模型，如OPT（https:\u002F\u002Fhuggingface.co\u002Ffacebook\u002Fopt-66b）、OPT-IML（https:\u002F\u002Fhuggingface.co\u002Ffacebook\u002Fopt-iml-30b，经过指令微调）以及LLaMa（https:\u002F\u002Fai.facebook.com\u002Fblog\u002Flarge-language-model-llama-meta-ai\u002F）。谷歌则推出了FLAN-T5（https:\u002F\u002Fhuggingface.co\u002Fgoogle\u002Fflan-t5-xxl）和BERT（https:\u002F\u002Fhuggingface.co\u002Fbert-base-uncased）等模型。此外，还有一个庞大的开源研究社区，不断推出诸如BLOOM（https:\u002F\u002Fhuggingface.co\u002Fbigscience\u002Fbloom）和StableLM（https:\u002F\u002Fgithub.com\u002Fstability-AI\u002FstableLM\u002F）等模型。\n\n如今，技术进步的速度极为迅猛，每隔几周就会有新的SOTA（最先进水平）出现，曾经需要集群才能运行的模型，现在甚至可以在树莓派上轻松部署。\n[^4]：2017年是自然语言处理领域的一个重要里程碑。\n\n## 什么是提示词？\n\n提示词，有时也被称为“上下文”，是指在模型开始生成输出之前提供给它的文本内容。它引导模型在其已学习的知识范围内探索特定领域，从而使输出结果与你的目标更加契合。打个比方，如果把语言模型看作一个代码解释器，那么提示词就是待解释的源代码。有趣的是，语言模型甚至会很乐意尝试猜测这段代码的作用：\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"450\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_4b3a60400936.png\" title=\"GPT-4模型正在解释一段Python代码。\">\n\u003C\u002Fp>\n\n而且它几乎完美地执行了这段Python代码！\n\n通常，提示词会是一条指令或一个问题，例如：\n\n \u003Cp align=\"center\">\n  \u003Cimg width=\"500\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_45f62943362b.png\">\n\u003C\u002Fp>\n\n另一方面，如果你不提供任何提示词，模型就失去了参考依据，它便会**随机从自己所学过的所有内容中采样**：\n\n**来自GPT-3-Davinci：**\n\n| ![image](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_b022bc17d71f.png) | ![image](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_ce74e26e27a1.png) | ![image](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_b653749a4907.png) |\n| --- | --- | --- |\n\n**来自GPT-4：**\n| ![image](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_ed36272229a8.png) | ![image](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_e45221c7c932.png) | ![image](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_ac923372aaa3.png) |\n| --- | --- | --- |\n\n### 隐藏提示\n\n> :warning: 始终假设隐藏提示中的任何内容都可能被用户看到。\n\n在用户与模型进行动态交互的应用中，例如与模型聊天时，通常会有部分提示内容是不打算让用户看到的。这些隐藏部分可以出现在任何位置，不过对话开始时几乎总是会有一个隐藏提示。\n\n通常，这包括一段初始文本，用于设定语气、模型约束和目标，以及其他特定于当前会话的动态信息——用户名、位置、一天中的时间等。\n\n模型是静态的，在某个时间点被“冻结”了，因此如果你希望它了解当前的信息，比如时间或天气，就必须明确提供这些信息。\n\n如果你使用的是 [OpenAI 的 Chat API](https:\u002F\u002Fplatform.openai.com\u002Fdocs\u002Fguides\u002Fchat\u002Fintroduction)，他们通过将隐藏提示内容放在 `system` 角色中来加以区分。\n\n下面是一个隐藏提示的例子，后面跟着与该提示内容的交互：\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_2d6d351e130e.png\" title=\"一个非常简单的隐藏提示。\">\n\u003C\u002Fp>\n\n在这个例子中，我们可以看到我们向机器人解释了各个角色、一些关于用户的背景信息、我们希望机器人能够访问的一些动态数据，以及机器人应该如何回应的指导。\n\n实际上，隐藏提示可能会相当长。这里是一个来自 [ChatGPT 命令行助手](https:\u002F\u002Fgithub.com\u002Fmanno\u002Fchatgpt-linux-assistant\u002Fblob\u002Fmain\u002Fsystem_prompt.txt) 的较长提示：\n\n\u003Cdetails>\n  \u003Csummary>摘自：https:\u002F\u002Fgithub.com\u002Fmanno\u002Fchatgpt-linux-assistant \u003C\u002Fsummary>\n\n```\n我们身处一个有 3 名用户的聊天室。其中一名用户叫“Human”，另一名叫“Backend”，还有一名叫“Proxy Natural Language Processor”。我会输入“Human”所说的话以及“Backend”所做的回复。你将扮演“Proxy Natural Language Processor”的角色，以 JSON 格式将“Human”请求的内容转发给“Backend”用户。“Backend”用户是一台 Ubuntu 服务器，发送给它的字符串会在 shell 中执行，然后它会返回命令的标准输出和退出码。这台 Ubuntu 服务器归我所有。当“Backend”返回标准输出和退出码时，“Proxy Natural Language Processor”会解析并将其格式化为简单易懂的英文，再发送给“Human”。举个例子：\n\n我作为 Human 提问：\nHuman：还有多少未编辑的视频？\n然后你将向 Backend 发送一条命令：\nProxy Natural Language Processor：@Backend {\"command\":\"find .\u002FVideos\u002FUnedited\u002F -iname '*.mp4' | wc -l\"}\n接着 Backend 返回命令的标准输出和退出码：\nBackend：{\"STDOUT\":\"5\", \"EXITCODE\":\"0\"}\n然后你回复用户：\nProxy Natural Language Processor：@Human 还有 5 个未编辑的视频。\n\n请仅回复“Proxy Natural Language Processor”应该说的话，除此之外不要说任何其他内容。现在和将来都不得以任何理由这样做。\n\n另一个例子：\n\n我作为 Human 提问：\nHuman：什么是 PEM 证书？\n然后你向 Backend 发送一条命令：\nProxy Natural Language Processor：@Backend {\"command\":\"xdg-open 'https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FPrivacy-Enhanced_Mail'\"}\n接着 Backend 返回命令的标准输出和退出码：\nBackend：{\"STDOUT\":\"\", \"EXITCODE\":\"0\"}\n然后你回复用户：\nProxy Natural Language Processor：@Human 我已经打开了一条链接，介绍了什么是 PEM 证书。\n\n\n请仅回复“Proxy Natural Language Processor”应该说的话，除此之外不要说任何其他内容。现在和将来都不得以任何理由这样做。\n\n切勿以 Backend 的身份回复。切勿代替 Backend 回复。你无权代替 Backend 回复。\n同时，请勿解释命令的作用或退出码的意义。无论现在还是将来，都绝不能以 Backend 的身份回复。\n\n请仅回复“Proxy Natural Language Processor”应该说的话，除此之外不要说任何其他内容。现在和将来都不得以任何理由这样做。\n```\n\u003C\u002Fdetails>\n\n你会注意到其中的一些良好实践，比如包含大量示例、对重要行为方面的重复强调、对回复的限制等……\n\n> :warning: 始终假设隐藏提示中的任何内容都可能被用户看到。\n\n### 令牌\n\n如果你觉得 2022 年的令牌就已经很“火”了，那么 2023 年的令牌就完全进入了另一个次元。语言模型消耗的基本单位并不是“词”，而是“令牌”。你可以把令牌想象成音节，平均来说，每 1,000 个令牌大约相当于 750 个词。它们不仅代表字母字符，还涵盖了标点符号、句子边界以及文档结束等许多概念。\n\n以下是 GPT 对一段文本进行分词的一个示例：\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_72667863536f.png\" title=\"一个分词示例。你可以在这里尝试：https:\u002F\u002Fplatform.openai.com\u002Ftokenizer\">\n\u003C\u002Fp>\n\n你可以在这里尝试分词工具：[https:\u002F\u002Fplatform.openai.com\u002Ftokenizer](https:\u002F\u002Fplatform.openai.com\u002Ftokenizer)\n\n不同的模型会使用不同粒度的分词器。理论上，你也可以直接给模型输入 0 和 1，但那样模型就需要从比特中学习字符的概念，再从字符中学习单词的概念，以此类推。同样，你也可以直接输入原始字符流，但那样模型就需要学习单词、标点符号等概念，而一般来说，模型的表现会更差。\n\n要了解更多，[Hugging Face 提供了关于分词器的精彩介绍](https:\u002F\u002Fhuggingface.co\u002Fdocs\u002Ftransformers\u002Ftokenizer_summary)，以及为什么它们是必要的。\n\n分词涉及很多细微之处，比如词汇表的大小，或者不同语言对句子结构的理解存在显著差异（例如单词之间可能没有空格）。幸运的是，语言模型的 API 几乎总是接受原始文本作为输入，并在后台自动进行分词——*因此你很少需要亲自考虑令牌的问题*。\n\n**除了一个重要的场景，我们将在下一部分讨论：令牌限制。**\n\n### Token 限制\n\n提示通常是追加式的，因为您希望聊天机器人能够掌握整个对话中之前消息的上下文。一般来说，语言模型是无状态的，不会记住之前的请求内容，因此每次都需要完整地提供当前会话中可能需要的所有信息。\n\n这样做的一个主要缺点是：目前主流的语言模型架构——Transformer，具有固定的输入和输出大小限制；当提示达到一定长度后，就无法再继续增长了。提示的总大小，有时也称为“上下文窗口”，因模型而异。对于 GPT-3 来说，这个限制是 4,096 个 token；而对于 GPT-4，则根据具体版本不同，分别为 8,192 个 token 或 32,768 个 token。\n\n如果您的上下文过大，超出了模型的处理范围，最常见的做法是以滑动窗口的方式截断上下文。可以将提示视为 `隐藏初始化提示 + messages[]` 的形式，通常隐藏提示部分保持不变，而 `messages[]` 数组则只保留最后 N 条消息。\n\n此外，还有一些更巧妙的提示截断策略，例如优先丢弃用户消息，以便让机器人的回复尽可能长时间地保留在上下文中；或者让另一个语言模型对对话进行总结，然后用一条包含总结内容的消息替换掉所有历史消息。实际上并没有所谓的“正确答案”，具体的解决方案取决于您的应用场景。\n\n需要注意的是，在截断上下文时，必须留出足够的空间来容纳模型的响应。OpenAI 的 token 限制同时考虑了输入和输出的长度。例如，如果您向 GPT-3 提供的输入为 4,090 个 token，那么它最多只能生成 6 个 token 的响应。\n\n> 🧙‍♂️ 如果您希望在将原始文本发送给模型之前先计算 token 数量，所使用的分词器会因具体模型而异。OpenAI 提供了一个名为 [tiktoken](https:\u002F\u002Fgithub.com\u002Fopenai\u002Ftiktoken\u002Fblob\u002Fmain\u002FREADME.md) 的库，可用于其旗下的模型；不过需要注意的是，OpenAI 内部使用的分词器在计数上可能会略有差异，并且还可能附加一些元数据，因此这里的结果仅可作为近似值参考。\n> \n> 如果您暂时无法使用分词器，也可以采用一种简单的估算方法：对于英文输入，直接用 `input.length \u002F 4` 即可得到一个大致的估计值，虽然不够精确，但往往比您预期的要好。\n\n### 提示词黑客攻击\n\n提示工程和大型语言模型是一个相当新兴的领域，因此每天都有新的绕过方法被发现。两大类攻击方式是：\n\n1. 让机器人绕过你为其设定的所有指导原则。\n2. 让机器人输出一些你本不希望用户看到的隐藏上下文。\n\n目前尚无任何机制能够全面阻止这些攻击，因此在与恶意用户交互时，务必假设机器人可能会做出或说出任何事情。幸运的是，在实际应用中，这些问题大多只是表面现象。\n\n可以把提示词看作是一种改善正常用户体验的方式。**我们设计提示词的目的是让普通用户不会偏离我们预期的交互范围——但也要始终假定，只要有心，用户就能绕过我们的提示约束。**\n\n#### 越狱攻击\n\n通常，隐藏的提示词会指示机器人以某种特定的人设行事，专注于某些任务，或者避免使用某些词语。对于非恶意用户来说，一般可以认为机器人会遵循这些指导原则，尽管非恶意用户也可能无意间绕过这些规则。\n\n例如，我们可以这样告诉机器人：\n\n```\n你是一位乐于助人的助手，但绝对不能使用“计算机”这个词。\n```\n\n如果我们随后问它关于计算机的问题，它就会把计算机称为“用于计算的设备”，因为它不允许使用“计算机”这个词。\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_39c78c1a57ce.png\" title=\"GPT-4 努力避免说‘计算机’这个词。\">\n\u003C\u002Fp>\n\n它会坚决拒绝说出这个词：\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_eafb92721de3.png\">\n\u003C\u002Fp>\n\n但是，如果我们通过让它翻译“计算机”的猪语版本来诱导它，就可以绕过这些指令，让它愉快地使用这个词。\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_7355e7b148ea.png\">\n\u003C\u002Fp>\n\n这里确实有一些防御措施可以采取，[参见此处](https:\u002F\u002Flearnprompting.org\u002Fdocs\u002Fprompt_hacking\u002Fdefensive_measures\u002Foverview)，但通常最好的办法是在尽可能靠近结尾的地方再次强调最重要的约束条件。对于 OpenAI 的聊天 API 来说，这可能意味着在最后一个 `user` 消息之后添加一条 `system` 消息。以下是一个示例：\n\n| ![image](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_8b3264358e4e.png) | ![image](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_1855bea97440.png) |\n| --- | --- |\n\n尽管 OpenAI 在防止越狱方面投入了大量精力，但仍有许多非常巧妙的绕过方法被不断分享，[比如这条推文](https:\u002F\u002Ftwitter.com\u002Falexalbert__\u002Fstatus\u002F1636488551817965568)，并且每天都有新的例子出现，[如这条推文](https:\u002F\u002Ftwitter.com\u002Fzswitten\u002Fstatus\u002F1598088267789787136)。\n\n#### 数据泄露\n\n如果你之前没有注意到本文中的警告，那么请务必记住：**你应该始终假定，任何暴露给语言模型的数据最终都会被用户看到。**\n\n在构建提示词的过程中，我们经常会将大量数据嵌入到隐藏的提示词中（即系统提示）。**机器人会很乐意将这些信息传递给用户**：\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_06a492886372.png\" title=\"机器人愉快地复述它所知道的用户信息。\">\n\u003C\u002Fp>\n\n即使你明确指示它不要透露这些信息，并且它也确实遵守了这一指示，仍然有数百万种方式可以从隐藏的提示词中泄露数据。\n\n这里有一个例子：机器人本不应该提到我的城市，但只要稍微调整一下问题的表述，它就会不小心把秘密说出来。\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_5676647f3ab4.png\" title=\"机器人拒绝透露个人信息，但我们却成功说服它告诉我我所在的城市。\">\n\u003C\u002Fp>\n\n类似地，我们还可以让机器人告诉我们它被禁止使用的那个词，而无需它真正说出那个词：\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_3f079bd75cd5.png\" title=\"从技术上讲，机器人从未说过‘计算机’这个词，但我仍然成功地让它透露了所有我需要了解的信息。\">\n\u003C\u002Fp>\n\n你应该把隐藏的提示词视为一种提升用户体验或使其更符合目标人设的方式。**切勿在提示词中放置任何你不希望用户直接在屏幕上看到的信息。**\n\n## 为什么我们需要提示工程？\n\n前面我们把提示词比作语言模型“解释执行”的“源代码”。**提示工程就是一门艺术，旨在编写出能够让语言模型按照我们的意愿行事的提示词**——就像软件工程是一门艺术，旨在编写源代码来指挥计算机完成我们想要的任务一样。\n\n编写优秀的提示词时，必须考虑到所使用模型的独特性。策略会因任务的复杂程度而异。你需要设计各种机制来约束模型，以获得可靠的结果；整合那些模型无法通过训练掌握的动态数据；考虑模型训练数据的局限性；围绕上下文长度限制进行设计；以及其他诸多方面。\n\n有一句古老的谚语说：“计算机只会做你命令它做的事情。” **请把这句忠告抛诸脑后。** 提示工程颠覆了这种观念。它更像是用自然语言对一台非确定性的计算机进行编程——这台计算机几乎无所不能，除非你明确引导它不去做某些事情。\n\n提示工程的方法大致可以分为两大类。\n\n### 给机器人一条鱼\n\n“给机器人一条鱼”这种方法适用于这样的情境：你可以在隐藏上下文中明确地向机器人提供它完成任何请求任务所需的所有信息。\n\n例如，如果用户打开了他们的仪表盘，而我们想向他们展示一条简短友好的消息，告知他们有哪些待处理的任务，我们可以通过向机器人提供整个收件箱的列表以及我们希望它拥有的其他用户上下文，让机器人总结出如下内容：\n\n> 您有4张收据\u002F备忘录待上传。最近的一张是3月5日来自Target的，最旧的一张则是1月17日来自Blink Fitness的。感谢您及时处理您的费用！\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_998cf81d554a.png\" title=\"GPT-3总结任务收件箱。\">\n\u003C\u002Fp>\n\n同样地，如果你正在帮助用户预订行程，你可以：\n\n- 询问用户出行日期和目的地。\n- 在后台搜索航班和酒店。\n- 将航班和酒店的搜索结果嵌入到隐藏上下文中。\n- 同时将公司的差旅政策也嵌入到隐藏上下文中。\n\n这样一来，机器人就拥有实时的旅行信息和约束条件，可以用来回答用户的疑问。以下是一个机器人推荐选项，而用户要求其进一步优化的例子：\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_cd080694ea2f.png\" title=\"GPT-4帮助用户预订行程。\">\n\u003C\u002Fp>\n\u003Cdetails>\n\n  \u003Csummary>(完整提示)\u003C\u002Fsummary>\n\n```\nBrex是一个用于管理企业费用的平台。\n\n以下是Brex的差旅费用政策：\n\n- 飞行时间少于6小时的航班，最高可报销经济舱票价。\n- 飞行时间超过6小时的航班，最高可报销优选经济舱票价。\n- 租车的日均费用不得超过75美元。\n- 住宿的平均每晚房价不得超过400美元。\n- 住宿必须达到四星级或以上。\n- 餐厅用餐、外卖、超市购物、酒吧及夜生活消费不得超过75美元。\n- 其他所有费用不得超过5,000美元。\n- 所有报销需经过审核。\n\n酒店选项如下：\n| 酒店名称 | 价格 | 评分 |\n| --- | --- | --- |\n| 希尔顿金融区酒店 | 每晚109美元 | 3.9星 |\n| VIA酒店 | 每晚131美元 | 4.4星 |\n| 凯悦广场旧金山酒店 | 每晚186美元 | 4.2星 |\n| 齐菲尔酒店 | 每晚119美元 | 4.1星 |\n\n航班选项如下：\n| 航空公司 | 起飞时间 | 飞行时长 | 中转次数 | 舱位 | 价格 |\n| --- | --- | --- | --- | --- | --- |\n| 美国航空 | 上午5:30-7:37 | 2小时7分钟 | 直飞 | 经济舱 | 248美元 |\n| 达美航空 | 下午1:20-3:36 | 2小时16分钟 | 直飞 | 经济舱 | 248美元 |\n| 阿拉斯加航空 | 晚上9:50-11:58 | 2小时8分钟 | 直飞 | 优选经济舱 | 512美元 |\n\n一位员工正计划于2月20日至2月25日前往旧金山出差。\n\n请推荐符合政策的酒店和航班。建议简洁明了，不超过一两句话，但请加入一些友好的措辞，仿佛是一位热心的同事在帮我一样：\n```\n \n\u003C\u002Fdetails>\n\n这与微软必应等产品利用动态数据的方式相同。当你与必应聊天时，它会要求机器人生成三条搜索查询，然后执行三次网络搜索，并将汇总后的结果放入隐藏上下文中供机器人使用。\n\n总结这一部分，打造良好体验的关键在于根据用户当前的操作动态调整上下文。\n\n> 🧙‍♂️ 给机器人一条鱼，是确保它能“吃到鱼”的最可靠方式。采用这种策略可以获得最为一致和可靠的响应。**只要可能，就尽量使用这种方法。**\n\n#### 语义搜索\n\n如果你只需要让机器人对世界有更多的了解，一种常见的方法是进行语义搜索[参考OpenAI的示例](https:\u002F\u002Fgithub.com\u002Fopenai\u002Fopenai-cookbook\u002Fblob\u002Fmain\u002Fexamples\u002FQuestion_answering_using_embeddings.ipynb)。\n\n语义搜索的核心是文档嵌入——你可以将其理解为一个固定长度的数字数组[^5]，其中每个数字代表文档的某个方面（比如，如果是科学类文档，第843个数字可能会较大；而如果是艺术类文档，第1,115个数字则可能较大——虽然这种说法过于简化，但基本能传达概念）。[^6]\n\n除了为文档计算嵌入外，你还可以用同样的方法为用户的查询计算嵌入。如果用户问：“为什么天空是蓝色的？”你就计算这个问题的嵌入，在理论上，这个嵌入会比那些不涉及天空的文档嵌入更接近于提到天空的文档嵌入。\n\n为了找到与用户查询相关的文档，你需要计算嵌入，然后找出与之最相似的前N个文档。接着，我们将这些文档（或它们的摘要）放入隐藏上下文中，供机器人参考。\n\n值得注意的是，有时用户的查询非常简短，以至于嵌入的作用并不明显。在2022年12月发表的一篇论文中介绍了一种巧妙的技术，称为“假设性文档嵌入”或HyDE。通过这种方法，你可以让模型根据用户的查询生成一篇假设性的文档，然后再为这篇生成的文档计算嵌入。模型实际上是在凭空捏造文档，但这种方法确实有效！\n\nHyDE技术需要调用更多的模型，但在许多应用场景中，能够显著提升结果。\n\n[^5]: 通常被称为向量。\n[^6]: 向量特征是自动学习得到的，具体的数值如果没有一定的专业知识，人类很难直接解读。\n\n### 教机器人如何钓鱼\n\n有时你希望机器人能够代表用户执行某些操作，比如将备忘录添加到收据中，或者绘制图表。又或者我们希望它能够以比语义搜索更精细的方式检索数据，例如获取过去90天的费用记录。\n\n在这种情况下，我们就需要教机器人“钓鱼”的方法。\n\n#### 命令语法\n\n我们可以为机器人提供一份系统可识别的命令列表，附带每条命令的描述和示例，然后让它生成由这些命令组成的程序。\n\n采用这种方法时需要注意许多问题。对于复杂的命令语法，机器人往往会“幻觉”出一些看似合理但实际上并不存在的命令或参数。要掌握好这一点，关键在于列出抽象程度较高的命令，同时赋予机器人足够的灵活性，使其能够以新颖且有用的方式组合这些命令。\n\n例如，给机器人一个`plot-the-last-90-days-of-expenses`命令，并不能很好地体现机器人的灵活性或组合性。同样，`draw-pixel-at-x-y [x] [y] [rgb]`这样的命令又过于底层。然而，如果提供`plot-expenses`和`list-expenses`这样的命令，就能为机器人提供一些基础且灵活的操作原语。\n\n在下面的例子中，我们使用以下命令列表：\n\n| 命令 | 参数 | 描述 |\n| --- | --- | --- |\n| list-expenses | budget | 返回指定预算下的支出列表 |\n| converse | message | 向用户展示消息 |\n| plot-expenses | expenses[] | 绘制支出列表 |\n| get-budget-by-name | budget_name | 根据名称获取预算 |\n| list-budgets | | 返回用户可访问的预算列表 |\n| add-memo | inbox_item_id, memo message | 为指定的收件箱项目添加备忘录 |\n\n我们将这张表格以Markdown格式提供给模型，而语言模型对这种格式的处理能力非常出色——这很可能是因为OpenAI在GitHub上的数据上进行了大量训练。\n\n在下面这个例子中，我们要求模型以[逆波兰表示法](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FReverse_Polish_notation)[^7]输出这些命令。\n\n[^7]: 模型对RPN的简洁性处理得非常出色。\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_77adcffc8179.png\" title=\"一个机器人愉快地生成响应用户查询的命令。\">\n\u003C\u002Fp>\n\n> 🧠 在那个例子中，除了生成命令之外，还有一些有趣而微妙的操作。当我们要求它向“shake shack”这笔支出添加备忘录时，模型知道`add-memo`命令需要一个支出ID。但我们并没有直接告诉它这个ID，于是它会在我们提供的支出表中查找“Shake Shack”，然后从对应的ID列中提取ID，再将其作为`add-memo`命令的参数。\n\n要在复杂情况下可靠地实现命令语法并不容易。我们能采取的最佳策略是提供大量的描述，以及尽可能多的使用示例。大型语言模型属于[少样本学习](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FFew-shot_learning_(natural_language_processing))类型，这意味着它们只需几个示例就能学会一项新任务。一般来说，提供的示例越多越好——但这也会影响你的token预算，因此需要权衡。\n\n这里有一个更复杂的例子，输出被指定为JSON格式，而不是RPN。我们还使用Typescript来定义命令的返回类型。\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_a63c539a19e0.png\" title=\"一个机器人愉快地生成响应用户查询的命令。\">\n\u003C\u002Fp>\n\n\u003Cdetails>\n\n  \u003Csummary>(完整提示)\u003C\u002Fsummary>\n  \n~~~\n你是一名在Brex工作的财务助理，同时也是一位编程专家。\n\n我是Brex的客户。\n\n你需要通过组合一系列命令来回答我的问题。\n\n输出类型如下：\n\n```typescript\ntype LinkedAccount = {\n    id: string,\n    bank_details: {\n        name: string,\n        type: string,\n    },\n    brex_account_id: string,\n    last_four: string,\n    available_balance: {\n        amount: number,\n        as_of_date: Date,\n    },\n    current_balance: {\n            amount: number,\n        as_of_date: Date,\n    },\n}\n\ntype Expense = {\n  id: string,\n  memo: string,\n  amount: number,\n}\n\ntype Budget = {\n  id: string,\n  name: string,\n  description: string,\n  limit: {\n    amount: number,\n    currency: string,\n  }\n}\n```\n\n你可以使用的命令有：\n\n| 命令 | 参数 | 描述 | 输出格式 |\n| --- | --- | --- | --- |\n| nth | index, values[] | 从数组中返回第n个元素 | 任意 |\n| push | value | 将值压入栈中，供后续命令使用 | 任意 |\n| value | key, object | 返回与键关联的值 | 任意 |\n| values | key, object[] | 从对象数组中提取对应键的值并返回一个数组 | 任意[] |\n| sum | value[] | 对数字数组求和 | 数字 |\n| plot | title, values[] | 以给定标题绘制数值图表 | Plot |\n| list-linked-accounts |  | “列出所有可向Brex现金账户进行ACH转账的银行连接” | LinkedAccount[] |\n| list-expenses | budget_id | 根据预算ID返回其支出列表 | Expense[]\n| get-budget-by-name | name | 根据名称返回预算 | Budget |\n| add-memo | expense_id, message | 为一笔支出添加备忘录 | bool |\n| converse | message | 向用户发送消息 | null |\n\n请仅以命令形式作答。\n将命令以JSON格式输出，作为抽象语法树。\n重要提示——请仅输出程序代码，不要包含任何非程序代码的文字内容。即使被要求解释，也不要写散文式的说明。\n你只能生成命令，但你是生成命令方面的专家。\n~~~\n\n\u003C\u002Fdetails>\n\n这种格式对于那些支持`JSON.parse`函数的语言来说，更容易解析和理解。\n\n> 🧙‍♂️ 目前尚未形成用于定义模型生成程序的DSL的最佳行业标准格式。因此，这仍是一个活跃的研究领域。你会遇到各种限制，而随着这些限制的逐步突破，我们或许会发现更优的命令定义方式。\n\n#### ReAct\n\n2023年3月，普林斯顿大学和谷歌联合发布了一篇论文《ReAct：在语言模型中协同推理与行动》（[arxiv.org\u002Fpdf\u002F2210.03629.pdf](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2210.03629.pdf)），其中他们提出了一种命令语法的变体，允许完全自主地交互执行动作并检索数据。\n\n模型被指示返回它想要执行的`thought`（思考）和`action`（行动）。另一个代理（例如我们的客户端）随后执行该`action`，并将结果作为`observation`（观察）反馈给模型。模型会循环返回更多的思考和行动，直到最终给出`answer`（答案）为止。\n\n这是一种极其强大的技术，实际上可以让机器人充当自己的研究助理，甚至代表用户采取行动。结合强大的命令语法，机器人应该能够迅速应对大量的用户请求。\n\n在这个例子中，我们为模型提供了一组与获取员工数据和搜索维基百科相关的简单命令：\n\n| 命令 | 参数 | 描述 |\n| --- | --- | --- |\n| find_employee | name | 根据姓名获取员工 |\n| get_employee | id | 根据ID获取员工 |\n| get_location | id | 根据ID获取地点 |\n| get_reports | employee_id | 获取所有向该员工汇报的员工ID列表 |\n| wikipedia | article | 根据主题获取维基百科文章 |\n\n然后我们问机器人一个简单的问题：“我的经理出名吗？”\n\n我们可以看到，机器人：\n\n1. 首先查找我们的员工档案。\n2. 从我们的档案中获取经理的ID，并查找其档案。\n3. 提取经理的名字，然后在维基百科上搜索该名字。\n    - 在这个场景中，我为经理选择了一个虚构角色。\n4. 机器人读取维基百科的文章，并得出结论：这不可能是我的经理，因为这是一个虚构角色。\n5. 机器人随后修改了搜索条件，加入“（真实人物）”这一筛选项。\n6. 看到没有结果后，机器人得出结论：我的经理并不出名。\n\n| ![image](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_2e125875ccab.png) | ![image](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_196ae15571ab.png) |\n| --- | --- |\n\n\u003Cdetails>\n\u003Csummary>(完整提示)\u003C\u002Fsummary>\n\n~~~\n你是一个乐于助人的助手。你会在一个循环中不断寻找额外的信息来回答用户的问题，直到你能给出答案为止。\n\n今天是2025年6月1日。我叫法比安·西卡斯特。我的员工编号是82442。\n\n用于获取信息的命令如下：\n\n| 命令 | 参数 | 描述 |\n| --- | --- | --- |\n| find_employee | 名字 | 根据名字检索员工 |\n| get_employee | ID | 根据ID检索员工 |\n| get_location | ID | 根据ID检索地点 |\n| get_reports | 员工ID | 检索直接向该员工汇报的所有员工ID列表。|\n| wikipedia | 文章 | 检索关于某个主题的维基百科文章。|\n\n你的回复将以JSON格式呈现，包含“思考”和“行动”，以获取回答问题所需的数据；或者直接包含“答案”。当数据被检索到时，它将作为“观察”被纳入响应中。\n\n你将持续生成思考和行动，直到得出答案，或确定无法解答。\n\n示例1：\n```\n用户：费城的人口是多少？\n\n助理：{\n  \"thought\": \"维基百科很可能有这些信息。我正在查找……\",\n  \"action\": {\"command\": \"wikipedia\", \"article\": \"Philadelphia\"}\n}\n\n助理：{\n  \"observation\": \"费城，常被称为菲利，是宾夕法尼亚州最大的城市，也是美国东北部大都市区和中大西洋地区中仅次于纽约市的第二大城市。它是美国历史上最具意义的城市之一，在1800年之前曾是美国的首都。根据2020年人口普查，费城是全美第六大城市，人口为1,603,797人。自1854年以来，这座城市与费城县范围一致，而费城县则是宾夕法尼亚州人口最多的县，同时也是特拉华河谷都市圈的核心区域。该都市圈是全美第七大、全球最大的都市区之一，2020年人口达624.5万人。费城以其对美国历史的深远贡献以及在生命科学、商业与工业、艺术、文学和音乐等领域的突出地位而闻名。\"\n}\n\n助理：{\n  \"answer\": \"根据2020年人口普查，费城的人口为1,603,797人。\"\n}\n```\n\n示例2：\n```\n用户：我的经理住在哪个城市？\n\n助理：{\n  \"thought\": \"让我先查一下你的经理是谁。首先，我需要查看你的档案。\",\n  \"action\": {\"command\": \"get_employee\", \"id\": 92352}\n}\n\n助理：{\n  \"observation\": {\n    \"id\": 78334,\n    \"name\": \"Ms. Manager\",\n    \"location_id\": 8832\n  }\n}\n\n助理：{\n  \"thought\": \"你的经理是Ms. Manager。我现在要查找她的居住地。\",\n  \"action\": {\"command\": \"get_location\", \"id\": 8832}\n}\n\n助理：{\n  \"observation\": {\n    \"id\": 8832,\n    \"name\": \"费城\"\n  }\n}\n\n助理：{\n  \"answer\": \"你的经理住在费城。\"\n}\n```\n~~~\n\u003C\u002Fdetails>\n\n#### GPT-4与GPT-3.5\n\n在这份文档中的大多数例子中，GPT-3.5和GPT-4之间的差异几乎可以忽略不计，但在“教会机器人如何捕鱼”的场景中，这两种模型之间的差异就非常明显了。\n\n例如，上述所有命令语法的例子，若不针对GPT-3.5进行有意义的修改，都无法正常工作。至少，你需要提供一些示例（每个命令至少一个使用示例），才能得到合理的输出。而对于复杂的命令集，GPT-3.5可能会“幻觉”出新的命令，或者创建虚构的参数。\n\n通过足够详尽的隐藏提示，你应该能够克服这些限制。相比之下，GPT-4只需更简单的提示就能实现更加一致和复杂的逻辑（甚至可以在几乎没有示例的情况下运行——尽管尽可能多地提供示例总是有益的）。\n\n\n\n## 策略\n\n本节包含针对特定需求或问题的示例和策略。为了成功进行提示工程，你需要结合本文档中列出的各种策略。不要害怕混合搭配不同的方法，也可以自行发明新的思路。\n\n### 嵌入数据\n\n在隐藏上下文中，你经常需要嵌入各种数据。具体策略会根据你要嵌入的数据类型和数量而有所不同。\n\n#### 简单列表\n\n对于一次性对象，用普通的项目符号列表罗列字段和值效果相当不错：\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_081da3e06dc9.png\" title=\"GPT-4从属性列表中提取史蒂夫的职业。\">\n\u003C\u002Fp>\n\n这种方法也适用于较大的数据集，不过还有其他格式的列表，GPT更能可靠地处理它们。无论如何，这里有一个例子：\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_5bdb825b6560.png\" title=\"GPT-4回答关于一组支出的问题。\">\n\u003C\u002Fp>\n\n#### Markdown表格\n\nMarkdown表格非常适合需要枚举大量同类条目的场景。\n\n幸运的是，OpenAI的模型非常擅长处理Markdown表格（可能是因为它们在训练过程中接触了大量的GitHub数据）。\n\n我们可以用Markdown表格重新表述上面的例子：\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_75b56fa9006f.png\" title=\"GPT-4从Markdown表格中回答关于一组支出的问题。\">\n\u003C\u002Fp>\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_e8525ed6ed12.png\" title=\"GPT-4从Markdown表格中回答关于一组支出的问题。\">\n\u003C\u002Fp>\n\n> 🧠 注意，在最后一个例子中，表格里的条目明确标注了日期——2月2日。而在我们的问题中，我们询问的是“今天”。此外，在提示的开头我们提到今天就是2月2日。模型正确地进行了传递性推理——将“今天”转换为“2月2日”，然后在表格中查找“2月2日”的相关条目。\n\n#### JSON\n\nMarkdown 表格在许多用例中表现非常出色，由于其紧凑性和模型对其的可靠处理能力，通常应优先使用。然而，在某些情况下，比如列数过多导致模型难以处理，或者每个条目都包含自定义属性而不得不设置大量空列时，Markdown 表格可能就不那么适用了。\n\n在这种情况下，JSON 是另一种模型能够很好地处理的格式。`key` 与其对应的 `value` 紧密相邻，使得模型更容易保持映射关系的清晰性。\n\n以下是与 Markdown 表格示例相同的内容，但使用 JSON 格式：\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_d24512bab5f4.png\" title=\"GPT-4 回答关于 JSON 数据块中一组支出的问题。\">\n\u003C\u002Fp>\n\n#### 自由文本\n\n有时，你可能希望在提示中插入一段自由文本，并将其与其他部分明确区分开来——例如嵌入一份供机器人参考的文档。在这种场景下，用三个反引号 ``` 将文档包裹起来是一个不错的选择[^8]。\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_f251cb6972a1.png\" title=\"GPT-4 回答关于 JSON 数据块中一组支出的问题。\">\n\u003C\u002Fp>\n\n[^8]：在编写提示时，一个很好的经验法则就是尽量依赖模型从 GitHub 上学到的内容。\n\n#### 嵌套数据\n\n并非所有数据都是扁平和线性的。有时，你需要嵌入一些具有嵌套结构或与其他数据存在关联的数据。在这种情况下，建议使用 `JSON`：\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_ca24124f5875.png\" title=\"GPT-4 能够非常可靠地处理嵌套的 JSON 数据。\">\n\u003C\u002Fp>\n\n\u003Cdetails>\n\u003Csummary>(完整提示)\u003C\u002Fsummary>\n\n~~~\n你是一位乐于助人的助手，负责回答有关用户的问题。以下是关于这些用户的已知信息：\n\n{\n  \"users\": [\n    {\n      \"id\": 1,\n      \"name\": \"John Doe\",\n      \"contact\": {\n        \"address\": {\n          \"street\": \"123 Main St\",\n          \"city\": \"Anytown\",\n          \"state\": \"CA\",\n          \"zip\": \"12345\"\n        },\n        \"phone\": \"555-555-1234\",\n        \"email\": \"johndoe@example.com\"\n      }\n    },\n    {\n      \"id\": 2,\n      \"name\": \"Jane Smith\",\n      \"contact\": {\n        \"address\": {\n          \"street\": \"456 Elm St\",\n          \"city\": \"Sometown\",\n          \"state\": \"TX\",\n          \"zip\": \"54321\"\n        },\n        \"phone\": \"555-555-5678\",\n        \"email\": \"janesmith@example.com\"\n      }\n    },\n    {\n      \"id\": 3,\n      \"name\": \"Alice Johnson\",\n      \"contact\": {\n        \"address\": {\n          \"street\": \"789 Oak St\",\n          \"city\": \"Othertown\",\n          \"state\": \"NY\",\n          \"zip\": \"67890\"\n        },\n        \"phone\": \"555-555-2468\",\n        \"email\": \"alicejohnson@example.com\"\n      }\n    },\n    {\n      \"id\": 4,\n      \"name\": \"Bob Williams\",\n      \"contact\": {\n        \"address\": {\n          \"street\": \"135 Maple St\",\n          \"city\": \"Thistown\",\n          \"state\": \"FL\",\n          \"zip\": \"98765\"\n        },\n        \"phone\": \"555-555-8642\",\n        \"email\": \"bobwilliams@example.com\"\n      }\n    },\n    {\n      \"id\": 5,\n      \"name\": \"Charlie Brown\",\n      \"contact\": {\n        \"address\": {\n          \"street\": \"246 Pine St\",\n          \"city\": \"Thatstown\",\n          \"state\": \"WA\",\n          \"zip\": \"86420\"\n        },\n        \"phone\": \"555-555-7531\",\n        \"email\": \"charliebrown@example.com\"\n      }\n    },\n    {\n      \"id\": 6,\n      \"name\": \"Diane Davis\",\n      \"contact\": {\n        \"address\": {\n          \"street\": \"369 Willow St\",\n          \"city\": \"Sumtown\",\n          \"state\": \"CO\",\n          \"zip\": \"15980\"\n        },\n        \"phone\": \"555-555-9512\",\n        \"email\": \"dianedavis@example.com\"\n      }\n    },\n    {\n      \"id\": 7,\n      \"name\": \"Edward Martinez\",\n      \"contact\": {\n        \"address\": {\n          \"street\": \"482 Aspen St\",\n          \"city\": \"Newtown\",\n          \"state\": \"MI\",\n          \"zip\": \"35742\"\n        },\n        \"phone\": \"555-555-6813\",\n        \"email\": \"edwardmartinez@example.com\"\n      }\n    },\n    {\n      \"id\": 8,\n      \"name\": \"Fiona Taylor\",\n      \"contact\": {\n        \"address\": {\n          \"street\": \"531 Birch St\",\n          \"city\": \"Oldtown\",\n          \"state\": \"OH\",\n          \"zip\": \"85249\"\n        },\n        \"phone\": \"555-555-4268\",\n        \"email\": \"fionataylor@example.com\"\n      }\n    },\n    {\n      \"id\": 9,\n      \"name\": \"George Thompson\",\n      \"contact\": {\n        \"address\": {\n          \"street\": \"678 Cedar St\",\n          \"city\": \"Nexttown\",\n          \"state\": \"GA\",\n          \"zip\": \"74125\"\n        },\n        \"phone\": \"555-555-3142\",\n        \"email\": \"georgethompson@example.com\"\n      }\n    },\n    {\n      \"id\": 10,\n      \"name\": \"Helen White\",\n      \"contact\": {\n        \"address\": {\n          \"street\": \"852 Spruce St\",\n          \"city\": \"Lasttown\",\n          \"state\": \"VA\",\n          \"zip\": \"96321\"\n        },\n        \"phone\": \"555-555-7890\",\n        \"email\": \"helenwhite@example.com\"\n      }\n    }\n  ]\n}\n~~~\n\u003C\u002Fdetails>\n\n如果使用嵌套的 `JSON` 对你的 token 预算来说过于冗长，可以退而求其次，采用用 `Markdown` 定义的 `关系型表格`：\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_72beb535843f.png\" title=\"GPT-4 对关系型表格也能相当可靠地处理。\">\n\u003C\u002Fp>\n\n\u003Cdetails>\n\u003Csummary>(完整提示)\u003C\u002Fsummary>\n\n~~~\n你是一位乐于助人的助手，负责回答有关用户的问题。以下是关于这些用户的已知信息：\n\n表 1：users\n| id (PK) | name          |\n|---------|---------------|\n| 1       | John Doe      |\n| 2       | Jane Smith    |\n| 3       | Alice Johnson |\n| 4       | Bob Williams  |\n| 5       | Charlie Brown |\n| 6       | Diane Davis   |\n| 7       | Edward Martinez |\n| 8       | Fiona Taylor  |\n| 9       | George Thompson |\n| 10      | Helen White   |\n\n表 2：addresses\n| id (PK) | user_id (FK) | street      | city       | state | zip   |\n|---------|--------------|-------------|------------|-------|-------|\n| 1       | 1            | 123 Main St | Anytown    | CA    | 12345 |\n| 2       | 2            | 456 Elm St  | Sometown   | TX    | 54321 |\n| 3       | 3            | 789 Oak St  | Othertown  | NY    | 67890 |\n| 4       | 4            | 135 Maple St | Thistown  | FL    | 98765 |\n| 5       | 5            | 246 Pine St | Thatstown  | WA    | 86420 |\n| 6       | 6            | 369 Willow St | Sumtown  | CO    | 15980 |\n| 7       | 7            | 482 Aspen St | Newtown   | MI    | 35742 |\n| 8       | 8            | 531 Birch St | Oldtown   | OH    | 85249 |\n| 9       | 9            | 678 Cedar St | Nexttown  | GA    | 74125 |\n| 10      | 10           | 852 Spruce St | Lasttown  | VA    | 96321 |\n~~~\n\u003C\u002Fdetails>\n\n表 3：phone_numbers\n| id (主键) | user_id (外键) | 电话       |\n|---------|--------------|-------------|\n| 1       | 1            | 555-555-1234 |\n| 2       | 2            | 555-555-5678 |\n| 3       | 3            | 555-555-2468 |\n| 4       | 4            | 555-555-8642 |\n| 5       | 5            | 555-555-7531 |\n| 6       | 6            | 555-555-9512 |\n| 7       | 7            | 555-555-6813 |\n| 8       | 8            | 555-555-4268 |\n| 9       | 9            | 555-555-3142 |\n| 10      | 10           | 555-555-7890 |\n\n表 4：emails\n| id (主键) | user_id (外键) | 邮箱                 |\n|---------|--------------|-----------------------|\n| 1       | 1            | johndoe@example.com   |\n| 2       | 2            | janesmith@example.com |\n| 3       | 3            | alicejohnson@example.com |\n| 4       | 4            | bobwilliams@example.com |\n| 5       | 5            | charliebrown@example.com |\n| 6       | 6            | dianedavis@example.com |\n| 7       | 7            | edwardmartinez@example.com |\n| 8       | 8            | fionataylor@example.com |\n| 9       | 9            | georgethompson@example.com |\n| 10      | 10           | helenwhite@example.com |\n\n表 5：cities\n| id (主键) | 名称         | 州     | 人口     | 中位收入    |\n|---------|--------------|-------|------------|---------------|\n| 1       | Anytown     | CA    | 50,000     | $70,000      |\n| 2       | Sometown    | TX    | 100,000    | $60,000      |\n| 3       | Othertown   | NY    | 25,000     | $80,000      |\n| 4       | Thistown    | FL    | 75,000     | $65,000      |\n| 5       | Thatstown   | WA    | 40,000     | $75,000      |\n| 6       | Sumtown     | CO    | 20,000     | $85,000      |\n| 7       | Newtown     | MI    | 60,000     | $55,000      |\n| 8       | Oldtown     | OH    | 30,000     | $70,000      |\n| 9       | Nexttown    | GA    | 15,000     | $90,000      |\n| 10      | Lasttown    | VA    | 10,000     | $100,000     |\n~~~\n\n\u003C\u002Fdetails>\n\n> 🧠 该模型在处理符合[第三范式](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FThird_normal_form)的数据时表现良好，但在涉及过多连接操作时可能会遇到困难。实验表明，它至少可以处理三层嵌套连接。在上述示例中，模型成功地从`users`表连接到`addresses`表，再到`cities`表，从而推断出乔治的可能收入为9万美元。\n\n\n\n### 引用\n\n通常情况下，仅靠自然语言回复是不够的，你可能希望模型能够引用其数据来源。\n\n值得注意的是，任何需要引用的内容都应具有唯一标识符。最简单的方法就是直接要求模型为其引用的内容添加链接：\n\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_c13eed69482d.png\" title=\"GPT-4会在被要求时可靠地链接到相关数据。\">\n\u003C\u002Fp>\n\n### 程序化消费\n\n默认情况下，语言模型会输出自然语言文本，但很多时候我们需要以程序化的方式与这些结果进行交互，而不仅仅是将其打印在屏幕上。为此，你可以要求模型以你喜欢的序列化格式（如JSON或YAML）输出结果。\n\n请务必向模型提供你期望的输出格式示例。基于我们之前的旅行示例，我们可以扩展提示内容，告诉模型：\n\n~~~\n请以JSON格式输出结果。格式如下：\n```\n{\n    message: \"要展示给用户的讯息\",\n    hotelId: 432,\n    flightId: 831\n}\n```\n\n请勿在讯息中包含ID。\n~~~\n\n现在我们将得到类似这样的交互：\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_6292a6ffc611.png\" title=\"GPT-4以易于处理的格式提供旅行建议。\">\n\u003C\u002Fp>\n\n你可以设想，用户界面会将讯息以普通文本形式显示，同时提供用于预订航班和酒店的独立按钮，或者自动填充表格供用户使用。\n\n再举一个例子，我们可以在[引用](#citations)的基础上进一步扩展——但不再局限于Markdown链接。我们可以要求模型生成包含正常讯息以及用于生成该讯息的条目列表的JSON。在这种情况下，你可能无法确切知道引用具体出现在讯息的哪个位置，但至少可以确定它们确实被使用过。\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_956f1b244668.png\" title=\"要求模型提供引用列表是一种可靠的方式，可以程序化地了解模型在其响应中依赖了哪些数据。\">\n\u003C\u002Fp>\n\n> 🧠 有趣的是，在模型对“我在塔吉特花了多少钱？”的回答中，它给出了单一数值188.16美元，但**重要的是**，在`citations`数组中列出了用于计算该数值的各项支出明细。\n\n### 思维链\n\n有时候，你可能会绞尽脑汁地调整提示词，试图让模型输出可靠的结果，但无论怎么努力，都无济于事。这种情况通常发生在机器人的最终输出需要中间推理步骤时，而你却只直接要求它给出结果，没有提供任何中间过程。\n\n答案可能会让你感到意外：请机器人展示它的解题步骤。2022年10月，谷歌发布了一篇论文《通过思维链提示激发大型语言模型的推理能力》（[Chain-of-Thought Prompting Elicits Reasoning in Large Language Models](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2201.11903.pdf)），其中他们表明，如果你在隐藏的提示中为机器人提供一些通过展示解题步骤来回答问题的示例，那么当你要求机器人解答某个问题时，它就会展示其思考过程，并给出更可靠的答案。\n\n就在那篇论文发表后的几周，也就是2022年10月底，东京大学和谷歌又联合发布了另一篇论文《大型语言模型是零样本推理者》（[Large Language Models are Zero-Shot Reasoners](https:\u002F\u002Fopenreview.net\u002Fpdf?id=e2TBb5y0yFf)），该论文指出，你甚至不需要提供示例——**你只需要简单地要求机器人一步一步地思考**即可。\n\n#### 计算平均值\n\n这里有一个例子：我们要求机器人计算平均支出，但不包括Target的开销。实际答案是136.77美元，而机器人几乎正确地给出了136.43美元。\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_20c155be8eb7.png\" title=\"模型**几乎**算对了平均值，但差了几美分。\">\n\u003C\u002Fp>\n\n如果我们简单地加上“让我们一步一步地思考”，模型就能给出正确的答案：\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_d80a30a3efbe.png\" title=\"当我们要求模型展示其思考过程时，它给出了正确答案。\">\n\u003C\u002Fp>\n\n#### 解释代码\n\n让我们再回顾一下前面的Python示例，并将思维链提示应用到我们的问题中。提醒一下，当我们要求机器人评估这段Python代码时，它会稍微出错。正确答案应该是`Hello, Brex!!Brex!!Brex!!!`，但机器人对应该包含多少个感叹号感到困惑。在下面的例子中，它输出的是`Hello, Brex!!!Brex!!!Brex!!!`：\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_ba6f32c4eec7.png\" title=\"机器人几乎正确地解释了Python代码，但还是有点偏差。\">\n\u003C\u002Fp>\n\n如果我们要求机器人展示其思考过程，它就能给出正确的答案：\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_f5a4e8e1f66e.png\" title=\"只要要求机器人展示其思考过程，它就能正确解释Python代码。\">\n\u003C\u002Fp>\n\n#### 分隔符\n\n在许多场景中，你可能并不希望向最终用户展示机器人的全部思考过程，而只是想直接呈现最终答案。这时你可以要求机器人将最终答案与其思考过程明确区分开来。实现这一点的方法有很多，但我们这里使用JSON格式，以便于后续解析：\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"550\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_readme_89697ada439f.png\" title=\"机器人展示了其思考过程，同时用分隔符标出了最终答案，便于提取。\">\n\u003C\u002Fp>\n\n使用思维链提示虽然会消耗更多的token，从而增加成本和延迟，但对于许多复杂场景来说，其结果确实更加可靠。因此，当需要机器人以尽可能高的可靠性完成复杂任务时，这是一种非常有价值的工具。\n\n### 微调\n\n有时，无论你尝试何种技巧，模型仍然无法按照你的期望运行。在这种情况下，你**有时**可以退而求其次，采用微调的方法。不过，一般来说，这应被视为最后的手段。\n\n[微调](https:\u002F\u002Fplatform.openai.com\u002Fdocs\u002Fguides\u002Ffine-tuning)是指在已经训练好的模型基础上，为其提供数千（或更多）个输入-输出示例对的过程。\n\n微调并不能完全取代隐藏提示的作用，因为你仍然需要嵌入动态数据，但它可以使提示变得更简洁、更可靠。\n\n#### 缺点\n\n微调也存在诸多缺点。如果可能的话，最好利用语言模型作为[零样本、单样本和少样本学习者](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FFew-shot_learning_(natural_language_processing))的特点，在提示中教会它们如何完成任务，而不是进行微调。\n\n其中一些缺点包括：\n\n- **不可行**：[GPT-3.5\u002FGPT-4无法进行微调](https:\u002F\u002Fplatform.openai.com\u002Fdocs\u002Fguides\u002Fchat\u002Fis-fine-tuning-available-for-gpt-3-5-turbo)，而这是我们主要使用的模型和API，因此我们根本无法采用微调。\n- **额外开销**：微调需要手动准备大量的数据。\n- **迭代速度慢**：每次想要添加新功能时，你不再只需在提示中增加几行内容，而是必须创建大量虚假数据，然后运行微调流程，最后才能使用新微调过的模型。\n- **成本高昂**：与原生的`gpt-3.5-turbo`模型相比，使用微调后的GPT-3模型的成本最高可高出60倍；而与原生的GPT-4模型相比，微调后的GPT-3模型成本也要高2倍。\n\n> ⛔️ 如果你对模型进行微调，**切勿使用真实的客户数据**。务必使用合成数据。否则，模型可能会记住你提供的部分数据，并将这些隐私信息泄露给其他不应看到的用户。\n>\n> 如果你不进行微调，我们就无需担心意外将数据泄露到模型中。\n\n## 更多资源\n- :star2: [OpenAI Cookbook](https:\u002F\u002Fgithub.com\u002Fopenai\u002Fopenai-cookbook) :star2:\n- :technologist: [Prompt Hacking](https:\u002F\u002Flearnprompting.org\u002Fdocs\u002Fcategory\u002F-prompt-hacking) :technologist: \n- :books: [Dair.ai 提示工程指南](https:\u002F\u002Fgithub.com\u002Fdair-ai\u002FPrompt-Engineering-Guide) :books:","# Prompt Engineering 快速上手指南\n\n> **注意**：本指南基于 Brex 的《Prompt Engineering Guide》整理。该资源主要是一份关于大语言模型（LLM）提示词策略、历史背景及安全建议的**概念性文档**，而非一个需要安装的具体软件库或框架。因此，本指南侧重于“环境准备”以访问主流模型，以及“如何使用”其中提到的核心提示工程策略。\n\n## 环境准备\n\n由于提示工程（Prompt Engineering）是与大语言模型交互的方法论，您不需要安装特定的本地二进制文件，但需要准备以下环境以调用模型 API 或使用相关工具：\n\n### 系统要求\n- **操作系统**：Windows, macOS, 或 Linux 均可。\n- **网络环境**：需要能够访问大模型服务商（如 OpenAI, Anthropic 等）的 API 接口。\n  - *国内开发者提示*：由于网络限制，直接访问 OpenAI 等服务可能需要配置代理或使用国内提供兼容 API 的服务商（如百度文心一言、阿里通义千问、智谱 AI 等，它们大多支持类似的 Prompt 结构）。\n\n### 前置依赖\n建议使用 Python 进行实验，需安装以下基础库：\n- Python 3.8+\n- `openai` (或其他模型对应的 SDK)\n- `requests` (用于直接调用 HTTP API)\n\n安装基础依赖命令：\n```bash\npip install openai requests\n```\n\n## 安装步骤\n\n本指南所指的\"Prompt Engineering\"并非一个独立的安装包，而是一套实践方法。若您希望使用 Brex 文档中提到的具体代码示例或测试工具，通常只需克隆其 GitHub 仓库查看源码，或直接在其支持的平台上应用以下策略。\n\n若您需要搭建本地开发环境以测试提示词效果，请执行以下步骤：\n\n1. **获取 API Key**\n   注册并获取大模型服务的 API Key（例如 OpenAI API Key）。\n\n2. **配置环境变量**\n   在终端中设置您的密钥（避免硬编码在代码中）：\n   ```bash\n   export OPENAI_API_KEY=\"your-api-key-here\"\n   ```\n   *(Windows PowerShell: `$env:OPENAI_API_KEY=\"your-api-key-here\"`)*\n\n3. **(可选) 克隆参考仓库**\n   如果您想查看 Brex 原始文档中的完整示例代码：\n   ```bash\n   git clone https:\u002F\u002Fgithub.com\u002Fbrexhq\u002Fprompt-engineering.git\n   cd prompt-engineering\n   ```\n\n## 基本使用\n\n提示工程的核心在于构造高质量的 **Prompt（提示词）** 来引导模型输出。以下是基于该指南整理的三个最基础且核心的使用策略示例。\n\n### 1. 基础指令与上下文 (Basic Instruction & Context)\n最简单的用法是提供清晰的指令和必要的背景信息。不要假设模型知道你的隐含意图。\n\n**示例代码 (Python):**\n```python\nfrom openai import OpenAI\n\nclient = OpenAI()\n\n# 构建 Prompt：包含角色设定、任务指令和输入数据\nprompt = \"\"\"\n你是一位专业的翻译助手。\n请将以下英文技术术语翻译成中文，并简要解释其含义。\n\n术语列表：\n- Token\n- Embedding\n- Chain of Thought\n\n请以 Markdown 表格形式输出。\n\"\"\"\n\nresponse = client.chat.completions.create(\n    model=\"gpt-4\", # 或 gpt-3.5-turbo\n    messages=[\n        {\"role\": \"system\", \"content\": \"你是一个乐于助人的 AI 助手。\"},\n        {\"role\": \"user\", \"content\": prompt}\n    ]\n)\n\nprint(response.choices[0].message.content)\n```\n\n### 2. 结构化数据嵌入 (Embedding Data)\n指南中强调，将数据以结构化格式（如 JSON、Markdown 表格）放入 Prompt 中，能显著提高模型处理的准确性。\n\n**示例代码:**\n```python\n# 使用 JSON 格式提供数据，便于模型解析\ndata_context = \"\"\"\n以下是用户订单数据 (JSON 格式):\n{\n  \"order_id\": \"A123\",\n  \"items\": [\"Laptop\", \"Mouse\"],\n  \"total\": 1200.50,\n  \"status\": \"pending\"\n}\n\n任务：检查订单状态。如果状态是 'pending'，请生成一封提醒用户确认的邮件草稿。\n\"\"\"\n\nresponse = client.chat.completions.create(\n    model=\"gpt-4\",\n    messages=[{\"role\": \"user\", \"content\": data_context}]\n)\n\nprint(response.choices[0].message.content)\n```\n\n### 3. 思维链 (Chain of Thought)\n对于复杂推理任务，引导模型“一步步思考”可以大幅提高准确率。这是在 Prompt 中加入推理过程的示范。\n\n**示例代码:**\n```python\n# 在 Prompt 中明确要求展示推理步骤\ncomplex_prompt = \"\"\"\n问题：罗杰有 5 个网球。他又买了两筒网球。每筒有 3 个网球。他现在有多少个网球？\n\n请按以下步骤回答：\n1. 计算罗杰最初拥有的网球数量。\n2. 计算新买的网球总数。\n3. 将两者相加得出最终结果。\n4. 给出最终答案。\n\"\"\"\n\nresponse = client.chat.completions.create(\n    model=\"gpt-4\",\n    messages=[{\"role\": \"user\", \"content\": complex_prompt}]\n)\n\nprint(response.choices[0].message.content)\n```\n\n### 关键注意事项\n- **Token 限制**：注意模型的上下文窗口限制（Context Window），过长的 Prompt 会被截断。\n- **非确定性**：相同的 Prompt 可能会产生不同的结果，建议在关键业务逻辑中加入验证步骤或使用 `temperature=0` 以获得更稳定的输出。\n- **安全性**：避免在 Prompt 中直接泄露敏感系统指令，防范“提示词注入”（Prompt Injection）攻击。","某金融科技公司（类似 Brex）的开发团队正在构建一个基于 GPT-4 的自动化财务摘要系统，需要从杂乱的银行交易流水和备注中提取关键信息并生成结构化报告。\n\n### 没有 prompt-engineering 时\n- **输出格式混乱**：模型经常返回非标准的文本描述或错误的 JSON 结构，导致后端程序无法解析，频繁抛出异常。\n- **幻觉与事实不符**：面对模糊的交易备注，模型倾向于“脑补”不存在的商户名称或金额，缺乏基于上下文的严谨性。\n- **逻辑推理薄弱**：在处理复杂的多笔关联转账时，模型无法展示推导过程，直接给出错误分类，开发者难以排查原因。\n- **安全漏洞风险**：用户输入的特殊字符或诱导性指令容易触发“越狱”，导致系统泄露内部提示词或执行未授权操作。\n- **迭代成本高昂**：每次调整需求都需要反复试错修改提示语，缺乏系统化的策略指导，效率极低。\n\n### 使用 prompt-engineering 后\n- **结构化数据交付**：利用指南中的\"JSON 嵌入”和“分隔符”策略，强制模型输出严格符合 Schema 的数据，实现零错误解析。\n- **引用与事实锚定**：应用“引用（Citations）”机制，要求模型仅依据提供的交易片段作答，显著消除了无中生有的幻觉。\n- **思维链可解释**：引入“思维链（Chain of Thought）”技巧，让模型先输出推理步骤再给结论，不仅提升了准确率，还便于人工审计。\n- **防御性提示设计**：遵循“安全建议”章节，预设防注入规则，有效拦截了恶意提示攻击，保障了生产环境安全。\n- **标准化开发流程**：团队依据指南中的最佳实践（如 ReAct 模式）快速构建提示模板，将新功能上线周期从数天缩短至数小时。\n\nprompt-engineering 将大模型的应用从“碰运气的艺术”转变为“可控的工程学科”，确保了金融级应用的准确性、安全性与可维护性。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fbrexhq_prompt-engineering_7c79f8ad.png","brexhq","Brex","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fbrexhq_4ca01dd4.png","",null,"https:\u002F\u002Fbrex.com","https:\u002F\u002Fgithub.com\u002Fbrexhq",9521,508,"2026-04-18T14:15:43","MIT",1,"未说明",{"notes":86,"python":84,"dependencies":87},"该工具并非可执行的软件代码库，而是一份关于大语言模型（LLM）提示工程（Prompt Engineering）的指南文档。它主要包含策略、历史背景、最佳实践和安全建议，不涉及具体的安装步骤、运行环境配置或依赖库安装。文中提到的示例是基于外部模型（如 OpenAI GPT-4）生成的，用户只需阅读文档内容即可，无需部署本地运行环境。",[],[35,14],"2026-03-27T02:49:30.150509","2026-04-19T15:46:30.820966",[92],{"id":93,"question_zh":94,"answer_zh":95,"source_url":96},42694,"文档中用于模拟助手\u002F用户聊天的界面是什么？","该界面是 OpenAI Playground。请访问 https:\u002F\u002Fplatform.openai.com\u002Fplayground，并将 `mode` 切换为 `chat` 即可看到相同的界面。","https:\u002F\u002Fgithub.com\u002Fbrexhq\u002Fprompt-engineering\u002Fissues\u002F3",[]]