[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-kagisearch--vectordb":3,"tool-kagisearch--vectordb":62},[4,18,26,36,46,54],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 
代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",158594,2,"2026-04-16T23:34:05",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":42,"last_commit_at":43,"category_tags":44,"status":17},8272,"opencode","anomalyco\u002Fopencode","OpenCode 是一款开源的 AI 编程助手（Coding Agent），旨在像一位智能搭档一样融入您的开发流程。它不仅仅是一个代码补全插件，而是一个能够理解项目上下文、自主规划任务并执行复杂编码操作的智能体。无论是生成全新功能、重构现有代码，还是排查难以定位的 Bug，OpenCode 都能通过自然语言交互高效完成，显著减少开发者在重复性劳动和上下文切换上的时间消耗。\n\n这款工具专为软件开发者、工程师及技术研究人员设计，特别适合希望利用大模型能力来提升编码效率、加速原型开发或处理遗留代码维护的专业人群。其核心亮点在于完全开源的架构，这意味着用户可以审查代码逻辑、自定义行为策略，甚至私有化部署以保障数据安全，彻底打破了传统闭源 AI 助手的“黑盒”限制。\n\n在技术体验上，OpenCode 提供了灵活的终端界面（Terminal UI）和正在测试中的桌面应用程序，支持 macOS、Windows 及 Linux 全平台。它兼容多种包管理工具，安装便捷，并能无缝集成到现有的开发环境中。无论您是追求极致控制权的资深极客，还是渴望提升产出的独立开发者，OpenCode 都提供了一个透明、可信",144296,1,"2026-04-16T14:50:03",[13,45],"插件",{"id":47,"name":48,"github_repo":49,"description_zh":50,"stars":51,"difficulty_score":32,"last_commit_at":52,"category_tags":53,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 
都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",108322,"2026-04-10T11:39:34",[14,15,13],{"id":55,"name":56,"github_repo":57,"description_zh":58,"stars":59,"difficulty_score":32,"last_commit_at":60,"category_tags":61,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[45,13,15,14],{"id":63,"github_repo":64,"name":65,"description_en":66,"description_zh":67,"ai_summary_zh":67,"readme_en":68,"readme_zh":69,"quickstart_zh":70,"use_case_zh":71,"hero_image_url":72,"owner_login":73,"owner_name":74,"owner_avatar_url":75,"owner_bio":76,"owner_company":77,"owner_location":77,"owner_email":77,"owner_twitter":77,"owner_website":78,"owner_url":79,"languages":80,"stars":85,"forks":86,"last_commit_at":87,"license":88,"difficulty_score":42,"env_os":89,"env_gpu":90,"env_ram":91,"env_deps":92,"category_tags":99,"github_topics":100,"view_count":32,"oss_zip_url":77,"oss_zip_packed_at":77,"status":17,"created_at":106,"updated_at":107,"faqs":108,"releases":134},8084,"kagisearch\u002Fvectordb","vectordb","A minimal Python package for storing and retrieving text using chunking, embeddings, and vector search.","vectordb 是一款轻量级的 Python 开源库，专为本地化的文本存储与检索而设计。它通过自动分块、生成嵌入向量以及执行向量搜索，帮助用户快速构建基于语义的文本检索系统，无需依赖复杂的外部数据库或服务。\n\n在处理长文本或海量文档时，传统关键词搜索往往难以捕捉深层语义关联。vectordb 解决了这一痛点，让开发者能轻松实现“按意思搜内容”的功能。其核心优势在于极低的延迟和微小的内存占用，甚至被用于驱动 Kagi Search 的 AI 特性。所有数据处理，包括模型推理和向量计算，均在用户本地完成，既保障了数据隐私，又确保了运行效率。\n\n这款工具特别适合需要快速原型验证的 AI 
开发者、希望在本机部署知识库的研究人员，以及想要在应用中集成智能搜索功能的工程师。只需几行代码，即可将文本存入内存并发起语义查询。vectordb 还提供了灵活的配置选项，支持滑动窗口或段落模式的分块策略，并允许用户根据需求切换不同精度的预训练嵌入模型，从极速模式到多语言支持均可按需选择。无论是构建个人知识助手，还是过滤 RSS 资讯流，vectordb 都能提供简洁高效的端到端解决方案。","# VectorDB\n\n\nVectorDB is a simple, lightweight, fully local, end-to-end solution for using embeddings-based text retrieval.\n\nThanks to its low latency and small memory footprint, VectorDB is used to power AI features inside [Kagi Search](https:\u002F\u002Fkagi.com).\n\nCheck an [example Colab notebook](https:\u002F\u002Fcolab.research.google.com\u002Fdrive\u002F1pecKGCCru_Jvx7v0WRNrW441EBlcS5qS#scrollTo=Eh6o8m7d8eOk) where this is used to filter the content of [Kagi Small Web](https:\u002F\u002Fkagi.com\u002Fsmallweb) RSS feed based on stated user interests.\n\n\n## Installation\n\nTo install VectorDB, use pip:\n\n```\npip install vectordb2\n```\n\n## Usage\n\nQuick example that loads data into memory, and runs retrieval. All data will be handled locally, including embeddings and vector search, completely transparent to the user with maximum possible performance. \n\n```python\nfrom vectordb import Memory\n\n# Memory is where all content you want to store\u002Fsearch goes.\nmemory = Memory()\n\nmemory.save(\n    [\"apples are green\", \"oranges are orange\"],  # save your text content. 
for long text we will automatically chunk it\n    [{\"url\": \"https:\u002F\u002Fapples.com\"}, {\"url\": \"https:\u002F\u002Foranges.com\"}], # associate any kind of metadata with it (optional)\n)\n\n# Search for top n relevant results, automatically using embeddings\nquery = \"green\"\nresults = memory.search(query, top_n = 1)\n\nprint(results)\n```\n\nThis returns the chunks with the added metadata and the vector distance (where 0 is the exact match and higher means further apart)\n\n```json\n[\n  {\n    \"chunk\": \"apples are green\",\n    \"metadata\": {\"url\": \"https:\u002F\u002Fapples.com\"},\n    \"distance\": 0.87\n  }\n]\n```\n\n## Options\n\n\n**Memory(memory_file=None, chunking_strategy={\"mode\":\"sliding_window\"},\nembeddings=\"normal\")**\n\n\n- `memory_file`: *Optional.* Path to the memory file. If provided, memory will persist to disk and loaded\u002Fsaved to this file. \n- `chunking_strategy`: *Optional.* Dictionary containing the chunking mode.\n  \n   Options:\\\n  `{'mode':'sliding_window', 'window_size': 240, 'overlap': 8}`   (default)\\\n  `{'mode':'paragraph'}`\n- `embeddings`: *Optional.* \n  \n   Options:\\\n   `fast` - Uses Universal Sentence Encoder 4\\\n   `normal` - Uses \"BAAI\u002Fbge-small-en-v1.5\" (default)\\\n   `best` - Uses \"BAAI\u002Fbge-base-en-v1.5\"\\\n   `multilingual` - Uses Universal Sentence Encoder Multilingual Large 3\n\n\n   You can also specify a custom HuggingFace model by name eg. `TaylorAI\u002Fbge-micro-v2`. See also [Pretrained models](https:\u002F\u002Fwww.sbert.net\u002Fdocs\u002Fpretrained_models.html) and [MTEB](https:\u002F\u002Fhuggingface.co\u002Fspaces\u002Fmteb\u002Fleaderboard).\n\n**Memory.save(texts, metadata, memory_file=None)**\n\nSave content to memory. 
Metadata will be automatically optimized to use fewer resources.\n\n- `texts`: *Required.*  Text or list of texts to be saved.\n- `metadata`: *Optional.* Metadata or list of metadata associated with the texts.\n- `memory_file`: *Optional.* Path to persist the memory file. By default \n\n**Memory.search(query, top_n=5, unique=False, batch_results=\"flatten\")**\n\nSearch inside memory.\n\n- `query`: *Required.* Query text or  list of queries (see `batch_results` option below for handling results for a list).\n- `top_n`:  *Optional.* Number of most similar chunks to return (default: 5).\n- `unique`:  *Optional.* Return only chunks from unique original texts (additional chunks coming from the same text will be ignored). Note this may return fewer chunks than requested (default: False).\n- `batch_results`:  *Optional.* When input is a list of queries, output algorithm can be \"flatten\" or \"diverse\". Flatten returns true nearest neighbours across all input queries, meaning all results could come from just one query. \"diverse\" attempts to spread out the results, so that each query's nearest neighbours are equally added (nearest first across all queries, then 2nd nearest and so on). (default: \"flatten\")\n\n**Memory.clear()**\n\nClears the memory.\n\n\n**Memory.dump()**\n\nPrints the contents of the memory.\n\n\n## Example\n\n```python\nfrom vectordb import Memory\n\nmemory = Memory(\n    chunking_strategy={\"mode\": \"sliding_window\", \"window_size\": 128, \"overlap\": 16}, embeddings='TaylorAI\u002Fbge-micro-v2'\n)\n\ntexts = [\n    \"\"\"\nMachine learning is a method of data analysis that automates analytical model building.\n\nIt is a branch of artificial intelligence based on the idea that systems can learn from data,\nidentify patterns and make decisions with minimal human intervention.\n\nMachine learning algorithms are trained on data sets that contain examples of the desired output. 
For example, a machine learning algorithm that is used to classify images might be trained on a data set that contains images of cats and dogs.\nOnce an algorithm is trained, it can be used to make predictions on new data. For example, the machine learning algorithm that is used to classify images could be used to predict whether a new image contains a cat or a dog.\n\nMachine learning algorithms can be used to solve a wide variety of problems. Some common applications of machine learning include:\n\nClassification: Categorizing data into different groups. For example, a machine learning algorithm could be used to classify emails as spam or not spam.\n\nRegression: Predicting a continuous value. For example, a machine learning algorithm could be used to predict the price of a house.\n\nClustering: Finding groups of similar data points. For example, a machine learning algorithm could be used to find groups of customers with similar buying habits.\n\nAnomaly detection: Finding data points that are different from the rest of the data. For example, a machine learning algorithm could be used to find fraudulent credit card transactions.\n\nMachine learning is a powerful tool that can be used to solve a wide variety of problems. As the amount of data available continues to grow, machine learning is likely to become even more important in the future.\n\"\"\",\n    \"\"\"\nArtificial intelligence (AI) is the simulation of human intelligence in machines\nthat are programmed to think like humans and mimic their actions.\n\nThe term may also be applied to any machine that exhibits traits associated with\na human mind such as learning and problem-solving.\n\nAI research has been highly successful in developing effective techniques for solving a wide range of problems, from game playing to medical diagnosis.\n\nHowever, there is still a long way to go before AI can truly match the intelligence of humans. 
One of the main challenges is that human intelligence is incredibly complex and poorly understood.\n\nDespite the challenges, AI is a rapidly growing field with the potential to revolutionize many aspects of our lives. Some of the potential benefits of AI include:\n\nIncreased productivity: AI can be used to automate tasks that are currently performed by humans, freeing up our time for more creative and fulfilling activities.\n\nImproved decision-making: AI can be used to make more informed decisions, based on a wider range of data than humans can typically access.\n\nEnhanced creativity: AI can be used to generate new ideas and solutions, beyond what humans can imagine on their own.\nOf course, there are also potential risks associated with AI, such as:\n\nJob displacement: As AI becomes more capable, it is possible that it will displace some human workers.\n\nWeaponization: AI could be used to develop new weapons that are more powerful and destructive than anything we have today.\n\nLoss of control: If AI becomes too powerful, we may lose control over it, with potentially disastrous consequences.\n\nIt is important to weigh the potential benefits and risks of AI carefully as we continue to develop this technology. With careful planning and oversight, AI has the potential to make the world a better place. 
However, if we are not careful, it could also lead to serious problems.\n\"\"\",\n]\n\nmetadata_list = [\n    {\n        \"title\": \"Introduction to Machine Learning\",\n        \"url\": \"https:\u002F\u002Fexample.com\u002Fintroduction-to-machine-learning\",\n    },\n    {\n        \"title\": \"Introduction to Artificial Intelligence\",\n        \"url\": \"https:\u002F\u002Fexample.com\u002Fintroduction-to-artificial-intelligence\",\n    },\n]\n\nmemory.save(texts, metadata_list)\n\nquery = \"What is the relationship between AI and machine learning?\"\nresults = memory.search(query, top_n=3, unique=True)\nprint(results)\n\n# two results will be returned as unique param is set to True\n```\n\nOutput:\n```json\n[\n  {\n    \"chunk\": \"Artificial intelligence (AI) is the simulation of human intelligence in machines that are programmed to think like humans and mimic their actions. The term may also be applied to any machine that exhibits traits associated with a human mind such as learning and problem-solving. AI research has been highly successful in developing effective techniques for solving a wide range of problems, from game playing to medical diagnosis. However, there is still a long way to go before AI can truly match the intelligence of humans. One of the main challenges is that human intelligence is incredibly complex and poorly understood. Despite the challenges, AI is a rapidly growing field with the potential to revolutionize many aspects of our lives. Some of the potential benefits of AI include: Increased\",\n    \"metadata\": {\n      \"title\": \"Introduction to Artificial Intelligence\",\n      \"url\": \"https:\u002F\u002Fexample.com\u002Fintroduction-to-artificial-intelligence\"\n    },\n    \"distance\": 0.87\n  },\n  {\n    \"chunk\": \"Machine learning is a method of data analysis that automates analytical model building. 
It is a branch of artificial intelligence based on the idea that systems can learn from data, identify patterns and make decisions with minimal human intervention. Machine learning algorithms are trained on data sets that contain examples of the desired output. For example, a machine learning algorithm that is used to classify images might be trained on a data set that contains images of cats and dogs. Once an algorithm is trained, it can be used to make predictions on new data. For example, the machine learning algorithm that is used to classify images could be used to predict whether a new image contains a cat or a dog. Machine learning algorithms can be used\",\n    \"metadata\": {\n      \"title\": \"Introduction to Machine Learning\",\n      \"url\": \"https:\u002F\u002Fexample.com\u002Fintroduction-to-machine-learning\"\n    },\n    \"distance\": 0.83\n  }\n]\n\n```\n\n## Embeddings performance analysis\n\n\nWe constantly evaluate embedding models using standardized benchmarks (higher is better). Average latency is measured locally on CPU (lower is better). Benchmark data pulled from [MTEB](https:\u002F\u002Fhuggingface.co\u002Fspaces\u002Fmteb\u002Fleaderboard). 
\n\n\n\n| Model                                         | Latency  | Benchmark 1 | Benchmark 2 | Benchmark 3 | Benchmark 4 |\n|-----------------------------------------------|----------|-------------|-------------|-------------|-------------|\n| all-mpnet-base-v2                              | 6.12 s   | 80.28       | 65.07       | 43.69       | 83.04       |\n| all-MiniLM-L6-v2                               | 1.14 s   | 78.9        | 63.05       | 42.35       | 82.37       |\n| BAAI\u002Fbge-large-en-v1.5                         | 20.8 s   | 83.11       | 75.97       | 46.08       | 87.12       |\n| BAAI\u002Fbge-base-en-v1.5                          | 6.48 s   | 82.4        | 75.53       | 45.77       | 86.55       |\n| BAAI\u002Fbge-small-en-v1.5                         | 1.85 s   | 81.59       | 74.14       | 43.82       | 84.92       |\n| TaylorAI\u002Fbge-micro-v2                          | 0.671 s  | 78.65       | 68.04       | 39.18       | 82.81       |\n| TaylorAI\u002Fgte-tiny                              | 1.25 s   | 80.46       | 70.35       | 42.09       | 82.83       |\n| thenlper\u002Fgte-base                              | 6.28 s   | 82.3        | 73.01       | 46.2        | 84.57       |\n| thenlper\u002Fgte-small                             | 2.14 s   | 82.07       | 72.31       | 44.89       | 83.54       |\n| universal-sentence-encoder-large\u002F5             | 0.769 s  | 74.05       | 67.9        | 37.82       | 79.53       |\n| universal-sentence-encoder-multilingual-large\u002F3| 1.02 s   | 75.35       | 65.78       | 35.06       | 79.62       |\n| universal-sentence-encoder-multilingual\u002F3      | 0.162 s  | 75.39       | 63.42       | 34.82       | 75.43       |\n| universal-sentence-encoder\u002F4                   | 0.019 s  | 72.04       | 64.45       | 35.71       | 76.23       |\n\n*Relative embeddings latency on CPU*\n![Embeddings Latency on 
CPU](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkagisearch_vectordb_readme_4e80c3225a98.png)\n\n*Relative embeddings latency on GPU*\n![Embeddings Latency on GPU](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkagisearch_vectordb_readme_06ac8b85407c.png)\n\n\n![Embeddings Quality](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkagisearch_vectordb_readme_de2aa6f43733.png)\n\n![Scatter of Embeddings](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkagisearch_vectordb_readme_1678d2206b49.png)\n\n\n\n## Vector search performance analysis\n\nVectorDB is also optimized for speed of retrieval. We automatically use [Faiss](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Ffaiss) for a low number of chunks (\u003C4000) and [mrpt](https:\u002F\u002Fgithub.com\u002Fvioshyvo\u002Fmrpt) for a high number of chunks to ensure maximum performance across the spectrum of use cases.\n\n![Vector search engine comparison](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkagisearch_vectordb_readme_7139ba9539e8.png)\n\n## License\n\nMIT License.\n","# VectorDB\n\n\nVectorDB 是一个简单、轻量级、完全本地化的端到端解决方案，用于基于嵌入的文本检索。\n\n得益于其低延迟和较小的内存占用，VectorDB 被用于支持 [Kagi Search](https:\u002F\u002Fkagi.com) 内部的 AI 功能。\n\n请查看一个 [示例 Colab 笔记本](https:\u002F\u002Fcolab.research.google.com\u002Fdrive\u002F1pecKGCCru_Jvx7v0WRNrW441EBlcS5qS#scrollTo=Eh6o8m7d8eOk)，其中它被用来根据用户声明的兴趣过滤 [Kagi Small Web](https:\u002F\u002Fkagi.com\u002Fsmallweb) 的 RSS 订阅内容。\n\n\n## 安装\n\n要安装 VectorDB，请使用 pip：\n\n```\npip install vectordb2\n```\n\n## 使用方法\n\n以下是一个快速示例，将数据加载到内存中并执行检索。所有数据，包括嵌入和向量搜索，都将在本地处理，对用户完全透明，同时提供尽可能高的性能。\n\n```python\nfrom vectordb import Memory\n\n# Memory 是您想要存储或搜索的所有内容的容器。\nmemory = Memory()\n\nmemory.save(\n    [\"苹果是绿色的\", \"橙子是橙色的\"],  # 保存您的文本内容。对于长文本，我们会自动进行分块处理\n    [{\"url\": \"https:\u002F\u002Fapples.com\"}, {\"url\": \"https:\u002F\u002Foranges.com\"}],  # 可选：为内容关联任意元数据\n)\n\n# 搜索前 n 个相关结果，自动使用嵌入技术\nquery = \"绿色\"\nresults = memory.search(query, 
top_n=1)\n\nprint(results)\n```\n\n此操作将返回带有附加元数据和向量距离的文本块（其中 0 表示完全匹配，数值越大表示距离越远）。\n\n```json\n[\n  {\n    \"chunk\": \"苹果是绿色的\",\n    \"metadata\": {\"url\": \"https:\u002F\u002Fapples.com\"},\n    \"distance\": 0.87\n  }\n]\n```\n\n## 选项\n\n\n**Memory(memory_file=None, chunking_strategy={\"mode\":\"sliding_window\"},\nembeddings=\"normal\")**\n\n\n- `memory_file`: *可选。* 内存文件的路径。如果提供，内存将持久化到磁盘，并从此文件加载或保存。\n- `chunking_strategy`: *可选。* 包含分块模式的字典。\n  \n   选项：\\\n  `{'mode':'sliding_window', 'window_size': 240, 'overlap': 8}`   (默认)\\\n  `{'mode':'paragraph'}`\n- `embeddings`: *可选。* \n  \n   选项：\\\n   `fast` - 使用 Universal Sentence Encoder 4\\\n   `normal` - 使用 \"BAAI\u002Fbge-small-en-v1.5\" (默认)\\\n   `best` - 使用 \"BAAI\u002Fbge-base-en-v1.5\"\\\n   `multilingual` - 使用 Universal Sentence Encoder Multilingual Large 3\n\n\n   您也可以指定自定义的 HuggingFace 模型名称，例如 `TaylorAI\u002Fbge-micro-v2`。更多信息请参阅 [预训练模型](https:\u002F\u002Fwww.sbert.net\u002Fdocs\u002Fpretrained_models.html) 和 [MTEB](https:\u002F\u002Fhuggingface.co\u002Fspaces\u002Fmteb\u002Fleaderboard)。\n\n**Memory.save(texts, metadata, memory_file=None)**\n\n将内容保存到内存中。元数据会自动优化以减少资源占用。\n\n- `texts`: *必填。* 要保存的文本或文本列表。\n- `metdata`: *可选。* 与文本关联的元数据或元数据列表。\n- `memory_file`: *可选。* 用于持久化内存文件的路径。默认情况下\n\n**Memory.search(query, top_n=5, unique=False, batch_results=\"flatten\")**\n\n在内存中进行搜索。\n\n- `query`: *必填。* 查询文本或查询列表（有关如何处理列表查询的结果，请参阅下方的 `batch_results` 选项）。\n- `top_n`: *可选。* 返回最相似文本块的数量（默认：5）。\n- `unique`: *可选。* 仅返回来自不同原始文本的文本块（来自同一文本的重复块将被忽略）。请注意，这可能会导致返回的文本块数量少于请求的数量（默认：False）。\n- `batch_results`: *可选。* 当输入为查询列表时，输出算法可以是“flatten”或“diverse”。Flatten 会返回所有查询中最接近的邻居，这意味着所有结果可能都来自单个查询。而“diverse”则会尝试分散结果，使每个查询的最近邻都能被均匀地加入（先按所有查询的最近邻排序，再按次近邻排序，依此类推）。（默认：“flatten”）\n\n**Memory.clear()**\n\n清空内存。\n\n\n**Memory.dump()**\n\n打印内存中的内容。\n\n## 示例\n\n```python\nfrom vectordb import Memory\n\nmemory = Memory(\n    chunking_strategy={\"mode\": \"sliding_window\", \"window_size\": 128, \"overlap\": 16}, 
embeddings='TaylorAI\u002Fbge-micro-v2'\n)\n\ntexts = [\n    \"\"\"\n机器学习是一种数据分析方法，能够自动构建分析模型。\n\n它是人工智能的一个分支，其核心思想是让系统能够从数据中学习、识别模式，并在尽量减少人类干预的情况下做出决策。\n\n机器学习算法会基于包含期望输出示例的数据集进行训练。例如，用于图像分类的机器学习算法可能会在包含猫和狗图像的数据集上进行训练。一旦算法训练完成，它就可以用来对新数据进行预测。比如，该图像分类算法可以用来判断一张新图片中是猫还是狗。\n\n机器学习算法可以应用于解决各种各样的问题。常见的应用场景包括：\n\n分类：将数据划分为不同的类别。例如，机器学习算法可以用来将电子邮件分类为垃圾邮件或非垃圾邮件。\n\n回归：预测连续值。例如，机器学习算法可以用来预测房屋的价格。\n\n聚类：找出相似的数据点群组。例如，机器学习算法可以用来发现具有相似购买习惯的客户群体。\n\n异常检测：识别与整体数据不同的异常点。例如，机器学习算法可以用来检测欺诈性的信用卡交易。\n\n机器学习是一种功能强大的工具，能够解决多种多样的问题。随着可用数据量的持续增长，机器学习在未来很可能会变得更加重要。\n\"\"\",\n    \"\"\"\n人工智能（AI）是指通过编程使机器模拟人类智能，从而像人类一样思考并模仿人类行为。\n\n该术语也可以指任何表现出与人类思维相关特征的机器，例如学习和解决问题的能力。\n\n人工智能研究在开发解决广泛问题的有效技术方面取得了巨大成功，从游戏博弈到医学诊断等各个领域。\n\n然而，要真正达到人类的智能水平，人工智能还有很长的路要走。其中一个主要挑战在于，人类智能极其复杂且尚未完全被理解。\n\n尽管面临诸多挑战，人工智能仍是一个快速发展的领域，有望彻底改变我们生活的许多方面。人工智能的一些潜在好处包括：\n\n提高生产力：人工智能可以自动化目前由人类执行的任务，从而让我们有更多时间从事更具创造性和成就感的工作。\n\n改善决策能力：人工智能可以根据比人类通常能够获取的更广泛的数据做出更明智的决策。\n\n增强创造力：人工智能可以生成新的想法和解决方案，超越人类自身的想象范围。\n\n当然，人工智能也伴随着一些潜在风险，例如：\n\n工作岗位流失：随着人工智能能力的提升，部分人类工作岗位可能会被取代。\n\n武器化：人工智能可能被用于研发比现有武器更强大、更具破坏性的新型武器。\n\n失去控制：如果人工智能变得过于强大，我们可能会失去对其的控制，进而带来灾难性的后果。\n\n因此，在继续发展这项技术的过程中，我们需要仔细权衡人工智能的潜在收益与风险。通过周密的规划和监督，人工智能确实有可能让世界变得更美好；但若处理不当，也可能引发严重问题。\n\"\"\",\n]\n\nmetadata_list = [\n    {\n        \"title\": \"机器学习简介\",\n        \"url\": \"https:\u002F\u002Fexample.com\u002Fintroduction-to-machine-learning\",\n    },\n    {\n        \"title\": \"人工智能简介\",\n        \"url\": \"https:\u002F\u002Fexample.com\u002Fintroduction-to-artificial-intelligence\",\n    },\n]\n\nmemory.save(texts, metadata_list)\n\nquery = \"人工智能与机器学习之间有什么关系？\"\nresults = memory.search(query, top_n=3, unique=True)\nprint(results)\n\n# 因设置了unique参数为True，将返回两条结果\n```\n\n输出：\n```json\n[\n  {\n    \"chunk\": \"人工智能（AI）是指通过编程使机器模拟人类智能，从而像人类一样思考并模仿人类行为。该术语也可以指任何表现出与人类思维相关特征的机器，例如学习和解决问题的能力。人工智能研究在开发解决广泛问题的有效技术方面取得了巨大成功，从游戏博弈到医学诊断等各个领域。然而，要真正达到人类的智能水平，人工智能还有很长的路要走。其中一个主要挑战在于，人类智能极其复杂且尚未完全被理解。尽管面临诸多挑战，人工智能仍是一个快速发展的领域，有望彻底改变我们生活的许多方面。人工智能的一些潜在好处包括：增加\",\n    \"metadata\": 
{\n      \"title\": \"人工智能简介\",\n      \"url\": \"https:\u002F\u002Fexample.com\u002Fintroduction-to-artificial-intelligence\"\n    },\n    \"distance\": 0.87\n  },\n  {\n    \"chunk\": \"机器学习是一种数据分析方法，能够自动构建分析模型。它是人工智能的一个分支，其核心思想是让系统能够从数据中学习、识别模式并以最小的人为干预做出决策。机器学习算法会基于包含期望输出示例的数据集进行训练。例如，用于图像分类的机器学习算法可能会在包含猫和狗图像的数据集上进行训练。一旦算法训练完成，它就可以用来对新数据进行预测。例如，该图像分类算法可以用来判断一张新图片中是猫还是狗。机器学习算法可以用于\",\n    \"metadata\": {\n      \"title\": \"机器学习简介\",\n      \"url\": \"https:\u002F\u002Fexample.com\u002Fintroduction-to-machine-learning\"\n    },\n    \"distance\": 0.83\n  }\n]\n\n```\n\n## 嵌入模型性能分析\n\n\n我们使用标准化基准持续评估嵌入模型（分数越高越好）。平均延迟是在本地 CPU 上测量的（数值越低越好）。基准数据来自 [MTEB](https:\u002F\u002Fhuggingface.co\u002Fspaces\u002Fmteb\u002Fleaderboard)。\n\n\n\n| 模型                                         | 延迟  | 基准测试 1 | 基准测试 2 | 基准测试 3 | 基准测试 4 |\n|-----------------------------------------------|----------|-------------|-------------|-------------|-------------|\n| all-mpnet-base-v2                              | 6.12 s   | 80.28       | 65.07       | 43.69       | 83.04       |\n| all-MiniLM-L6-v2                               | 1.14 s   | 78.9        | 63.05       | 42.35       | 82.37       |\n| BAAI\u002Fbge-large-en-v1.5                         | 20.8 s   | 83.11       | 75.97       | 46.08       | 87.12       |\n| BAAI\u002Fbge-base-en-v1.5                          | 6.48 s   | 82.4        | 75.53       | 45.77       | 86.55       |\n| BAAI\u002Fbge-small-en-v1.5                         | 1.85 s   | 81.59       | 74.14       | 43.82       | 84.92       |\n| TaylorAI\u002Fbge-micro-v2                          | 0.671 s  | 78.65       | 68.04       | 39.18       | 82.81       |\n| TaylorAI\u002Fgte-tiny                              | 1.25 s   | 80.46       | 70.35       | 42.09       | 82.83       |\n| thenlper\u002Fgte-base                              | 6.28 s   | 82.3        | 73.01       | 46.2        | 84.57       |\n| thenlper\u002Fgte-small                             | 2.14 s   | 
82.07       | 72.31       | 44.89       | 83.54       |\n| universal-sentence-encoder-large\u002F5             | 0.769 s  | 74.05       | 67.9        | 37.82       | 79.53       |\n| universal-sentence-encoder-multilingual-large\u002F3| 1.02 s   | 75.35       | 65.78       | 35.06       | 79.62       |\n| universal-sentence-encoder-multilingual\u002F3      | 0.162 s  | 75.39       | 63.42       | 34.82       | 75.43       |\n| universal-sentence-encoder\u002F4                   | 0.019 s  | 72.04       | 64.45       | 35.71       | 76.23       |\n\n*CPU 上的相对嵌入延迟*\n![CPU 上的嵌入延迟](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkagisearch_vectordb_readme_4e80c3225a98.png)\n\n*GPU 上的相对嵌入延迟*\n![GPU 上的嵌入延迟](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkagisearch_vectordb_readme_06ac8b85407c.png)\n\n\n![嵌入质量](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkagisearch_vectordb_readme_de2aa6f43733.png)\n\n![嵌入散点图](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkagisearch_vectordb_readme_1678d2206b49.png)\n\n\n\n## 向量检索性能分析\n\n向量数据库同样针对检索速度进行了优化。对于小规模数据集（少于 4000 个分块），我们会自动使用 [Faiss](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Ffaiss)；而对于大规模数据集，则使用 [mrpt](https:\u002F\u002Fgithub.com\u002Fvioshyvo\u002Fmrpt)，以确保在各种应用场景下都能达到最佳性能。\n\n![向量搜索引擎对比](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkagisearch_vectordb_readme_7139ba9539e8.png)\n\n## 许可证\n\nMIT 许可证。","# VectorDB 快速上手指南\n\nVectorDB 是一个简单、轻量级且完全本地化的端到端解决方案，专为基于嵌入（Embeddings）的文本检索设计。它具有低延迟和小内存占用的特点，所有数据处理（包括向量化和搜索）均在本地完成，无需依赖外部服务。\n\n## 环境准备\n\n- **操作系统**：支持 Windows、macOS 和 Linux。\n- **Python 版本**：建议 Python 3.8 及以上版本。\n- **前置依赖**：无需额外安装系统级依赖，库会自动处理模型下载和运行时环境。\n- **网络要求**：首次运行时需联网下载预训练嵌入模型（后续可离线使用）。若网络受限，可配置国内镜像或手动下载模型。\n\n## 安装步骤\n\n使用 pip 直接安装：\n\n```bash\npip install vectordb2\n```\n\n> **提示**：国内用户若下载缓慢，可尝试使用清华或阿里镜像源加速：\n> ```bash\n> pip install vectordb2 -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n> ```\n\n## 
基本使用\n\n以下是最简示例，展示如何初始化内存、保存文本数据并执行语义搜索。\n\n```python\nfrom vectordb import Memory\n\n# 初始化内存对象（所有数据存储在内存中）\nmemory = Memory()\n\n# 保存文本内容及可选元数据\n# 长文本会自动分块处理\nmemory.save(\n    [\"apples are green\", \"oranges are orange\"],\n    [{\"url\": \"https:\u002F\u002Fapples.com\"}, {\"url\": \"https:\u002F\u002Foranges.com\"}],\n)\n\n# 执行语义搜索\nquery = \"green\"\nresults = memory.search(query, top_n=1)\n\nprint(results)\n```\n\n**输出示例：**\n\n```json\n[\n  {\n    \"chunk\": \"apples are green\",\n    \"metadata\": {\"url\": \"https:\u002F\u002Fapples.com\"},\n    \"distance\": 0.87\n  }\n]\n```\n\n### 参数说明简述\n\n- `Memory()` 支持自定义分块策略和嵌入模型，例如：\n  ```python\n  memory = Memory(\n      chunking_strategy={\"mode\": \"sliding_window\", \"window_size\": 128, \"overlap\": 16},\n      embeddings='TaylorAI\u002Fbge-micro-v2'\n  )\n  ```\n- `save(texts, metadata)`：`texts` 为必填，`metadata` 可选。\n- `search(query, top_n=5)`：返回最相关的文本片段及距离值（越小越匹配）。\n\n该工具适合快速构建本地知识库、文档检索或个性化内容过滤等应用场景。","一位独立开发者正在构建一个本地运行的个人知识库助手，需要从数百篇保存的技术博客和笔记中快速检索与当前编程问题最相关的片段。\n\n### 没有 vectordb 时\n- **检索精度低**：传统的关键词匹配无法理解语义，搜索\"Python 异步”时无法命中包含\"asyncio 并发”但未出现关键词的文章。\n- **长文处理困难**：手动编写代码对长篇文章进行切片（Chunking）逻辑复杂，容易破坏上下文连贯性或丢失关键信息。\n- **部署门槛高**：引入大型向量数据库（如 Milvus 或 Qdrant）需要配置 Docker 容器和管理外部服务，增加了本地轻量级应用的运维负担。\n- **响应延迟大**：复杂的数据库连接和网络开销导致查询响应慢，无法实现“即输即得”的流畅交互体验。\n\n### 使用 vectordb 后\n- **语义理解精准**：vectordb 内置嵌入模型自动将文本转化为向量，能直接识别\"Python 异步”与\"asyncio\"的语义关联，返回高度相关的内容。\n- **自动智能切片**：只需传入原始长文本，vectordb 通过滑动窗口等策略自动完成分块，无需开发者关心底层数据处理细节。\n- **极简本地集成**：作为一个纯 Python 包，vectordb 无需任何外部服务或 Docker，直接通过 `pip install` 即可在本地内存或文件中运行。\n- **毫秒级响应**：得益于轻量级架构和低内存占用，查询过程完全在本地高速完成，为用户提供实时的知识反馈。\n\nvectordb 让开发者仅需几行代码，就能在本地应用中拥有媲美企业级系统的语义检索能力，极大降低了 AI 功能的落地成本。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkagisearch_vectordb_912b5856.png","kagisearch","Kagi Search","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fkagisearch_797fd36f.jpg","Kagi search 
engine.",null,"https:\u002F\u002Fkagi.com","https:\u002F\u002Fgithub.com\u002Fkagisearch",[81],{"name":82,"color":83,"percentage":84},"Python","#3572A5",100,786,42,"2026-04-10T03:31:00","MIT","未说明","非必需。默认在 CPU 上运行（README 提及延迟测试基于 CPU），支持通过 HuggingFace 加载自定义模型，但未明确指定 GPU 型号或 CUDA 版本要求。","未说明（描述为'轻量级'且'内存占用小'，具体数值取决于所选嵌入模型大小）",{"notes":93,"python":89,"dependencies":94},"该工具完全本地运行，无需外部服务。安装命令为 `pip install vectordb2`。用户可选择不同的预训练嵌入模型（如 fast, normal, best, multilingual 或自定义 HuggingFace 模型），模型选择将直接影响内存占用和推理延迟。数据可持久化存储到磁盘。",[95,96,97,98],"vectordb2","sentence-transformers (隐含，用于加载 SBERT\u002FMTEB 模型)","torch (隐含，深度学习后端)","transformers (隐含，HuggingFace 生态)",[35,15,13,14],[101,102,103,104,105],"ai","artificial-intelligence","llm","llms","machine-learning","2026-03-27T02:49:30.150509","2026-04-17T09:54:10.475971",[109,114,119,124,129],{"id":110,"question_zh":111,"answer_zh":112,"source_url":113},36350,"搜索空数据库或数据量过小时报错 'ValueError: not enough values to unpack' 怎么办？","该错误通常发生在尝试搜索空字符串或未保存任何数据时（向量形状为 (0,)）。目前的临时解决方案是在调用搜索前自行添加 try\u002Fexcept 块来捕获异常，或者在代码层面进行早期检查以避免对空条目执行搜索操作。维护者已确认这是一个需要修复的 Bug，建议用户关注后续版本更新以获取正式修复。","https:\u002F\u002Fgithub.com\u002Fkagisearch\u002Fvectordb\u002Fissues\u002F10",{"id":115,"question_zh":116,"answer_zh":117,"source_url":118},36351,"如果我已经自己完成了文本分块，如何跳过 vectordb 内置的分块方法？","可以通过配置滑动窗口策略来变相跳过自动分块。具体做法是将 `window_size`（窗口大小）设置为一个非常大的值，这样内置的分块逻辑就不会对文本进行实质性的切割，从而直接使用你提供的文本块进行嵌入和检索。","https:\u002F\u002Fgithub.com\u002Fkagisearch\u002Fvectordb\u002Fissues\u002F15",{"id":120,"question_zh":121,"answer_zh":122,"source_url":123},36352,"初始化 Memory 类时传入 'embeddings' 参数报错 'TypeError: unexpected keyword argument' 是怎么回事？","这是因为当前版本的 API 可能已变更或不支持直接在 `__init__` 中通过 `embeddings` 关键字参数传入模型名称。由于项目目前资源有限，维护者暂时不接受此类 Bug 报告，建议用户直接查阅最新源代码或提交合并请求（MR）来适配新的初始化方式。","https:\u002F\u002Fgithub.com\u002Fkagisearch\u002Fvectordb\u002Fissues\u002F7",{"id":125,"question_zh":126,"answer_zh":127,"source_url":128},36353,"从磁盘加载 Memory 
文件失败或丢失元数据（metadata）如何解决？","这是由于元数据和嵌入向量被分开存储导致的兼容性问题。目前的建议方案是在保存时将两者合并为一个字典，加载时再将其拆分回两个部分。虽然这会稍微增加复杂性，但能确保数据完整加载。维护者认可此方案并欢迎社区提交相关修复代码。","https:\u002F\u002Fgithub.com\u002Fkagisearch\u002Fvectordb\u002Fissues\u002F5",{"id":130,"question_zh":131,"answer_zh":132,"source_url":133},36354,"查询结果中能否返回向量之间的距离或设置最小阈值？","目前该功能尚未默认实现，但维护者认为这是一个很好的想法并欢迎贡献。如果需要此功能，建议开发者参考源码自行添加距离返回逻辑或阈值过滤功能，并提交合并请求（MR）。","https:\u002F\u002Fgithub.com\u002Fkagisearch\u002Fvectordb\u002Fissues\u002F2",[]]