[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-marv1nnnnn--llm-min.txt":3,"tool-marv1nnnnn--llm-min.txt":64},[4,17,27,35,43,56],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":16},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,3,"2026-04-05T11:01:52",[13,14,15],"开发框架","图像","Agent","ready",{"id":18,"name":19,"github_repo":20,"description_zh":21,"stars":22,"difficulty_score":23,"last_commit_at":24,"category_tags":25,"status":16},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",138956,2,"2026-04-05T11:33:21",[13,15,26],"语言模型",{"id":28,"name":29,"github_repo":30,"description_zh":31,"stars":32,"difficulty_score":23,"last_commit_at":33,"category_tags":34,"status":16},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 
绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",107662,"2026-04-03T11:11:01",[13,14,15],{"id":36,"name":37,"github_repo":38,"description_zh":39,"stars":40,"difficulty_score":23,"last_commit_at":41,"category_tags":42,"status":16},3704,"NextChat","ChatGPTNextWeb\u002FNextChat","NextChat 是一款轻量且极速的 AI 助手，旨在为用户提供流畅、跨平台的大模型交互体验。它完美解决了用户在多设备间切换时难以保持对话连续性，以及面对众多 AI 模型不知如何统一管理的痛点。无论是日常办公、学习辅助还是创意激发，NextChat 都能让用户随时随地通过网页、iOS、Android、Windows、MacOS 或 Linux 端无缝接入智能服务。\n\n这款工具非常适合普通用户、学生、职场人士以及需要私有化部署的企业团队使用。对于开发者而言，它也提供了便捷的自托管方案，支持一键部署到 Vercel 或 Zeabur 等平台。\n\nNextChat 的核心亮点在于其广泛的模型兼容性，原生支持 Claude、DeepSeek、GPT-4 及 Gemini Pro 等主流大模型，让用户在一个界面即可自由切换不同 AI 能力。此外，它还率先支持 MCP（Model Context Protocol）协议，增强了上下文处理能力。针对企业用户，NextChat 提供专业版解决方案，具备品牌定制、细粒度权限控制、内部知识库整合及安全审计等功能，满足公司对数据隐私和个性化管理的高标准要求。",87618,"2026-04-05T07:20:52",[13,26],{"id":44,"name":45,"github_repo":46,"description_zh":47,"stars":48,"difficulty_score":23,"last_commit_at":49,"category_tags":50,"status":16},2268,"ML-For-Beginners","microsoft\u002FML-For-Beginners","ML-For-Beginners 是由微软推出的一套系统化机器学习入门课程，旨在帮助零基础用户轻松掌握经典机器学习知识。这套课程将学习路径规划为 12 周，包含 26 节精炼课程和 52 道配套测验，内容涵盖从基础概念到实际应用的完整流程，有效解决了初学者面对庞大知识体系时无从下手、缺乏结构化指导的痛点。\n\n无论是希望转型的开发者、需要补充算法背景的研究人员，还是对人工智能充满好奇的普通爱好者，都能从中受益。课程不仅提供了清晰的理论讲解，还强调动手实践，让用户在循序渐进中建立扎实的技能基础。其独特的亮点在于强大的多语言支持，通过自动化机制提供了包括简体中文在内的 50 多种语言版本，极大地降低了全球不同背景用户的学习门槛。此外，项目采用开源协作模式，社区活跃且内容持续更新，确保学习者能获取前沿且准确的技术资讯。如果你正寻找一条清晰、友好且专业的机器学习入门之路，ML-For-Beginners 将是理想的起点。",84991,"2026-04-05T10:45:23",[14,51,52,53,15,54,26,13,55],"数据工具","视频","插件","其他","音频",{"id":57,"name":58,"github_repo":59,"description_zh":60,"stars":61,"difficulty_score":10,"last_commit_at":62,"category_tags":63,"status":16},3128,"ragflow","infiniflow\u002Fragflow","RAGFlow 
是一款领先的开源检索增强生成（RAG）引擎，旨在为大语言模型构建更精准、可靠的上下文层。它巧妙地将前沿的 RAG 技术与智能体（Agent）能力相结合，不仅支持从各类文档中高效提取知识，还能让模型基于这些知识进行逻辑推理和任务执行。\n\n在大模型应用中，幻觉问题和知识滞后是常见痛点。RAGFlow 通过深度解析复杂文档结构（如表格、图表及混合排版），显著提升了信息检索的准确度，从而有效减少模型“胡编乱造”的现象，确保回答既有据可依又具备时效性。其内置的智能体机制更进一步，使系统不仅能回答问题，还能自主规划步骤解决复杂问题。\n\n这款工具特别适合开发者、企业技术团队以及 AI 研究人员使用。无论是希望快速搭建私有知识库问答系统，还是致力于探索大模型在垂直领域落地的创新者，都能从中受益。RAGFlow 提供了可视化的工作流编排界面和灵活的 API 接口，既降低了非算法背景用户的上手门槛，也满足了专业开发者对系统深度定制的需求。作为基于 Apache 2.0 协议开源的项目，它正成为连接通用大模型与行业专有知识之间的重要桥梁。",77062,"2026-04-04T04:44:48",[15,14,13,26,54],{"id":65,"github_repo":66,"name":67,"description_en":68,"description_zh":69,"ai_summary_zh":69,"readme_en":70,"readme_zh":71,"quickstart_zh":72,"use_case_zh":73,"hero_image_url":74,"owner_login":75,"owner_name":76,"owner_avatar_url":77,"owner_bio":76,"owner_company":76,"owner_location":76,"owner_email":76,"owner_twitter":76,"owner_website":78,"owner_url":79,"languages":80,"stars":85,"forks":86,"last_commit_at":87,"license":88,"difficulty_score":23,"env_os":89,"env_gpu":90,"env_ram":90,"env_deps":91,"category_tags":97,"github_topics":98,"view_count":23,"oss_zip_url":76,"oss_zip_packed_at":76,"status":16,"created_at":102,"updated_at":103,"faqs":104,"releases":105},2700,"marv1nnnnn\u002Fllm-min.txt","llm-min.txt","Min.js Style Compression of Tech Docs for LLM Context","llm-min.txt 是一款专为大语言模型（LLM）设计的文档压缩工具，旨在解决 AI 编程助手因知识截止而无法获取最新技术库信息的问题。当前主流的解决方案（如 llms.txt）往往文件过大，容易超出 AI 的上下文窗口限制，或者依赖不透明的外部服务。llm-min.txt 借鉴了 Web 开发中\"min.js\"的理念，将冗长的技术文档进行极致精简，去除冗余内容，仅保留机器阅读所需的核心信息。\n\n通过这种“机器优先”的压缩格式，llm-min.txt 能在大幅减少 Token 消耗的同时，确保 AI 准确理解最新的 API 变更和功能特性，从而生成更可靠、与时俱进的代码建议。它特别适合开发者、技术研究人员以及经常使用 AI 编程辅助工具（如 Cursor、GitHub Copilot）的用户。无论是维护开源项目还是探索新技术栈，llm-min.txt 都能帮助用户轻松构建轻量级、高精度的上下文环境，让 AI 真正“读懂”最新文档，填补知识鸿沟。","# llm-min.txt: Min.js Style Compression of Tech Docs for LLM Context 🤖\n\n[![License: MIT](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FLicense-MIT-yellow.svg)](https:\u002F\u002Fopensource.org\u002Flicenses\u002FMIT)\n[![Python 
Version](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FPython-3.10%2B-blue)](https:\u002F\u002Fwww.python.org\u002Fdownloads\u002F)\n[![Gemini API](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FGemini-API-green)](https:\u002F\u002Fconsole.cloud.google.com\u002Fapis\u002Fapi\u002Fgemini.googleapis.com\u002Foverview?project=llm-min)\n\n## 📜 Table of Contents\n\n- [llm-min.txt: Min.js Style Compression of Tech Docs for LLM Context 🤖](#llm-mintxt-minjs-style-compression-of-tech-docs-for-llm-context-)\n  - [📜 Table of Contents](#-table-of-contents)\n  - [What is `llm-min.txt` and Why is it Important?](#what-is-llm-mintxt-and-why-is-it-important)\n  - [Understanding `llm-min.txt`: A Machine-Optimized Format 🧩](#understanding-llm-mintxt-a-machine-optimized-format-)\n  - [Does it Really Work? Visualizing the Impact](#does-it-really-work-visualizing-the-impact)\n  - [Quick Start 🚀](#quick-start-)\n  - [Output Directory Structure 📂](#output-directory-structure-)\n  - [Choosing the Right AI Model (Why Gemini) 🧠](#choosing-the-right-ai-model-why-gemini-)\n  - [How it Works: A Look Inside (src\u002Fllm\\_min) ⚙️](#how-it-works-a-look-inside-srcllm_min-️)\n  - [What's Next? Future Plans 🔮](#whats-next-future-plans-)\n  - [Common Questions (FAQ) ❓](#common-questions-faq-)\n  - [Want to Help? Contributing 🤝](#want-to-help-contributing-)\n  - [License 📜](#license-)\n\n---\n\n## What is `llm-min.txt` and Why is it Important?\n\nIf you've ever used an AI coding assistant (like GitHub Copilot, Cursor, or others powered by Large Language Models - LLMs), you've likely encountered situations where they don't know about the latest updates to programming libraries. This knowledge gap exists because AI models have a \"knowledge cutoff\" – a point beyond which they haven't learned new information. 
Since software evolves rapidly, this limitation can lead to outdated recommendations and broken code.\n\nSeveral innovative approaches have emerged to address this challenge:\n- \u003Ca href=\"https:\u002F\u002Fllmstxt.org\u002F\">\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmarv1nnnnn_llm-min.txt_readme_3796aa9e32be.png\" alt=\"llms.txt logo\" width=\"60\" style=\"vertical-align:middle; margin-right:8px;\"\u002F>\u003C\u002Fa> [llms.txt](https:\u002F\u002Fllmstxt.org\u002F)\n  A community-driven initiative where contributors create reference files (`llms.txt`) containing up-to-date library information specifically formatted for AI consumption.\n\n- \u003Ca href=\"https:\u002F\u002Fcontext7.com\u002F\">\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmarv1nnnnn_llm-min.txt_readme_4263a573bd30.png\" alt=\"Context7 logo\" width=\"60\" style=\"vertical-align:middle; margin-left:4px;\"\u002F>\u003C\u002Fa> [Context7](https:\u002F\u002Fcontext7.com\u002F)\n  A service that dynamically provides contextual information to AIs, often by intelligently summarizing documentation.\n\nWhile these solutions are valuable, they face certain limitations:\n- `llms.txt` files can become extraordinarily large – some exceeding **800,000** tokens (word fragments). This size can overwhelm many AI systems' context windows.\n  \n    \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmarv1nnnnn_llm-min.txt_readme_ccf4f5923f08.png\" alt=\"Token comparison for llms.txt\" width=\"500\"\u002F>\n    \n    Many shorter `llms.txt` variants simply contain links to official documentation, requiring the AI to fetch and process those documents separately. Even the comprehensive versions (`llms-full.txt`) often exceed what most AI assistants can process at once. 
Additionally, these files may not always reflect the absolute latest documentation.\n\n- `Context7` operates somewhat as a \"black box\" – while useful, its precise information selection methodology isn't fully transparent to users. It primarily works with GitHub code repositories or existing `llms.txt` files, rather than any arbitrary software package.\n\n**`llm-min.txt` offers a fresh approach:**\n\n\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmarv1nnnnn_llm-min.txt_readme_c1bd7804d6e7.png\" alt=\"llm-min.txt icon\" width=\"300\"\u002F>\n\nInspired by `min.js` files in web development (JavaScript with unnecessary elements removed), `llm-min.txt` adopts a similar philosophy for technical documentation. Instead of feeding an AI a massive, verbose manual, we leverage another AI to distill that documentation into a super-condensed, highly structured summary. The resulting `llm-min.txt` file captures only the most essential information needed to understand a library's usage, packaged in a format optimized for AI assistants rather than human readers.\n\nModern AI reasoning capabilities excel at this distillation process, creating remarkably efficient knowledge representations that deliver maximum value with minimal token consumption.\n\n---\n## Understanding `llm-min.txt`: A Machine-Optimized Format 🧩\n\nThe `llm-min.txt` file utilizes the **Structured Knowledge Format (SKF)** – a compact, machine-optimized format designed for efficient AI parsing rather than human readability. This format organizes technical information into distinct, highly structured sections with precise relationships.\n\n**Key Elements of the SKF Format:**\n\n1.  
**Header Metadata:** Every file begins with essential contextual information:\n    *   `# IntegratedKnowledgeManifest_SKF`: Format identifier and version\n    *   `# SourceDocs: [...]`: Original documentation sources\n    *   `# GenerationTimestamp: ...`: Creation timestamp\n    *   `# PrimaryNamespace: ...`: Top-level package\u002Fnamespace, critical for understanding import paths\n\n2.  **Three Core Structured Sections:** The content is organized into distinct functional categories:\n    *   `# SECTION: DEFINITIONS (Prefix: D)`: Describes the static aspects of the library:\n        *   Canonical component definitions with unique global IDs (e.g., `D001:G001_MyClass`)\n        *   Namespace paths relative to `PrimaryNamespace`\n        *   Method signatures with parameters and return types\n        *   Properties\u002Ffields with types and access modifiers\n        *   Static relationships like inheritance or interface implementation\n        *   **Important:** This section effectively serves as the glossary for the file, as the traditional glossary (`G` section) is used during generation but deliberately omitted from the final output to save space.\n\n    *   `# SECTION: INTERACTIONS (Prefix: I)`: Captures dynamic behaviors within the library:\n        *   Method invocations (`INVOKES`)\n        *   Component usage patterns (`USES_COMPONENT`)\n        *   Event production\u002Fconsumption\n        *   Error raising and handling logic, with references to specific error types\n\n    *   `# SECTION: USAGE_PATTERNS (Prefix: U)`: Provides concrete usage examples:\n        *   Common workflows for core functionality\n        *   Step-by-step sequences involving object creation, configuration, method invocation, and error handling\n        *   Each pattern has a descriptive name (e.g., `U_BasicCrawl`) with numbered steps (`U_BasicCrawl.1`, `U_BasicCrawl.2`)\n\n3.  
**Line-Based Structure:** Each item appears on its own line following precise formatting conventions that enable reliable machine parsing.\n\n**Example SKF Format (Simplified):**\n\n```text\n# IntegratedKnowledgeManifest_SKF\u002F1.4 LA\n# SourceDocs: [example-lib-docs]\n# GenerationTimestamp: 2024-05-28T12:00:00Z\n# PrimaryNamespace: example_lib\n\n# SECTION: DEFINITIONS (Prefix: D)\n# Format_PrimaryDef: Dxxx:Gxxx_Entity [DEF_TYP] [NAMESPACE \"relative.path\"] [OPERATIONS {op1:RetT(p1N:p1T)}] [ATTRIBUTES {attr1:AttrT1}] (\"Note\")\n# ---\nD001:G001_Greeter [CompDef] [NAMESPACE \".\"] [OPERATIONS {greet:Str(name:Str)}] (\"A simple greeter class\")\nD002:G002_AppConfig [CompDef] [NAMESPACE \"config\"] [ATTRIBUTES {debug_mode:Bool(\"RO\")}] (\"Application configuration\")\n# ---\n\n# SECTION: INTERACTIONS (Prefix: I)\n# Format: Ixxx:Source_Ref INT_VERB Target_Ref_Or_Literal (\"Note_Conditions_Error(Gxxx_ErrorType)\")\n# ---\nI001:G001_Greeter.greet INVOKES G003_Logger.log (\"Logs greeting activity\")\n# ---\n\n# SECTION: USAGE_PATTERNS (Prefix: U)\n# Format: U_Name:PatternTitleKeyword\n#         U_Name.N:[Actor_Or_Ref] ACTION_KEYWORD (Target_Or_Data_Involving_Ref) -> [Result_Or_State_Change_Involving_Ref]\n# ---\nU_BasicGreeting:Basic User Greeting\nU_BasicGreeting.1:[User] CREATE (G001_Greeter) -> [greeter_instance]\nU_BasicGreeting.2:[greeter_instance] INVOKE (greet name='Alice') -> [greeting_message]\n# ---\n# END_OF_MANIFEST\n```\n\nThe `llm-min-guideline.md` file (generated alongside `llm-min.txt`) provides detailed decoding instructions and schema definitions that enable an AI to correctly interpret the SKF format. It serves as the essential companion document explaining the notation, field meanings, and relationship types used throughout the file.\n\n---\n\n## Does it Really Work? Visualizing the Impact\n\n`llm-min.txt` achieves dramatic token reduction while preserving the essential knowledge needed by AI assistants. 
The chart below compares token counts between original library documentation (`llm-full.txt`) and the compressed `llm-min.txt` versions:\n\n![Token Compression Comparison](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmarv1nnnnn_llm-min.txt_readme_d802e5ac4ac1.png)\n\nThese results demonstrate token reductions typically ranging from 90-95%, with some cases exceeding 97%. This extreme compression, combined with the highly structured SKF format, enables AI tools to ingest and process library documentation far more efficiently than with raw text.\n\nIn our samples directory, you can examine these impressive results firsthand:\n*   `sample\u002Fcrawl4ai\u002Fllm-full.txt`: Original documentation (uncompressed)\n*   `sample\u002Fcrawl4ai\u002Fllm-min.txt`: The compressed SKF representation\n*   `sample\u002Fcrawl4ai\u002Fllm-min-guideline.md`: The format decoder companion file, also seen in [llm-min-guideline.md](src\u002Fllm_min\u002Fassets\u002Fllm_min_guideline.md)\n\nMost compressed files contain around 10,000 tokens – well within the processing capacity of modern AI assistants.\n\n**How to use it?**\n\nSimply reference the files in your AI-powered IDE's conversation, and watch your assistant immediately gain detailed knowledge of the library:\n\n![Demo](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmarv1nnnnn_llm-min.txt_readme_17d4db9331f8.gif)\n\n**How does it perform?**\n\nA rigorous benchmark is needed but incredibly hard to build. LLM code generation is stochastic and the quality of the generated code depends on many factors. crawl4ai \u002F google-genai \u002F svelte are all packages that current LLMs fail to generate correct code for. Using `llm-min` largely improves the success rate of code generation for them.\n\n---\n\n## Quick Start 🚀\n\nGetting started with `llm-min` is straightforward:\n\n**1. 
Installation:**\n\n*   **For regular users (recommended):**\n    ```bash\n    pip install llm-min\n\n    # Install required browser automation tools\n    playwright install\n    ```\n\n*   **For contributors and developers:**\n    ```bash\n    # Clone the repository (if not already done)\n    # git clone https:\u002F\u002Fgithub.com\u002Fyour-repo\u002Fllm-min.git\n    # cd llm-min\n\n    # Create and activate a virtual environment\n    python -m venv .venv\n    source .venv\u002Fbin\u002Factivate  # On Windows: .venv\\Scripts\\activate\n\n    # Install dependencies with UV (faster than pip)\n    uv sync\n    uv pip install -e .\n\n    # Optional: Set up pre-commit hooks for code quality\n    # uv pip install pre-commit\n    # pre-commit install\n    ```\n\n**2. Set Up Your Gemini API Key:** 🔑\n\n`llm-min` uses Google's Gemini AI to generate compressed documentation. You'll need a Gemini API key to proceed:\n\n*   **Best practice:** Set an environment variable named `GEMINI_API_KEY` with your key value:\n    ```bash\n    # Linux\u002FmacOS\n    export GEMINI_API_KEY=your_api_key_here\n    \n    # Windows (Command Prompt)\n    set GEMINI_API_KEY=your_api_key_here\n    \n    # Windows (PowerShell)\n    $env:GEMINI_API_KEY=\"your_api_key_here\"\n    ```\n\n*   **Alternative:** Supply your key directly via the `--gemini-api-key` command-line option.\n\nYou can obtain a Gemini API key from the [Google AI Studio](https:\u002F\u002Faistudio.google.com\u002Fapp\u002Fapikey) or Google Cloud Console.\n\n**3. 
Generate Your First `llm-min.txt` File:** 💻\n\nChoose one of the following input sources:\n\n| Input Source Options | Short | Type      | What it does                                                                 |\n|---------------------|-------|-----------|------------------------------------------------------------------------------|\n| `--input-folder`    | `-i`  | `DIRECTORY` | **📁 Process local documentation files.** Recursively scans a directory for `.md`, `.txt`, and `.rst` files. Web crawling is skipped when using this option. |\n| `--package`         | `-pkg`| `TEXT`    | **📦 Process a Python package.** Automatically finds and crawls the package's documentation website. |\n| `--doc-url`         | `-u`  | `TEXT`    | **🌐 Process a documentation website.** Directly crawls the specified URL. |\n\n| Configuration Options | Short | Type      | What it does                                                                 |\n|---------------------|-------|-----------|------------------------------------------------------------------------------|\n| `--output-dir`      | `-o`  | `DIRECTORY` | Where to save the generated files (default: `llm_min_docs`).                |\n| `--output-name`     | `-n`  | `TEXT`    | Give a custom name for the subfolder inside `output-dir`.                    |\n| `--library-version` | `-V`  | `TEXT`    | Specify the library version (useful when using `--input-folder` or `--doc-url`). |\n| `--max-crawl-pages` | `-p`  | `INTEGER` | Max web pages to read (default: 200; 0 means no limit). Only applies to web crawling. |\n| `--max-crawl-depth` | `-D`  | `INTEGER` | How many links deep to follow on a website (default: 3). Only applies to web crawling. |\n| `--chunk-size`      | `-c`  | `INTEGER` | How much text to give the AI at once (default: 0, which enables adaptive chunking). If 0, `llm-min` automatically determines an optimal size. 
|\n| `--gemini-api-key`  | `-k`  | `TEXT`    | Your Gemini API Key (if not set as an environment variable).                 |\n| `--gemini-model`    | `-m`  | `TEXT`    | Which Gemini model to use (default: `gemini-2.5-flash-lite-preview-06-17`).       |\n| `--force-reprocess` |       |           | Force reprocessing even if `llm-full.txt` exists and ignore intermediate files. |\n| `--save-fragments`  |       | `BOOLEAN` | Save intermediate fragments for debugging and retry capability (default: True). |\n| `--verbose`         | `-v`  |           | Show more detailed messages while it's working.                              |\n\n**Example Commands:**\n\n```bash\n# 📦 Process the \"typer\" Python package, save to \"my_docs\" folder\nllm-min -pkg \"typer\" -o my_docs -p 50\n\n# 🌐 Process the FastAPI documentation website\nllm-min -u \"https:\u002F\u002Ffastapi.tiangolo.com\u002F\" -o my_docs -p 50\n\n# 📁 Process documentation files in a local folder\nllm-min -i \".\u002Fdocs\" -o my_docs\n\n# 📁 Process local files with custom output name and version\nllm-min -i \".\u002Fmy-project-docs\" -o my_docs -n \"my-project\" -V \"1.2.3\"\n\n# 📁 Process a project's entire documentation directory structure\nllm-min -i \"\u002Fpath\u002Fto\u002Fproject\u002Fdocumentation\" -o project_docs --verbose\n```\n\n**Local Folder Processing Details:** 📁\n\nWhen using `--input-folder`, `llm-min` will:\n- Recursively scan the specified directory for documentation files\n- Process files with extensions: `.md` (Markdown), `.txt` (Plain text), `.rst` (reStructuredText)\n- Combine all found files into a single content stream\n- Skip web crawling entirely (making it faster and not requiring internet connectivity)\n- Preserve the original combined content in `llm-full.txt` and generate the compressed `llm-min.txt`\n\nThis is particularly useful for:\n- **Internal\u002Fproprietary documentation** that isn't available online\n- **Local project documentation** that you're developing\n- **Offline 
processing** when internet access is limited\n- **Custom documentation** in various formats\n\n**4. Programmatic Usage in Python:** 🐍\n\nYou can also integrate `llm-min` directly into your Python applications:\n\n```python\nfrom llm_min import LLMMinGenerator\nimport os\n\n# Configuration for the AI processing\nllm_config = {\n    \"api_key\": os.environ.get(\"GEMINI_API_KEY\"),  # Use environment variable\n    \"model_name\": \"gemini-2.5-flash-lite-preview-06-17\",  # Recommended model\n    \"chunk_size\": 600000,  # Characters per AI processing batch\n    \"max_crawl_pages\": 200,  # Maximum pages to crawl (only for web crawling)\n    \"max_crawl_depth\": 3,  # Link following depth (only for web crawling)\n}\n\n# Initialize the generator (output files will go to .\u002Fmy_output_docs\u002F[source_name]\u002F)\ngenerator = LLMMinGenerator(output_dir=\".\u002Fmy_output_docs\", llm_config=llm_config)\n\n# 📦 Generate llm-min.txt for a Python package\ntry:\n    generator.generate_from_package(\"requests\")\n    print(\"✅ Successfully created documentation for 'requests'!\")\nexcept Exception as e:\n    print(f\"❌ Error processing 'requests': {e}\")\n\n# 🌐 Generate llm-min.txt from a documentation URL\ntry:\n    generator.generate_from_url(\"https:\u002F\u002Ffastapi.tiangolo.com\u002F\")\n    print(\"✅ Successfully processed FastAPI documentation!\")\nexcept Exception as e:\n    print(f\"❌ Error processing URL: {e}\")\n\n# 📁 Generate llm-min.txt from local documentation files\ntry:\n    # Read and combine all documentation files from a local folder\n    import pathlib\n    docs_folder = pathlib.Path(\".\u002Fmy-project-docs\")\n    \n    # Collect content from supported file types\n    content = \"\"\n    for ext in [\".md\", \".txt\", \".rst\"]:\n        for file_path in docs_folder.rglob(f\"*{ext}\"):\n            with open(file_path, encoding=\"utf-8\") as f:\n                content += f\"# File: {file_path.name}\\n\\n\"\n                content += f.read() + 
\"\\n\\n---\\n\\n\"\n    \n    # Process the combined content\n    generator.generate_from_text(\n        input_content=content, \n        source_name=\"my-project\",\n        library_version=\"1.0.0\"  # Optional\n    )\n    print(\"✅ Successfully processed local documentation!\")\nexcept Exception as e:\n    print(f\"❌ Error processing local files: {e}\")\n```\n\nFor a complete list of command-line options, run:\n```bash\nllm-min --help\n```\n\n---\n\n## Output Directory Structure 📂\n\nWhen `llm-min` completes its processing, it creates the following organized directory structure:\n\n```text\nyour_chosen_output_dir\u002F\n└── name_of_package_or_website\u002F\n    ├── llm-full.txt             # Complete documentation text (original content)\n    ├── llm-min.txt              # Compressed SKF\u002F1.4 LA structured summary\n    └── llm-min-guideline.md     # Essential format decoder for AI interpretation\n```\n\nFor example, running `llm-min -pkg \"requests\" -o my_llm_docs` produces:\n\n```text\nmy_llm_docs\u002F\n└── requests\u002F\n    ├── llm-full.txt             # Original documentation\n    ├── llm-min.txt              # Compressed SKF format (D, I, U sections)\n    └── llm-min-guideline.md     # Format decoding instructions\n```\n\n**Important:** The `llm-min-guideline.md` file is a critical companion to `llm-min.txt`. It provides the detailed schema definitions and format explanations that an AI needs to correctly interpret the structured data. When using `llm-min.txt` with an AI assistant, always include this guideline file as well.\n\n---\n\n## Choosing the Right AI Model (Why Gemini) 🧠\n\n`llm-min` utilizes Google's Gemini family of AI models for document processing. While you can select a specific Gemini model via the `--gemini-model` option, we strongly recommend using the default: `gemini-2.5-flash-lite-preview-06-17`.\n\nThis particular model offers an optimal combination of capabilities for documentation compression:\n\n1.  
**Advanced Reasoning:** Excels at understanding complex technical documentation and extracting the essential structural relationships needed for the SKF format.\n\n2.  **Exceptional Context Window:** With a 1-million token input capacity, it can process large documentation chunks at once, enabling more coherent and comprehensive analysis.\n\n3.  **Cost Efficiency:** Provides an excellent balance of capability and affordability compared to other large-context models.\n\nThe default model has been carefully selected to deliver the best results for the `llm-min` compression process across a wide range of documentation styles and technical domains.\n\n---\n\n## How it Works: A Look Inside (src\u002Fllm_min) ⚙️\n\nThe `llm-min` tool employs a sophisticated multi-stage process to transform verbose documentation into a compact, machine-optimized SKF manifest:\n\n1.  **Input Processing:** Based on your command-line options, `llm-min` gathers documentation from the appropriate source:\n    - **Package (`--package \"requests\"`)**: Automatically discovers and crawls the package's documentation website\n    - **URL (`--doc-url \"https:\u002F\u002F...\"`)**: Directly crawls the specified documentation website  \n    - **Local Folder (`--input-folder \".\u002Fdocs\"`)**: Recursively scans for `.md`, `.txt`, and `.rst` files and combines their content\n\n2.  **Text Preparation:** The collected documentation is cleaned and segmented into manageable chunks for processing. The original text is preserved as `llm-full.txt`.\n\n3.  
**Three-Step AI Analysis Pipeline (Gemini):** This is the heart of the SKF manifest generation, orchestrated by the `compact_content_to_structured_text` function in `compacter.py`:\n\n    *   **Step 1: Global Glossary Generation (Internal Only):**\n        *   Each document chunk is analyzed using the `SKF_PROMPT_CALL1_GLOSSARY_TEMPLATE` prompt to identify key technical entities and generate a *chunk-local* glossary fragment with temporary `Gxxx` IDs.\n        *   These fragments are consolidated via the `SKF_PROMPT_CALL1_5_MERGE_GLOSSARY_TEMPLATE` prompt, which resolves duplicates and creates a unified entity list.\n        *   The `re_id_glossary_items` function then assigns globally sequential `Gxxx` IDs (G001, G002, etc.) to these consolidated entities.\n        *   This global glossary is maintained in memory throughout the process but is **not included in the final `llm-min.txt` output** to conserve space.\n\n    *   **Step 2: Definitions & Interactions (D & I) Generation:**\n        *   For the first document chunk (or if there's only one chunk), the AI uses the `SKF_PROMPT_CALL2_DETAILS_SINGLE_CHUNK_TEMPLATE` with the global glossary to generate initial D and I items.\n        *   For subsequent chunks, the `SKF_PROMPT_CALL2_DETAILS_ITERATIVE_TEMPLATE` is used, providing both the global glossary and previously generated D&I items as context to avoid duplication.\n        *   As each chunk is processed, newly identified D and I items are accumulated and assigned sequential global IDs (D001, D002, etc. 
and I001, I002, etc.).\n\n    *   **Step 3: Usage Patterns (U) Generation:**\n        *   Similar to Step 2, the first chunk uses `SKF_PROMPT_CALL3_USAGE_SINGLE_CHUNK_TEMPLATE`, receiving the global glossary, all accumulated D&I items, and the current chunk text.\n        *   Subsequent chunks use `SKF_PROMPT_CALL3_USAGE_ITERATIVE_TEMPLATE`, which additionally receives previously generated U-items to enable pattern continuation and avoid duplication.\n        *   Usage patterns are identified with descriptive names (e.g., `U_BasicNetworkFetch`) and contain numbered steps (e.g., `U_BasicNetworkFetch.1`, `U_BasicNetworkFetch.2`).\n\n4.  **Final Assembly:** The complete `llm-min.txt` file is created by combining:\n    *   The SKF manifest header (protocol version, source docs, timestamp, primary namespace)\n    *   The accumulated `DEFINITIONS` section\n    *   The accumulated `INTERACTIONS` section\n    *   The accumulated `USAGE_PATTERNS` section\n    *   A final `# END_OF_MANIFEST` marker\n\n**Conceptual Pipeline Overview:**\n\n```\nUser Input      →  Doc Gathering   →  Text Processing   →  AI Step 1: Glossary   →  In-Memory Global    →  AI Step 2: D&I     →  Accumulated D&I\n(CLI\u002FPython)       (Package\u002FURL)      (Chunking)           (Extract + Merge)        Glossary (Gxxx)        (Per chunk)          (Dxxx, Ixxx)\n                                                                                                                                     ↓\n           ┌─────────────────────────────────────────────────────────────────────────────────────────────────┐                      ↓\n           ↓                                                                                                 ↑                      ↓\nFinal SKF Manifest   ←   Assembly   ←   Accumulated Usage   ←   AI Step 3: Usage   ←   Global Glossary + Accumulated D&I\n(llm-min.txt)            (D,I,U)        Patterns (U_Name.N)      (Per chunk)           (Required context for generating 
valid U-items)\n```\n\nThis multi-stage approach ensures that the SKF manifest is comprehensive, avoids duplication across chunks, and maintains consistent cross-references between entities, definitions, interactions, and usage patterns.\n\n---\n\n## What's Next? Future Plans 🔮\n\nWe're exploring several exciting directions to evolve `llm-min`:\n\n*   **Public Repository for Pre-Generated Files** 🌐\n    A central hub where the community could share and discover `llm-min.txt` files for popular libraries would be valuable. This would eliminate the need for individual users to generate these files repeatedly and ensure consistent, high-quality information. Key challenges include quality control, version management, and hosting infrastructure costs.\n\n*   **Code-Based Documentation Inference** 💻\n    An intriguing possibility is using source code analysis (via Abstract Syntax Trees) to automatically generate or augment documentation summaries. While initial experiments have shown this to be technically challenging, particularly for complex libraries with dynamic behaviors, it remains a promising research direction that could enable even more accurate documentation.\n\n*   **Model Context Protocol (MCP) Integration** 🤔\n    While technically feasible, implementing `llm-min` as an MCP server doesn't fully align with our current design philosophy. The strength of `llm-min.txt` lies in providing reliable, static context – a deterministic reference that reduces the uncertainty sometimes associated with dynamic AI integrations. We're monitoring user needs to determine if a server-based approach might deliver value in the future.\n\nWe welcome community input on these potential directions!\n\n---\n\n## Common Questions (FAQ) ❓\n\n**Q: Do I need a reasoning-capable model to generate an `llm-min.txt` file?** 🧠\n\nA: Yes, generating an `llm-min.txt` file requires a model with strong reasoning capabilities like Gemini. 
The process involves complex information extraction, entity relationship mapping, and structured knowledge representation. However, once generated, the `llm-min.txt` file can be effectively used by any competent coding model (e.g., Claude 3.5 Sonnet) to answer library-specific questions.\n\n**Q: Does `llm-min.txt` preserve all information from the original documentation?** 📚\n\nA: No, `llm-min.txt` is explicitly designed as a lossy compression format. It prioritizes programmatically relevant details (classes, methods, parameters, return types, core usage patterns) while deliberately omitting explanatory prose, conceptual discussions, and peripheral information. This selective preservation is what enables the dramatic token reduction while maintaining the essential technical reference information an AI assistant needs.\n\n**Q: Why does generating an `llm-min.txt` file take time?** ⏱️\n\nA: Creating an `llm-min.txt` file involves a sophisticated multi-stage AI pipeline:\n1. Gathering and preprocessing documentation\n2. Analyzing each chunk to identify entities (glossary generation)\n3. Consolidating entities across chunks\n4. Extracting detailed definitions and interactions from each chunk\n5. Generating representative usage patterns\n\nThis intensive process can take several minutes, particularly for large libraries. However, once created, the resulting `llm-min.txt` file can be reused indefinitely, providing much faster reference information for AI assistants.\n\n**Q: I received a \"Gemini generation stopped due to MAX_TOKENS limit\" error. What should I do?** 🛑\n\nA: This error indicates that the Gemini model reached its output limit while processing a particularly dense or complex documentation chunk. Try reducing the `--chunk-size` option (e.g., from 600,000 to 300,000 characters) to give the model smaller batches to process. 
While this might slightly increase API costs due to more separate calls, it often resolves token limit errors.\n\n**Q: What's the typical cost for generating one `llm-min.txt` file?** 💰\n\nA: Processing costs vary based on documentation size and complexity, but for a moderate-sized library, expect to spend between **$0.01 and $1.00 USD** in Gemini API charges. Key factors affecting cost include:\n- Total documentation size\n- Number of chunks processed\n- Complexity of the library's structure\n- Selected Gemini model\n\nFor current pricing details, refer to the [Google Cloud AI pricing page](https:\u002F\u002Fcloud.google.com\u002Fvertex-ai\u002Fpricing#gemini).\n\n**Q: Can I process local documentation files without internet access?** 📁\n\nA: Yes! The `--input-folder` option is perfect for offline processing. When using this option, `llm-min` will:\n- Skip web crawling entirely (no internet required for content gathering)\n- Only need internet access for the Gemini API calls during compression\n- Support `.md`, `.txt`, and `.rst` files recursively in any directory structure\n- Work with internal\u002Fproprietary documentation that isn't publicly available online\n\nThis makes it ideal for processing private documentation, local development docs, or when working with limited internet connectivity.\n\n**Q: Did you vibe code this project?** 🤖\n\nA: Yes, definitely. This project was developed using [Roocode](https:\u002F\u002Froocode.com\u002F) with a custom configuration called [Rooroo](https:\u002F\u002Fgithub.com\u002Fmarv1nnnnn\u002Frooroo).\n\n---\n\n## Want to Help? Contributing 🤝\n\nWe welcome contributions to make `llm-min` even better! 🎉 \n\nWhether you're reporting bugs, suggesting features, or submitting code changes via pull requests, your involvement helps improve this tool for everyone. Check our GitHub repository for contribution guidelines and open issues.\n\n---\n\n## License 📜\n\nThis project is licensed under the MIT License. 
See the `LICENSE` file for complete details.\n","# llm-min.txt: Min.js Style Compression of Tech Docs for LLM Context 🤖\n\n[![License: MIT](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FLicense-MIT-yellow.svg)](https:\u002F\u002Fopensource.org\u002Flicenses\u002FMIT)\n[![Python Version](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FPython-3.10%2B-blue)](https:\u002F\u002Fwww.python.org\u002Fdownloads\u002F)\n[![Gemini API](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FGemini-API-green)](https:\u002F\u002Fconsole.cloud.google.com\u002Fapis\u002Fapi\u002Fgemini.googleapis.com\u002Foverview?project=llm-min)\n\n## 📜 Table of Contents\n\n- [llm-min.txt: Min.js Style Compression of Tech Docs for LLM Context 🤖](#llm-mintxt-minjs-style-compression-of-tech-docs-for-llm-context-)\n  - [📜 Table of Contents](#-table-of-contents)\n  - [What is `llm-min.txt` and Why is it Important?](#what-is-llm-mintxt-and-why-is-it-important)\n  - [Understanding `llm-min.txt`: A Machine-Optimized Format 🧩](#understanding-llm-mintxt-a-machine-optimized-format-)\n  - [Does it Really Work? Visualizing the Impact](#does-it-really-work-visualizing-the-impact)\n  - [Quick Start 🚀](#quick-start-)\n  - [Output Directory Structure 📂](#output-directory-structure-)\n  - [Choosing the Right AI Model (Why Gemini) 🧠](#choosing-the-right-ai-model-why-gemini-)\n  - [How it Works: A Look Inside (src\u002Fllm\\_min) ⚙️](#how-it-works-a-look-inside-srcllm_min-️)\n  - [What's Next? Future Plans 🔮](#whats-next-future-plans-)\n  - [Common Questions (FAQ) ❓](#common-questions-faq-)\n  - [Want to Help? Contributing 🤝](#want-to-help-contributing-)\n  - [License 📜](#license-)\n\n---\n\n## What is `llm-min.txt` and Why is it Important?\n\nIf you've ever used an AI coding assistant (like GitHub Copilot, Cursor, or others powered by Large Language Models - LLMs), you've likely encountered situations where they don't know about the latest updates to programming libraries. 
This knowledge gap exists because AI models have a \"knowledge cutoff\" – a point beyond which they haven't learned new information. Since software evolves rapidly, this limitation can lead to outdated recommendations and broken code.\n\nSeveral innovative approaches have emerged to address this challenge:\n- \u003Ca href=\"https:\u002F\u002Fllmstxt.org\u002F\">\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmarv1nnnnn_llm-min.txt_readme_3796aa9e32be.png\" alt=\"llms.txt logo\" width=\"60\" style=\"vertical-align:middle; margin-right:8px;\"\u002F>\u003C\u002Fa> [llms.txt](https:\u002F\u002Fllmstxt.org\u002F)\n  A community-driven initiative where contributors create reference files (`llms.txt`) containing up-to-date library information specifically formatted for AI consumption.\n\n- \u003Ca href=\"https:\u002F\u002Fcontext7.com\u002F\">\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmarv1nnnnn_llm-min.txt_readme_4263a573bd30.png\" alt=\"Context7 logo\" width=\"60\" style=\"vertical-align:middle; margin-left:4px;\"\u002F>\u003C\u002Fa> [Context7](https:\u002F\u002Fcontext7.com\u002F)\n  A service that dynamically provides contextual information to AIs, often by intelligently summarizing documentation.\n\nWhile these solutions are valuable, they face certain limitations:\n- `llms.txt` files can become extraordinarily large – some exceeding **800,000** tokens (word fragments). This size can overwhelm many AI systems' context windows.\n  \n    \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmarv1nnnnn_llm-min.txt_readme_ccf4f5923f08.png\" alt=\"Token comparison for llms.txt\" width=\"500\"\u002F>\n    \n    Many shorter `llms.txt` variants simply contain links to official documentation, requiring the AI to fetch and process those documents separately. Even the comprehensive versions (`llms-full.txt`) often exceed what most AI assistants can process at once. 
Additionally, these files may not always reflect the absolute latest documentation.\n\n- `Context7` operates somewhat as a \"black box\" – while useful, its precise information selection methodology isn't fully transparent to users. It primarily works with GitHub code repositories or existing `llms.txt` files, rather than any arbitrary software package.\n\n**`llm-min.txt` offers a fresh approach:**\n\n\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmarv1nnnnn_llm-min.txt_readme_c1bd7804d6e7.png\" alt=\"llm-min.txt icon\" width=\"300\"\u002F>\n\nInspired by `min.js` files in web development (JavaScript with unnecessary elements removed), `llm-min.txt` adopts a similar philosophy for technical documentation. Instead of feeding an AI a massive, verbose manual, we leverage another AI to distill that documentation into a super-condensed, highly structured summary. The resulting `llm-min.txt` file captures only the most essential information needed to understand a library's usage, packaged in a format optimized for AI assistants rather than human readers.\n\nModern AI reasoning capabilities excel at this distillation process, creating remarkably efficient knowledge representations that deliver maximum value with minimal token consumption.\n\n---\n## Understanding `llm-min.txt`: A Machine-Optimized Format 🧩\n\nThe `llm-min.txt` file utilizes the **Structured Knowledge Format (SKF)** – a compact, machine-optimized format designed for efficient AI parsing rather than human readability. This format organizes technical information into distinct, highly structured sections with precise relationships.\n\n**Key Elements of the SKF Format:**\n\n1.  
**Header Metadata:** Every file begins with essential contextual information:\n    *   `# IntegratedKnowledgeManifest_SKF`: Format identifier and version\n    *   `# SourceDocs: [...]`: Original documentation sources\n    *   `# GenerationTimestamp: ...`: Creation timestamp\n    *   `# PrimaryNamespace: ...`: Top-level package\u002Fnamespace, critical for understanding import paths\n\n2.  **Three Core Structured Sections:** The content is organized into distinct functional categories:\n    *   `# SECTION: DEFINITIONS (Prefix: D)`: Describes the static aspects of the library:\n        *   Canonical component definitions with unique global IDs (e.g., `D001:G001_MyClass`)\n        *   Namespace paths relative to `PrimaryNamespace`\n        *   Method signatures with parameters and return types\n        *   Properties\u002Ffields with types and access modifiers\n        *   Static relationships like inheritance or interface implementation\n        *   **Important:** This section effectively serves as the glossary for the file, as the traditional glossary (`G` section) is used during generation but deliberately omitted from the final output to save space.\n\n    *   `# SECTION: INTERACTIONS (Prefix: I)`: Captures dynamic behaviors within the library:\n        *   Method invocations (`INVOKES`)\n        *   Component usage patterns (`USES_COMPONENT`)\n        *   Event production\u002Fconsumption\n        *   Error raising and handling logic, with references to specific error types\n\n    *   `# SECTION: USAGE_PATTERNS (Prefix: U)`: Provides concrete usage examples:\n        *   Common workflows for core functionality\n        *   Step-by-step sequences involving object creation, configuration, method invocation, and error handling\n        *   Each pattern has a descriptive name (e.g., `U_BasicCrawl`) with numbered steps (`U_BasicCrawl.1`, `U_BasicCrawl.2`)\n\n3.  
**Line-Based Structure:** Each item appears on its own line following precise formatting conventions that enable reliable machine parsing.\n\n**Example SKF Format (Simplified):**\n\n```text\n\n# 集成知识清单_SKF\u002F1.4 LA\n# 源文档: [example-lib-docs]\n# 生成时间戳: 2024-05-28T12:00:00Z\n# 主命名空间: example_lib\n\n# 第一部分：定义（前缀：D）\n# 主要定义格式: Dxxx:Gxxx_Entity [DEF_TYP] [NAMESPACE \"relative.path\"] [OPERATIONS {op1:RetT(p1N:p1T)}] [ATTRIBUTES {attr1:AttrT1}] (\"注释\")\n# ---\nD001:G001_Greeter [组件定义] [NAMESPACE \".\"] [OPERATIONS {greet:Str(name:Str)}] (\"一个简单的问候类\")\nD002:G002_AppConfig [组件定义] [NAMESPACE \"config\"] [ATTRIBUTES {debug_mode:Bool(\"只读\")}] (\"应用程序配置\")\n# ---\n\n# 第二部分：交互（前缀：I）\n# 格式: Ixxx:源引用 INT_VERB 目标引用或字面量 (\"备注_条件_错误(Gxxx_ErrorType)\")\n# ---\nI001:G001_Greeter.greet 调用 G003_Logger.log (\"记录问候活动\")\n# ---\n\n# 第三部分：使用模式（前缀：U）\n# 格式: U_Name:模式标题关键词\n#         U_Name.N:[参与者或引用] 动作关键词 (涉及目标或数据的引用) -> [结果或状态变化涉及的引用]\n# ---\nU_BasicGreeting:基本用户问候\nU_BasicGreeting.1:[用户] 创建 (G001_Greeter) -> [greeter_instance]\nU_BasicGreeting.2:[greeter_instance] 调用 (greet name='Alice') -> [greeting_message]\n# ---\n# 清单结束\n```\n\n`llm-min-guideline.md` 文件（与 `llm-min.txt` 同时生成）提供了详细的解码说明和模式定义，使 AI 能够正确解析 SKF 格式。它作为关键的配套文档，解释了文件中使用的符号、字段含义以及关系类型。\n\n---\n\n## 真的有效吗？可视化其效果\n\n`llm-min.txt` 在大幅减少 token 数量的同时，保留了 AI 助手所需的核心知识。下图比较了原始库文档（`llm-full.txt`）与压缩后的 `llm-min.txt` 版本之间的 token 数量：\n\n![Token 压缩对比图](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmarv1nnnnn_llm-min.txt_readme_d802e5ac4ac1.png)\n\n结果显示，token 减少幅度通常在 90% 至 95% 之间，某些情况下甚至超过 97%。这种极致的压缩结合高度结构化的 SKF 格式，使得 AI 工具能够比处理原始文本更高效地摄入和处理库文档。\n\n在我们的示例目录中，您可以亲自查看这些令人印象深刻的成果：\n*   `sample\u002Fcrawl4ai\u002Fllm-full.txt`: 原始文档（未压缩）\n*   `sample\u002Fcrawl4ai\u002Fllm-min.txt`: 压缩后的 SKF 表示\n*   `sample\u002Fcrawl4ai\u002Fllm-min-guideline.md`: 格式解码器配套文件，也可参见 [llm-min-guideline.md](src\u002Fllm_min\u002Fassets\u002Fllm_min_guideline.md)\n\n大多数压缩后的文件仅包含约 10,000 个 token，完全在现代 AI 助手的处理能力范围内。\n\n**如何使用？**\n\n只需在您的 AI 驱动 IDE 
的对话中引用这些文件，您的助手便会立即获得关于该库的详细知识：\n\n![演示](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmarv1nnnnn_llm-min.txt_readme_17d4db9331f8.gif)\n\n**性能如何？**\n\n进行基准测试是必要的，但非常困难。LLM 代码生成具有随机性，生成代码的质量取决于诸多因素。例如，crawl4ai、google-genai 和 svelte 这些软件包目前仍难以被 LLM 正确生成代码。而使用 `llm-min` 将显著提高代码生成的成功率。\n\n---\n\n## 快速入门 🚀\n\n开始使用 `llm-min` 非常简单：\n\n**1. 安装：**\n\n*   **对于普通用户（推荐）：**\n    ```bash\n    pip install llm-min\n\n    # 安装所需的浏览器自动化工具\n    playwright install\n    ```\n\n*   **对于贡献者和开发者：**\n    ```bash\n    # 克隆仓库（如果尚未完成）\n    # git clone https:\u002F\u002Fgithub.com\u002Fyour-repo\u002Fllm-min.git\n    # cd llm-min\n\n    # 创建并激活虚拟环境\n    python -m venv .venv\n    source .venv\u002Fbin\u002Factivate  # 在 Windows 上：.venv\\Scripts\\activate\n\n    # 使用 UV 安装依赖项（比 pip 更快）\n    uv sync\n    uv pip install -e .\n\n    # 可选：设置 pre-commit 钩子以保证代码质量\n    # uv pip install pre-commit\n    # pre-commit install\n    ```\n\n**2. 设置你的 Gemini API 密钥：🔑**\n\n`llm-min` 使用 Google 的 Gemini AI 来生成压缩后的文档。你需要一个 Gemini API 密钥才能继续：\n\n*   **最佳实践：** 将你的密钥值设置为名为 `GEMINI_API_KEY` 的环境变量：\n    ```bash\n    # Linux\u002FmacOS\n    export GEMINI_API_KEY=your_api_key_here\n    \n    # Windows（命令提示符）\n    set GEMINI_API_KEY=your_api_key_here\n    \n    # Windows（PowerShell）\n    $env:GEMINI_API_KEY=\"your_api_key_here\"\n    ```\n\n*   **替代方案：** 直接通过 `--gemini-api-key` 命令行选项提供你的密钥。\n\n你可以在 [Google AI Studio](https:\u002F\u002Faistudio.google.com\u002Fapp\u002Fapikey) 或 Google Cloud 控制台获取 Gemini API 密钥。\n\n**3. 
生成你的第一个 `llm-min.txt` 文件：💻**\n\n从以下输入源中选择一个：\n\n| 输入源选项 | 短选项 | 类型      | 功能                                                                 |\n|-----------|--------|-----------|----------------------------------------------------------------------|\n| `--input-folder`    | `-i`  | `DIRECTORY` | **📁 处理本地文档文件。** 递归扫描指定目录中的 `.md`、`.txt` 和 `.rst` 文件。使用此选项时会跳过网页爬取。 |\n| `--package`         | `-pkg`| `TEXT`    | **📦 处理 Python 包。** 自动查找并爬取该包的文档网站。 |\n| `--doc-url`         | `-u`  | `TEXT`    | **🌐 处理文档网站。** 直接爬取指定的 URL。 |\n\n| 配置选项 | 短选项 | 类型      | 功能                                                                 |\n|---------|--------|-----------|----------------------------------------------------------------------|\n| `--output-dir`      | `-o`  | `DIRECTORY` | 保存生成文件的目录（默认：`llm_min_docs`）。                        |\n| `--output-name`     | `-n`  | `TEXT`    | 为 `output-dir` 内的子文件夹指定自定义名称。                        |\n| `--library-version` | `-V`  | `TEXT`    | 指定库版本（在使用 `--input-folder` 或 `--doc-url` 时有用）。        |\n| `--max-crawl-pages` | `-p`  | `INTEGER` | 最多读取的网页数量（默认：200；0 表示无限制）。仅适用于网页爬取。    |\n| `--max-crawl-depth` | `-D`  | `INTEGER` | 在网站上跟随链接的最大深度（默认：3）。仅适用于网页爬取。            |\n| `--chunk-size`      | `-c`  | `INTEGER` | 每次传递给 AI 的文本量（默认：0，启用自适应分块）。如果为 0，`llm-min` 会自动确定最佳大小。 |\n| `--gemini-api-key`  | `-k`  | `TEXT`    | 你的 Gemini API 密钥（如果未设置为环境变量）。                       |\n| `--gemini-model`    | `-m`  | `TEXT`    | 要使用的 Gemini 模型（默认：`gemini-2.5-flash-lite-preview-06-17`）。       |\n| `--force-reprocess` |       |           | 即使存在 `llm-full.txt` 也强制重新处理，并忽略中间文件。             |\n| `--save-fragments`  |       | `BOOLEAN` | 保存中间片段以便调试和重试（默认：True）。                           |\n| `--verbose`         | `-v`  |           | 在运行时显示更详细的日志信息。                                      |\n\n**示例命令：**\n\n```bash\n# 📦 处理 \"typer\" Python 包，保存到 \"my_docs\" 文件夹\nllm-min -pkg \"typer\" -o my_docs -p 50\n\n# 🌐 处理 FastAPI 文档网站\nllm-min -u 
\"https:\u002F\u002Ffastapi.tiangolo.com\u002F\" -o my_docs -p 50\n\n# 📁 处理本地文件夹中的文档文件\nllm-min -i \".\u002Fdocs\" -o my_docs\n\n# 📁 处理本地文件，自定义输出名称和版本\nllm-min -i \".\u002Fmy-project-docs\" -o my_docs -n \"my-project\" -V \"1.2.3\"\n\n# 📁 处理项目的整个文档目录结构\nllm-min -i \"\u002Fpath\u002Fto\u002Fproject\u002Fdocumentation\" -o project_docs --verbose\n```\n\n**本地文件夹处理详情：** 📁\n\n当使用 `--input-folder` 时，`llm-min` 会：\n- 递归扫描指定目录中的文档文件\n- 处理扩展名为 `.md`（Markdown）、`.txt`（纯文本）、`.rst`（reStructuredText）的文件\n- 将所有找到的文件合并成一个内容流\n- 完全跳过网页爬取（从而更快且无需互联网连接）\n- 将原始合并内容保存为 `llm-full.txt`，并生成压缩后的 `llm-min.txt`\n\n这特别适用于：\n- **内部\u002F专有文档**，这些文档无法在线获取\n- **本地项目文档**，你正在开发的文档\n- **离线处理**，当互联网访问受限时\n- **自定义格式的文档**\n\n**4. 在 Python 中的程序化使用：** 🐍\n\n你也可以直接将 `llm-min` 集成到你的 Python 应用程序中：\n\n```python\nfrom llm_min import LLMMinGenerator\nimport os\n\n# 用于 AI 处理的配置\nllm_config = {\n    \"api_key\": os.environ.get(\"GEMINI_API_KEY\"),  # 使用环境变量\n    \"model_name\": \"gemini-2.5-flash-lite-preview-06-17\",  # 推荐模型\n    \"chunk_size\": 600000,  # 每次 AI 处理的字符数\n    \"max_crawl_pages\": 200,  # 最大爬取页面数（仅限网页爬取）\n    \"max_crawl_depth\": 3,  # 链接跟随深度（仅限网页爬取）\n}\n\n# 初始化生成器（输出文件将保存到 .\u002Fmy_output_docs\u002F[source_name]\u002F）\ngenerator = LLMMinGenerator(output_dir=\".\u002Fmy_output_docs\", llm_config=llm_config)\n\n# 📦 为 Python 包生成 llm-min.txt\ntry:\n    generator.generate_from_package(\"requests\")\n    print(\"✅ 成功创建了 'requests' 的文档！\")\nexcept Exception as e:\n    print(f\"❌ 处理 'requests' 时出错：{e}\")\n\n# 🌐 从文档 URL 生成 llm-min.txt\ntry:\n    generator.generate_from_url(\"https:\u002F\u002Ffastapi.tiangolo.com\u002F\")\n    print(\"✅ 成功处理了 FastAPI 文档！\")\nexcept Exception as e:\n    print(f\"❌ 处理 URL 时出错：{e}\")\n\n# 📁 从本地文档文件生成 llm-min.txt\ntry:\n    # 读取并合并本地文件夹中的所有文档文件\n    import pathlib\n    docs_folder = pathlib.Path(\".\u002Fmy-project-docs\")\n    \n    # 收集支持的文件类型内容\n    content = \"\"\n    for ext in [\".md\", \".txt\", \".rst\"]:\n        for file_path in docs_folder.rglob(f\"*{ext}\"):\n    
        with open(file_path, encoding=\"utf-8\") as f:\n                content += f\"# 文件: {file_path.name}\\n\\n\"\n                content += f.read() + \"\\n\\n---\\n\\n\"\n    \n    # 处理合并后的内容\n    generator.generate_from_text(\n        input_content=content, \n        source_name=\"my-project\",\n        library_version=\"1.0.0\"  # 可选\n    )\n    print(\"✅ 成功处理了本地文档！\")\nexcept Exception as e:\n    print(f\"❌ 处理本地文件时出错：{e}\")\n```\n\n如需查看完整的命令行选项列表，请运行：\n```bash\nllm-min --help\n```\n\n---\n\n## 输出目录结构 📂\n\n当 `llm-min` 完成处理后，会创建如下整齐的目录结构：\n\n```text\nyour_chosen_output_dir\u002F\n└── name_of_package_or_website\u002F\n    ├── llm-full.txt             # 完整文档文本（原始内容）\n    ├── llm-min.txt              # 压缩后的 SKF\u002F1.4 LA 结构化摘要\n    └── llm-min-guideline.md     # 用于 AI 解读的重要格式说明文档\n```\n\n例如，运行 `llm-min -pkg \"requests\" -o my_llm_docs` 后，将生成：\n\n```text\nmy_llm_docs\u002F\n└── requests\u002F\n    ├── llm-full.txt             # 原始文档\n    ├── llm-min.txt              # 压缩后的 SKF 格式（D、I、U 部分）\n    └── llm-min-guideline.md     # 格式解码说明\n```\n\n**重要提示：** `llm-min-guideline.md` 文件是 `llm-min.txt` 的关键配套文件。它提供了详细的模式定义和格式说明，AI 需要这些信息才能正确解读结构化数据。在使用 `llm-min.txt` 与 AI 助手交互时，务必同时提供此指南文件。\n\n---\n\n## 选择合适的 AI 模型（为何推荐 Gemini） 🧠\n\n`llm-min` 使用 Google 的 Gemini 系列 AI 模型进行文档处理。虽然您可以通过 `--gemini-model` 选项指定特定的 Gemini 模型，但我们强烈建议使用默认设置：`gemini-2.5-flash-lite-preview-06-17`。\n\n该模型在文档压缩方面具备以下优势，能够实现最佳效果：\n\n1.  **高级推理能力：** 在理解复杂的技术文档以及提取 SKF 格式所需的关键结构关系方面表现出色。\n\n2.  **超大上下文窗口：** 具备 100 万标记的输入容量，可一次性处理大量文档内容，从而实现更连贯、更全面的分析。\n\n3.  **成本效益高：** 相较于其他大上下文模型，其性能与价格比更为理想。\n\n默认模型经过精心挑选，能够在广泛的文档风格和技术领域中为 `llm-min` 的压缩流程带来最佳结果。\n\n---\n\n## 工作原理：内部解析（src\u002Fllm_min）⚙️\n\n`llm-min` 工具采用一套复杂的多阶段流程，将冗长的文档转换为紧凑且针对机器优化的 SKF 清单：\n\n1.  **输入处理：** 根据您的命令行选项，`llm-min` 会从相应来源收集文档：\n    - **包（`--package \"requests\"`）：** 自动发现并抓取该包的文档网站\n    - **URL（`--doc-url \"https:\u002F\u002F...\"`）：** 直接抓取指定的文档网站  \n    - **本地文件夹（`--input-folder \".\u002Fdocs\"`）：** 递归扫描 `.md`、`.txt` 和 `.rst` 文件，并合并其内容\n\n2. 
 **文本准备：** 收集到的文档会被清理并分割成易于处理的小块。原始文本会以 `llm-full.txt` 的形式保留。\n\n3.  **三步 AI 分析流水线（Gemini）：** 这是生成 SKF 清单的核心部分，由 `compacter.py` 中的 `compact_content_to_structured_text` 函数协调完成：\n\n    *   **步骤 1：全局术语表生成（仅内部使用）：**\n        *   每个文档块都会使用 `SKF_PROMPT_CALL1_GLOSSARY_TEMPLATE` 提示词进行分析，以识别关键的技术实体，并生成带有临时 `Gxxx` ID 的“块内”术语表片段。\n        *   这些片段会通过 `SKF_PROMPT_CALL1_5_MERGE_GLOSSARY_TEMPLATE` 提示词合并，解决重复项并创建统一的实体列表。\n        *   随后，`re_id_glossary_items` 函数会为这些合并后的实体分配全局连续的 `Gxxx` ID（G001、G002 等）。  \n        *   该全局术语表在整个过程中会保留在内存中，但**不会包含在最终的 `llm-min.txt` 输出中**，以节省空间。\n\n    *   **步骤 2：定义与交互（D & I）生成：**\n        *   对于第一个文档块（或当只有一个块时），AI 会结合全局术语表，使用 `SKF_PROMPT_CALL2_DETAILS_SINGLE_CHUNK_TEMPLATE` 生成初始的 D 和 I 条目。\n        *   对于后续块，则使用 `SKF_PROMPT_CALL2_DETAILS_ITERATIVE_TEMPLATE`，同时提供全局术语表和先前生成的 D&I 条目作为上下文，以避免重复。\n        *   随着每个块的处理，新识别的 D 和 I 条目会被累积起来，并被赋予全局连续的 ID（D001、D002 等以及 I001、I002 等）。\n\n    *   **步骤 3：使用模式（U）生成：**\n        *   类似于步骤 2，第一个块使用 `SKF_PROMPT_CALL3_USAGE_SINGLE_CHUNK_TEMPLATE`，接收全局术语表、所有已累积的 D&I 条目以及当前块的文本。\n        *   后续块则使用 `SKF_PROMPT_CALL3_USAGE_ITERATIVE_TEMPLATE`，额外接收之前生成的 U 条目，以便延续模式并避免重复。\n        *   使用模式会被赋予描述性名称（如 `U_BasicNetworkFetch`），并包含编号步骤（如 `U_BasicNetworkFetch.1`、`U_BasicNetworkFetch.2`）。\n\n4.  
**最终组装：** 完整的 `llm-min.txt` 文件通过以下内容组合而成：\n    *   SKF 清单头部（协议版本、源文档、时间戳、主命名空间）\n    *   累积的“DEFINITIONS”部分\n    *   累积的“INTERACTIONS”部分\n    *   累积的“USAGE_PATTERNS”部分\n    *   最终的 `# END_OF_MANIFEST` 标记\n\n**概念性流程概览：**\n\n```\n用户输入      →  文档收集   →  文本处理   →  AI 步骤 1：术语表   →  内存中的全局    →  AI 步骤 2：D&I     →  累积的 D&I\n(CLI\u002FPython)       (包\u002FURL)      (分块)           (提取 + 合并)        术语表（Gxxx）        （按块）          （Dxxx、Ixxx）\n                                                                                                                                     ↓\n           ┌─────────────────────────────────────────────────────────────────────────────────────────────────┐                      ↓\n           ↓                                                                                                 ↑                      ↓\n最终 SKF 清单   ←   组装   ←   累积的使用   ←   AI 步骤 3：使用   ←   全局术语表 + 累积的 D&I\n(llm-min.txt)            （D,I,U）        模式（U_Name.N）      （按块）              （生成有效 U 条目的必要上下文）\n```\n\n这种多阶段方法确保了 SKF 清单的全面性，避免了跨块内容的重复，并保持了实体、定义、交互和使用模式之间的一致性引用。\n\n---\n\n## 下一步？未来计划 🔮\n\n我们正在探索几个令人兴奋的方向来进一步发展 `llm-min`：\n\n*   **预生成文件的公共仓库** 🌐\n    如果能建立一个中央枢纽，供社区共享和发现常用库的 `llm-min.txt` 文件，这将非常有价值。这样可以省去用户反复生成这些文件的麻烦，并确保信息的一致性和高质量。主要挑战包括质量控制、版本管理和托管基础设施成本。\n\n*   **基于代码的文档推断** 💻\n    一种有趣的可能性是利用源代码分析（通过抽象语法树）自动生成或补充文档摘要。尽管初步实验表明这在技术上颇具挑战性，尤其是对于具有动态行为的复杂库而言，但这仍然是一个很有前景的研究方向，有望实现更精确的文档生成。\n\n*   **模型控制协议集成** 🤔\n    尽管从技术上可行，但将 `llm-min` 实现为 MCP 服务器并不完全符合我们当前的设计理念。`llm-min.txt` 的优势在于提供可靠、静态的上下文——这是一种确定性的参考，能够降低动态 AI 集成有时带来的不确定性。我们正在密切关注用户需求，以判断未来是否可以通过服务器端方式提供价值。\n\n我们欢迎社区对这些潜在方向提出宝贵意见！\n\n---\n\n## 常见问题 (FAQ) ❓\n\n**问：生成 `llm-min.txt` 文件是否需要具备推理能力的模型？** 🧠\n\n答：是的，生成 `llm-min.txt` 文件需要像 Gemini 这样具有强大推理能力的模型。这一过程涉及复杂的信息提取、实体关系映射以及结构化知识表示。不过，一旦生成，`llm-min.txt` 文件就可以被任何具备良好编码能力的模型（例如 Claude 3.5 Sonnet）有效使用，以回答与特定库相关的问题。\n\n**问：`llm-min.txt` 是否保留了原始文档中的所有信息？** 📚\n\n答：不是。`llm-min.txt` 
明确设计为一种有损压缩格式。它优先保留与编程相关的细节（类、方法、参数、返回类型、核心使用模式），而有意省略说明性文字、概念讨论及次要信息。正是这种有选择性的信息保留，才使得令牌数量大幅减少，同时仍能维持 AI 助手所需的基本技术参考信息。\n\n**问：为什么生成 `llm-min.txt` 文件需要花费时间？** ⏱️\n\n答：创建 `llm-min.txt` 文件涉及一个复杂的多阶段 AI 流程：\n1. 收集并预处理文档；\n2. 分析每个文档块以识别实体（生成术语表）；\n3. 整合各文档块中的实体；\n4. 从每个文档块中提取详细定义和交互信息；\n5. 生成具有代表性的使用模式。\n\n这一密集型过程可能需要数分钟，尤其是对于大型库而言。然而，一旦生成，生成的 `llm-min.txt` 文件可以无限期重复使用，为 AI 助手提供更快捷的参考信息。\n\n**问：我收到了“Gemini 生成因 MAX_TOKENS 限制而停止”的错误提示，该怎么办？** 🛑\n\n答：此错误表明 Gemini 模型在处理特别密集或复杂的文档块时达到了其输出限制。您可以尝试减小 `--chunk-size` 参数值（例如从 600,000 字符降至 300,000 字符），以便让模型每次处理较小的批次。虽然这可能会因更多独立调用而导致 API 费用略有增加，但通常可以解决令牌限制错误。\n\n**问：生成一个 `llm-min.txt` 文件的典型费用是多少？** 💰\n\n答：处理成本因文档大小和复杂度而异，但对于中等规模的库，预计 Gemini API 的费用将在 **0.01 至 1.00 美元** 之间。影响成本的主要因素包括：\n- 文档总大小；\n- 处理的文档块数量；\n- 库结构的复杂程度；\n- 所选的 Gemini 模型。\n\n有关当前定价详情，请参阅 [Google Cloud AI 定价页面](https:\u002F\u002Fcloud.google.com\u002Fvertex-ai\u002Fpricing#gemini)。\n\n**问：我能否在没有互联网连接的情况下处理本地文档文件？** 📁\n\n答：可以！`--input-folder` 选项非常适合离线处理。使用此选项时，`llm-min` 将：\n- 完全跳过网络爬取（无需互联网即可收集内容）；\n- 仅在压缩过程中需要访问互联网以调用 Gemini API；\n- 递归支持任意目录结构中的 `.md`、`.txt` 和 `.rst` 文件；\n- 可用于处理内部或专有文档，这些文档并未公开发布在网上。\n\n这使其成为处理私有文档、本地开发文档，或在互联网连接受限情况下工作的理想选择。\n\n**问：这个项目是用 Vibe Coding（氛围编程）的方式完成的吗？** 🤖\n\n答：是的，当然。该项目是使用 [Roocode](https:\u002F\u002Froocode.com\u002F) 并结合名为 [Rooroo](https:\u002F\u002Fgithub.com\u002Fmarv1nnnnn\u002Frooroo) 的自定义配置开发的。\n\n---\n\n## 想帮忙吗？贡献力量 🤝\n\n我们欢迎各位为使 `llm-min` 更加完善而做出贡献！🎉\n\n无论您是报告 bug、提出功能建议，还是通过拉取请求提交代码更改，您的参与都将帮助改进这款工具，造福所有人。请查看我们的 GitHub 仓库，了解贡献指南和当前的开放问题。\n\n---\n\n## 许可证 📜\n\n本项目采用 MIT 许可证授权。完整详情请参阅 `LICENSE` 文件。","# llm-min.txt 快速上手指南\n\n`llm-min.txt` 是一个专为大语言模型（LLM）设计的文档压缩工具。它受 `min.js` 启发，利用 AI 将庞大的技术文档蒸馏为高度结构化、机器优化的 **SKF (Structured Knowledge Format)** 格式。相比原始文档，它能减少 90%-95% 的 Token 消耗，同时保留核心知识，显著提升 AI 编程助手对最新库的理解和代码生成准确率。\n\n## 环境准备\n\n在开始之前，请确保满足以下系统要求：\n\n*   **操作系统**: Linux, macOS 或 Windows\n*   **Python 版本**: Python 3.10 或更高版本\n*   **API 密钥**: 需要拥有 Google Gemini API Key（用于驱动文档压缩过程）\n    *   获取地址：[Google AI 
Studio](https:\u002F\u002Faistudio.google.com\u002Fapp\u002Fapikey) 或 Google Cloud Console\n*   **浏览器自动化依赖**: 需要安装 Playwright 以支持部分抓取功能\n\n## 安装步骤\n\n推荐普通用户直接使用 `pip` 安装，开发者可选择源码安装。\n\n### 方式一：常规安装（推荐）\n\n```bash\n# 安装主程序\npip install llm-min\n\n# 安装必要的浏览器自动化驱动\nplaywright install\n```\n\n> **国内加速提示**：如果 `pip` 下载缓慢，建议使用国内镜像源：\n> ```bash\n> pip install llm-min -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n> ```\n\n### 方式二：开发者安装（源码模式）\n\n如果你需要贡献代码或修改内部逻辑：\n\n```bash\n# 克隆仓库（假设已 git clone）\ncd llm-min\n\n# 创建并激活虚拟环境\npython -m venv .venv\nsource .venv\u002Fbin\u002Factivate  # Windows 用户请使用: .venv\\Scripts\\activate\n\n# 使用 UV 工具同步依赖（比 pip 更快）\nuv sync\nuv pip install -e .\n\n# 可选：安装预提交钩子\n# uv pip install pre-commit\n# pre-commit install\n```\n\n## 基本使用\n\n### 1. 配置 API 密钥\n\n`llm-min` 依赖 Google Gemini AI 进行文档蒸馏。你可以通过环境变量或命令行参数提供密钥。\n\n**推荐方式：设置环境变量**\n\n*   **Linux \u002F macOS**:\n    ```bash\n    export GEMINI_API_KEY=your_api_key_here\n    ```\n*   **Windows (CMD)**:\n    ```cmd\n    set GEMINI_API_KEY=your_api_key_here\n    ```\n*   **Windows (PowerShell)**:\n    ```powershell\n    $env:GEMINI_API_KEY=\"your_api_key_here\"\n    ```\n\n**替代方式：命令行直接传入**\n在执行命令时添加 `--gemini-api-key your_api_key_here`。\n\n### 2. 生成压缩文档\n\n配置好密钥后，即可运行工具生成 `llm-min.txt` 文件。工具支持多种输入源（如 URL、本地路径等）。\n\n**最简示例：**\n\n假设你要为某个库的文档网址生成压缩文件：\n\n```bash\nllm-min --doc-url \"https:\u002F\u002Fexample-lib-docs.com\" --output-dir .\u002Foutput_dir\n```\n\n执行成功后，输出目录中将包含：\n*   `llm-min.txt`: 高度压缩的 SKF 格式知识库（通常仅约 10,000 tokens）。\n*   `llm-min-guideline.md`: 格式解码指南，辅助 AI 理解 SKF 结构。\n\n### 3. 
在 AI 助手中使用\n\n将生成的 `llm-min.txt` 和 `llm-min-guideline.md` 文件作为上下文提供给你的 AI 编程助手（如 Cursor, GitHub Copilot, VS Code + LLM 插件等）。\n\n*   **操作方法**：在对话中引用这两个文件，或直接将其放入项目的 `.cursorrules` 或类似配置目录中。\n*   **效果**：AI 将立即掌握该库的最新用法、类定义和交互模式，从而生成更准确的代码，即使该库超出了 AI 模型的原始训练截止日期。","某后端团队正基于最新版的 FastAPI 框架开发高并发微服务，急需让 AI 编程助手生成符合最新语法的异步中间件代码。\n\n### 没有 llm-min.txt 时\n- **知识滞后导致代码报错**：AI 模型因训练数据截止较早，生成的代码仍使用已废弃的同步写法，导致项目启动失败。\n- **上下文窗口被撑爆**：试图通过粘贴完整的官方文档或巨大的 `llms-full.txt` 文件来补充知识，却因内容超过 80 万 token 直接超出 AI 的处理上限。\n- **信息检索效率低下**：文档中充斥大量人类可读但机器无关的排版、示例和冗余描述，AI 难以在海量文本中精准定位核心 API 定义。\n- **黑盒依赖不可控**：依赖外部黑盒服务动态抓取文档，无法确定其是否包含了刚刚发布的紧急补丁说明，存在安全隐患。\n\n### 使用 llm-min.txt 后\n- **即时同步最新特性**：llm-min.txt 将技术文档压缩为类似 `min.js` 的机器最优格式，让 AI 瞬间掌握 FastAPI 最新的异步中间件规范，生成代码一次通过。\n- **极致压缩节省算力**：通过剔除所有非必要元素，文档体积大幅缩小，轻松放入 AI 的上下文窗口，无需担心长度限制。\n- **高密度信息直达核心**：保留纯粹的结构化定义与逻辑关系，去除了自然语言噪音，使 AI 能更准确地理解参数类型与调用链路。\n- **透明可控的知识源**：团队可直接针对任意开源包生成专用的 llm-min.txt 文件，确保 AI 引用的永远是本地最新、最准确的文档版本。\n\nllm-min.txt 通过“文档压缩”理念，彻底解决了 AI 编程中知识过时与上下文受限的双重难题，让大模型真正具备实时演进的开发能力。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmarv1nnnnn_llm-min.txt_d802e5ac.png","marv1nnnnn",null,"https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fmarv1nnnnn_cf0cd689.jpg","https:\u002F\u002Fmarv1nnnnn.github.io\u002F","https:\u002F\u002Fgithub.com\u002Fmarv1nnnnn",[81],{"name":82,"color":83,"percentage":84},"Python","#3572A5",100,676,15,"2026-04-02T20:57:24","MIT","Linux, macOS, Windows","未说明",{"notes":92,"python":93,"dependencies":94},"该工具主要依赖 Google Gemini API 进行文档压缩，需配置 GEMINI_API_KEY 环境变量或通过命令行参数提供密钥。安装后需运行 'playwright install' 以安装浏览器自动化工具。推荐使用 uv 管理依赖以提升安装速度。","3.10+",[95,96],"playwright","google-generativeai (Gemini API)",[52,26,13],[99,100,101],"ide","llm","vibe-coding","2026-03-27T02:49:30.150509","2026-04-06T07:22:57.604536",[],[106],{"id":107,"version":108,"summary_zh":76,"released_at":109},62861,"v0.2.4","2025-06-01T08:47:11"]