[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-AlexsJones--llmfit":3,"tool-AlexsJones--llmfit":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 
代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",140436,2,"2026-04-05T23:32:43",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",107662,"2026-04-03T11:11:01",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":10,"last_commit_at":50,"category_tags":51,"status":17},4292,"Deep-Live-Cam","hacksider\u002FDeep-Live-Cam","Deep-Live-Cam 是一款专注于实时换脸与视频生成的开源工具，用户仅需一张静态照片，即可通过“一键操作”实现摄像头画面的即时变脸或制作深度伪造视频。它有效解决了传统换脸技术流程繁琐、对硬件配置要求极高以及难以实时预览的痛点，让高质量的数字内容创作变得触手可及。\n\n这款工具不仅适合开发者和技术研究人员探索算法边界，更因其极简的操作逻辑（仅需三步：选脸、选摄像头、启动），广泛适用于普通用户、内容创作者、设计师及直播主播。无论是为了动画角色定制、服装展示模特替换，还是制作趣味短视频和直播互动，Deep-Live-Cam 都能提供流畅的支持。\n\n其核心技术亮点在于强大的实时处理能力，支持口型遮罩（Mouth 
Mask）以保留使用者原始的嘴部动作，确保表情自然精准；同时具备“人脸映射”功能，可同时对画面中的多个主体应用不同面孔。此外，项目内置了严格的内容安全过滤机制，自动拦截涉及裸露、暴力等不当素材，并倡导用户在获得授权及明确标注的前提下合规使用，体现了技术发展与伦理责任的平衡。",88924,"2026-04-06T03:28:53",[14,15,13,52],"视频",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":32,"last_commit_at":59,"category_tags":60,"status":17},3704,"NextChat","ChatGPTNextWeb\u002FNextChat","NextChat 是一款轻量且极速的 AI 助手，旨在为用户提供流畅、跨平台的大模型交互体验。它完美解决了用户在多设备间切换时难以保持对话连续性，以及面对众多 AI 模型不知如何统一管理的痛点。无论是日常办公、学习辅助还是创意激发，NextChat 都能让用户随时随地通过网页、iOS、Android、Windows、MacOS 或 Linux 端无缝接入智能服务。\n\n这款工具非常适合普通用户、学生、职场人士以及需要私有化部署的企业团队使用。对于开发者而言，它也提供了便捷的自托管方案，支持一键部署到 Vercel 或 Zeabur 等平台。\n\nNextChat 的核心亮点在于其广泛的模型兼容性，原生支持 Claude、DeepSeek、GPT-4 及 Gemini Pro 等主流大模型，让用户在一个界面即可自由切换不同 AI 能力。此外，它还率先支持 MCP（Model Context Protocol）协议，增强了上下文处理能力。针对企业用户，NextChat 提供专业版解决方案，具备品牌定制、细粒度权限控制、内部知识库整合及安全审计等功能，满足公司对数据隐私和个性化管理的高标准要求。",87618,"2026-04-05T07:20:52",[14,35],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":66,"readme_en":67,"readme_zh":68,"quickstart_zh":69,"use_case_zh":70,"hero_image_url":71,"owner_login":72,"owner_name":73,"owner_avatar_url":74,"owner_bio":75,"owner_company":76,"owner_location":77,"owner_email":78,"owner_twitter":79,"owner_website":78,"owner_url":80,"languages":81,"stars":117,"forks":118,"last_commit_at":119,"license":120,"difficulty_score":121,"env_os":122,"env_gpu":123,"env_ram":124,"env_deps":125,"category_tags":134,"github_topics":135,"view_count":32,"oss_zip_url":78,"oss_zip_packed_at":78,"status":17,"created_at":142,"updated_at":143,"faqs":144,"releases":175},4223,"AlexsJones\u002Fllmfit","llmfit","Hundreds of models & providers. 
One command to find what runs on your hardware.","llmfit 是一款专为本地大语言模型（LLM）设计的终端工具，旨在帮助用户快速找到最适合自己硬件配置的模型。面对市面上数百种模型和不同的运行提供商，用户往往难以判断哪些模型能在自己的电脑上流畅运行。llmfit 通过一键检测系统的内存、CPU 和 GPU 规格，自动评估各模型在质量、速度、适配度及上下文长度等维度的表现，从而精准推荐“刚刚好”能跑起来的模型，避免了因显存或内存不足导致的运行失败。\n\n这款工具特别适合开发者、AI 研究人员以及希望在本地部署大模型的进阶用户。无论是拥有单张显卡的笔记本，还是配置了多卡工作站甚至苹果 M 系列芯片的用户，llmfit 都能提供针对性的建议。其独特亮点在于内置了交互式终端界面（TUI），不仅直观展示预估生成速度和最佳量化格式，还支持动态筛选、多后端适配（如 Ollama、llama.cpp、MLX 等）以及对混合专家模型（MoE）的特殊优化。只需一条命令，即可告别繁琐的手动试错，让本地大模型部署变得简单高效。","# llmfit\n\n\u003Cp align=\"center\">\n  \u003Cimg src=\"assets\u002Ficon.svg\" alt=\"llmfit icon\" width=\"128\" height=\"128\">\n\u003C\u002Fp>\n\n\u003Cp align=\"center\">\n  \u003Cb>English\u003C\u002Fb> ·\n  \u003Ca href=\"README.zh.md\">中文\u003C\u002Fa>\n\u003C\u002Fp>\n\n\u003Cp align=\"center\">\n  \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Factions\u002Fworkflows\u002Fci.yml\">\u003Cimg src=\"https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Factions\u002Fworkflows\u002Fci.yml\u002Fbadge.svg\" alt=\"CI\">\u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fcrates.io\u002Fcrates\u002Fllmfit\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fcrates\u002Fv\u002Fllmfit.svg\" alt=\"Crates.io\">\u003C\u002Fa>\n  \u003Ca href=\"LICENSE\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002Flicense-MIT-blue.svg\" alt=\"License\">\u003C\u002Fa>\n\u003C\u002Fp>\n\n**Hundreds of models & providers. One command to find what runs on your hardware.**\n\nA terminal tool that right-sizes LLM models to your system's RAM, CPU, and GPU. Detects your hardware, scores each model across quality, speed, fit, and context dimensions, and tells you which ones will actually run well on your machine.\n\nShips with an interactive TUI (default) and a classic CLI mode. 
Supports multi-GPU setups, MoE architectures, dynamic quantization selection, speed estimation, and local runtime providers (Ollama, llama.cpp, MLX, Docker Model Runner, LM Studio).\n\n> **Sister projects:**\n> - [sympozium](https:\u002F\u002Fgithub.com\u002Fsympozium-ai\u002Fsympozium\u002F) — managing agents in Kubernetes.\n> - [llmserve](https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmserve) — a simple TUI for serving local LLM models. Pick a model, pick a backend, serve it.\n\n![demo](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FAlexsJones_llmfit_readme_bfe3c444a49a.gif)\n\n---\n\n## Install\n\n### Windows\n```sh\nscoop install llmfit\n```\n\nIf Scoop is not installed, follow the [Scoop installation guide](https:\u002F\u002Fscoop.sh\u002F).\n\n### macOS \u002F Linux\n\n#### Homebrew\n```sh\nbrew install llmfit\n```\n\n#### Quick install\n```sh\ncurl -fsSL https:\u002F\u002Fllmfit.axjns.dev\u002Finstall.sh | sh\n```\n\nDownloads the latest release binary from GitHub and installs it to `\u002Fusr\u002Flocal\u002Fbin` (or `~\u002F.local\u002Fbin` if no sudo).\n\n**Install to `~\u002F.local\u002Fbin` without sudo:**\n```sh\ncurl -fsSL https:\u002F\u002Fllmfit.axjns.dev\u002Finstall.sh | sh -s -- --local\n```\n\n### Docker \u002F Podman\n```sh\ndocker run ghcr.io\u002Falexsjones\u002Fllmfit\n```\nThis prints JSON from `llmfit recommend` command. The JSON could be further queried with `jq`.\n```\npodman run ghcr.io\u002Falexsjones\u002Fllmfit recommend --use-case coding | jq '.models[].name'\n```\n\n### From source\n```sh\ngit clone https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit.git\ncd llmfit\ncargo build --release\n# binary is at target\u002Frelease\u002Fllmfit\n```\n\n---\n\n## Usage\n\n### TUI (default)\n\n```sh\nllmfit\n```\n\nLaunches the interactive terminal UI. Your system specs (CPU, RAM, GPU name, VRAM, backend) are shown at the top. Models are listed in a scrollable table sorted by composite score. 
Each row shows the model's score, estimated tok\u002Fs, best quantization for your hardware, run mode, memory usage, and use-case category.\n\n| Key                        | Action                                                                |\n|----------------------------|-----------------------------------------------------------------------|\n| `Up` \u002F `Down` or `j` \u002F `k` | Navigate models                                                       |\n| `\u002F`                        | Enter search mode (partial match on name, provider, params, use case) |\n| `Esc` or `Enter`           | Exit search mode                                                      |\n| `Ctrl-U`                   | Clear search                                                          |\n| `f`                        | Cycle fit filter: All, Runnable, Perfect, Good, Marginal              |\n| `a`                        | Cycle availability filter: All, GGUF Avail, Installed                 |\n| `s`                        | Cycle sort column: Score, Params, Mem%, Ctx, Date, Use Case           |\n| `v`                        | Enter Visual mode (select multiple models)                            |\n| `V`                        | Enter Select mode (column-based filtering)                            |\n| `t`                        | Cycle color theme (saved automatically)                               |\n| `p`                        | Open Plan mode for selected model (hardware planning)                 |\n| `P`                        | Open provider filter popup                                            |\n| `U`                        | Open use-case filter popup                                            |\n| `C`                        | Open capability filter popup                                          |\n| `m`                        | Mark selected model for compare                                       |\n| `c`                        | Open compare view (marked vs selected)     
                           |\n| `x`                        | Clear compare mark                                                    |\n| `i`                        | Toggle installed-first sorting (any detected runtime provider)        |\n| `d`                        | Download selected model (provider picker when multiple are available) |\n| `r`                        | Refresh installed models from runtime providers                       |\n| `Enter`                    | Toggle detail view for selected model                                 |\n| `PgUp` \u002F `PgDn`            | Scroll by 10                                                          |\n| `g` \u002F `G`                  | Jump to top \u002F bottom                                                  |\n| `q`                        | Quit                                                                  |\n\n### Vim-like modes\n\nThe TUI uses Vim-inspired modes shown in the bottom-left status bar. The current mode determines which keys are active.\n\n#### Normal mode\n\nThe default mode. Navigate, search, filter, and open views. All keys in the table above apply here.\n\n#### Visual mode (`v`)\n\nSelect a contiguous range of models for bulk comparison. Press `v` to anchor at the current row, then navigate with `j`\u002F`k` or arrow keys to extend the selection. 
Selected rows are highlighted.\n\n| Key                 | Action                                                 |\n|---------------------|--------------------------------------------------------|\n| `j` \u002F `k` or arrows | Extend selection up\u002Fdown                               |\n| `c`                 | Compare all selected models (opens multi-compare view) |\n| `m`                 | Mark current model for two-model compare               |\n| `Esc` or `v`        | Exit Visual mode                                       |\n\nThe multi-compare view displays a table where rows are attributes (Score, tok\u002Fs, Fit, Mem%, Params, Mode, Context, Quant, etc.) and columns are models. Best values are highlighted. Use `h`\u002F`l` or arrow keys to scroll horizontally if more models are selected than fit on screen.\n\n#### Select mode (`V`)\n\nColumn-based filtering. Press `V` (shift-v) to enter Select mode, then use `h`\u002F`l` or arrow keys to move between column headers. The active column is visually highlighted. 
Press `Enter` or `Space` to activate the appropriate filter for that column:\n\n| Column                        | Filter action                                                             |\n|-------------------------------|---------------------------------------------------------------------------|\n| Inst                          | Cycle availability filter                                                 |\n| Model                         | Enter search mode                                                         |\n| Provider                      | Open provider popup                                                       |\n| Params                        | Open parameter-size bucket popup (\u003C3B, 3-7B, 7-14B, 14-30B, 30-70B, 70B+) |\n| Score, tok\u002Fs, Mem%, Ctx, Date | Sort by that column                                                       |\n| Quant                         | Open quantization popup                                                   |\n| Mode                          | Open run-mode popup (GPU, MoE, CPU+GPU, CPU)                              |\n| Fit                           | Cycle fit filter                                                          |\n| Use Case                      | Open use-case popup                                                       |\n\nRow navigation (`j`\u002F`k`) still works in Select mode so you can see the effect of filters as you apply them. 
Press `Esc` to return to Normal mode.\n\n### TUI Plan mode (`p`)\n\nPlan mode inverts normal fit analysis: instead of asking \"what fits my hardware?\", it estimates \"what hardware is needed for this model config?\".\n\nUse `p` on a selected row, then:\n\n| Key                    | Action                                                    |\n|------------------------|-----------------------------------------------------------|\n| `Tab` \u002F `j` \u002F `k`      | Move between editable fields (Context, Quant, Target TPS) |\n| `Left` \u002F `Right`       | Move cursor in current field                              |\n| Type                   | Edit current field                                        |\n| `Backspace` \u002F `Delete` | Remove characters                                         |\n| `Ctrl-U`               | Clear current field                                       |\n| `Esc` or `q`           | Exit Plan mode                                            |\n\nPlan mode shows estimates for:\n- minimum and recommended VRAM\u002FRAM\u002FCPU cores\n- feasible run paths (GPU, CPU offload, CPU-only)\n- upgrade deltas to reach better fit targets\n\n### Themes\n\nPress `t` to cycle through 10 built-in color themes. 
Your selection is saved automatically to `~\u002F.config\u002Fllmfit\u002Ftheme` and restored on next launch.\n\n| Theme                    | Description                                       |\n|--------------------------|---------------------------------------------------|\n| **Default**              | Original llmfit colors                            |\n| **Dracula**              | Dark purple background with pastel accents        |\n| **Solarized**            | Ethan Schoonover's Solarized Dark palette         |\n| **Nord**                 | Arctic, cool blue-gray tones                      |\n| **Monokai**              | Monokai Pro warm syntax colors                    |\n| **Gruvbox**              | Retro groove palette with warm earth tones        |\n| **Catppuccin Latte**     | 🌻 Light theme — harmonious pastel inversion      |\n| **Catppuccin Frappé**    | 🪴 Low-contrast dark — muted, subdued aesthetic   |\n| **Catppuccin Macchiato** | 🌺 Medium-contrast dark — gentle, soothing tones  |\n| **Catppuccin Mocha**     | 🌿 Darkest variant — cozy with color-rich accents |\n\n### Web dashboard\n\nWhen you run `llmfit` in non-JSON mode, it automatically starts a background web dashboard on `0.0.0.0:8787`. 
Open it in any browser on the same network:\n\n```\nhttp:\u002F\u002F\u003Cyour-machine-ip>:8787\n```\n\nOverride the host or port with environment variables:\n\n```sh\nLLMFIT_DASHBOARD_HOST=0.0.0.0 LLMFIT_DASHBOARD_PORT=9000 llmfit\n```\n\n| Variable | Default | Description |\n|---|---|---|\n| `LLMFIT_DASHBOARD_HOST` | `0.0.0.0` | Interface to bind the dashboard server |\n| `LLMFIT_DASHBOARD_PORT` | `8787` | Port to bind the dashboard server |\n\nTo disable the auto-started dashboard, pass `--no-dashboard`:\n\n```sh\nllmfit --no-dashboard\n```\n\n### CLI mode\n\nUse `--cli` or any subcommand to get classic table output:\n\n```sh\n# Table of all models ranked by fit\nllmfit --cli\n\n# Only perfectly fitting models, top 5\nllmfit fit --perfect -n 5\n\n# Show detected system specs\nllmfit system\n\n# List all models in the database\nllmfit list\n\n# Search by name, provider, or size\nllmfit search \"llama 8b\"\n\n# Detailed view of a single model\nllmfit info \"Mistral-7B\"\n\n# Top 5 recommendations (JSON, for agent\u002Fscript consumption)\nllmfit recommend --json --limit 5\n\n# Recommendations filtered by use case\nllmfit recommend --json --use-case coding --limit 3\n\n# Force a specific runtime (bypass automatic MLX selection on Apple Silicon)\nllmfit recommend --force-runtime llamacpp\nllmfit recommend --force-runtime llamacpp --use-case coding --limit 3\n\n# Plan required hardware for a specific model configuration\nllmfit plan \"Qwen\u002FQwen3-4B-MLX-4bit\" --context 8192\nllmfit plan \"Qwen\u002FQwen3-4B-MLX-4bit\" --context 8192 --quant mlx-4bit\nllmfit plan \"Qwen\u002FQwen3-4B-MLX-4bit\" --context 8192 --target-tps 25 --json\n\n# Run as a node-level REST API (for cluster schedulers \u002F aggregators)\nllmfit serve --host 0.0.0.0 --port 8787\n```\n\n### REST API (`llmfit serve`)\n\n`llmfit serve` starts an HTTP API that exposes the same fit\u002Fscoring data used by TUI\u002FCLI, including filtering and top-model selection for a node.\n\n```sh\n# 
Liveness\ncurl http:\u002F\u002Flocalhost:8787\u002Fhealth\n\n# Node hardware info\ncurl http:\u002F\u002Flocalhost:8787\u002Fapi\u002Fv1\u002Fsystem\n\n# Full fit list with filters\ncurl \"http:\u002F\u002Flocalhost:8787\u002Fapi\u002Fv1\u002Fmodels?min_fit=marginal&runtime=llamacpp&sort=score&limit=20\"\n\n# Key scheduling endpoint: top runnable models for this node\ncurl \"http:\u002F\u002Flocalhost:8787\u002Fapi\u002Fv1\u002Fmodels\u002Ftop?limit=5&min_fit=good&use_case=coding\"\n\n# Search by model name\u002Fprovider text\ncurl \"http:\u002F\u002Flocalhost:8787\u002Fapi\u002Fv1\u002Fmodels\u002FMistral?runtime=any\"\n```\n\nSupported query params for `models`\u002F`models\u002Ftop`:\n\n- `limit` (or `n`): max number of rows returned\n- `perfect`: `true|false` (forces perfect-only when `true`)\n- `min_fit`: `perfect|good|marginal|too_tight`\n- `runtime`: `any|mlx|llamacpp`\n- `use_case`: `general|coding|reasoning|chat|multimodal|embedding`\n- `provider`: provider text filter (substring)\n- `search`: free-text filter across name\u002Fprovider\u002Fsize\u002Fuse-case\n- `sort`: `score|tps|params|mem|ctx|date|use_case`\n- `include_too_tight`: include non-runnable rows (default `false` on `\u002Ftop`, `true` on `\u002Fmodels`)\n- `max_context`: per-request context cap for memory estimation\n- `force_runtime`: `mlx|llamacpp|vllm` — override automatic runtime selection during analysis\n\nValidate API behavior locally:\n\n```sh\n# spawn server automatically and run endpoint\u002Fschema\u002Ffilter assertions\npython3 scripts\u002Ftest_api.py --spawn\n\n# or test an already-running server\npython3 scripts\u002Ftest_api.py --base-url http:\u002F\u002F127.0.0.1:8787\n```\n\n### GPU memory override\n\nGPU VRAM autodetection can fail on some systems (e.g. broken `nvidia-smi`, VMs, passthrough setups). 
Use `--memory` to manually specify your GPU's VRAM:\n\n```sh\n# Override with 32 GB VRAM\nllmfit --memory=32G\n\n# Megabytes also work (32000 MB ≈ 31.25 GB)\nllmfit --memory=32000M\n\n# Works with all modes: TUI, CLI, and subcommands\nllmfit --memory=24G --cli\nllmfit --memory=24G fit --perfect -n 5\nllmfit --memory=24G system\nllmfit --memory=24G info \"Llama-3.1-70B\"\nllmfit --memory=24G recommend --json\n```\n\nAccepted suffixes: `G`\u002F`GB`\u002F`GiB` (gigabytes), `M`\u002F`MB`\u002F`MiB` (megabytes), `T`\u002F`TB`\u002F`TiB` (terabytes). Case-insensitive. If no GPU was detected, the override creates a synthetic GPU entry so models are scored for GPU inference.\n\n### Context-length cap for estimation\n\nUse `--max-context` to cap context length used for memory estimation (without changing each model's advertised maximum context):\n\n```sh\n# Estimate memory fit at 4K context\nllmfit --max-context 4096 --cli\n\n# Works with subcommands\nllmfit --max-context 8192 fit --perfect -n 5\nllmfit --max-context 16384 recommend --json --limit 5\n```\n\nIf `--max-context` is not set, llmfit will use `OLLAMA_CONTEXT_LENGTH` when available.\n\n### JSON output\n\nAdd `--json` to any subcommand for machine-readable output:\n\n```sh\nllmfit --json system     # Hardware specs as JSON\nllmfit --json fit -n 10  # Top 10 fits as JSON\nllmfit recommend --json  # Top 5 recommendations (JSON is default for recommend)\nllmfit plan \"Qwen\u002FQwen2.5-Coder-0.5B-Instruct\" --context 8192 --json\n```\n\n`plan` JSON includes stable fields for:\n- request (`context`, `quantization`, `target_tps`)\n- estimated minimum\u002Frecommended hardware\n- per-path feasibility (`gpu`, `cpu_offload`, `cpu_only`)\n- upgrade deltas\n\n---\n\n## How it works\n\n1. **Hardware detection** -- Reads total\u002Favailable RAM via `sysinfo`, counts CPU cores, and probes for GPUs:\n   - **NVIDIA** -- Multi-GPU support via `nvidia-smi`. Aggregates VRAM across all detected GPUs. 
Falls back to VRAM estimation from GPU model name if reporting fails.\n   - **AMD** -- Detected via `rocm-smi`.\n   - **Intel Arc** -- Discrete VRAM via sysfs, integrated via `lspci`.\n   - **Apple Silicon** -- Unified memory via `system_profiler`. VRAM = system RAM.\n   - **Ascend** -- Detected via `npu-smi`.\n   - **Backend detection** -- Automatically identifies the acceleration backend (CUDA, Metal, ROCm, SYCL, CPU ARM, CPU x86, Ascend) for speed estimation.\n\n2. **Model database** -- Hundreds models sourced from the HuggingFace API, stored in `data\u002Fhf_models.json` and embedded at compile time. Memory requirements are computed from parameter counts across a quantization hierarchy (Q8_0 through Q2_K). VRAM is the primary constraint for GPU inference; system RAM is the fallback for CPU-only execution.\n\n   **MoE support** -- Models with Mixture-of-Experts architectures (Mixtral, DeepSeek-V2\u002FV3) are detected automatically. Only a subset of experts is active per token, so the effective VRAM requirement is much lower than total parameter count suggests. For example, Mixtral 8x7B has 46.7B total parameters but only activates ~12.9B per token, reducing VRAM from 23.9 GB to ~6.6 GB with expert offloading.\n\n3. **Dynamic quantization** -- Instead of assuming a fixed quantization, llmfit tries the best quality quantization that fits your hardware. It walks a hierarchy from Q8_0 (best quality) down to Q2_K (most compressed), picking the highest quality that fits in available memory. If nothing fits at full context, it tries again at half context.\n\n4. 
**Multi-dimensional scoring** -- Each model is scored across four dimensions (0–100 each):\n\n   | Dimension   | What it measures                                                               |\n   |-------------|--------------------------------------------------------------------------------|\n   | **Quality** | Parameter count, model family reputation, quantization penalty, task alignment |\n   | **Speed**   | Estimated tokens\u002Fsec based on backend, params, and quantization                |\n   | **Fit**     | Memory utilization efficiency (sweet spot: 50–80% of available memory)         |\n   | **Context** | Context window capability vs target for the use case                           |\n\n   Dimensions are combined into a weighted composite score. Weights vary by use-case category (General, Coding, Reasoning, Chat, Multimodal, Embedding). For example, Chat weights Speed higher (0.35) while Reasoning weights Quality higher (0.55). Models are ranked by composite score, with unrunnable models (Too Tight) always at the bottom.\n\n5. **Speed estimation** -- Token generation in LLM inference is memory-bandwidth-bound: each token requires reading the full model weights once from VRAM. When the GPU model is recognized, llmfit uses its actual memory bandwidth to estimate throughput:\n\n   Formula: `(bandwidth_GB_s \u002F model_size_GB) × efficiency_factor`\n\n   The efficiency factor (0.55) accounts for kernel overhead, KV-cache reads, and memory controller effects. 
This approach is validated against published benchmarks from llama.cpp ([Apple Silicon](https:\u002F\u002Fgithub.com\u002Fggml-org\u002Fllama.cpp\u002Fdiscussions\u002F4167), [NVIDIA T4](https:\u002F\u002Fgithub.com\u002Fggml-org\u002Fllama.cpp\u002Fdiscussions\u002F4225)) and real-world measurements.\n\n   The bandwidth lookup table covers ~80 GPUs across NVIDIA (consumer + datacenter), AMD (RDNA + CDNA), and Apple Silicon families.\n\n   For unrecognized GPUs, llmfit falls back to per-backend speed constants:\n\n   | Backend      | Speed constant |\n   |--------------|----------------|\n   | CUDA         | 220            |\n   | Metal        | 160            |\n   | ROCm         | 180            |\n   | SYCL         | 100            |\n   | CPU (ARM)    | 90             |\n   | CPU (x86)    | 70             |\n   | NPU (Ascend) | 390            |\n\n   Fallback formula: `K \u002F params_b × quant_speed_multiplier`, with penalties for CPU offload (0.5×), CPU-only (0.3×), and MoE expert switching (0.8×).\n\n6. **Fit analysis** -- Each model is evaluated for memory compatibility:\n\n   **Run modes:**\n   - **GPU** -- Model fits in VRAM. Fast inference.\n   - **MoE** -- Mixture-of-Experts with expert offloading. Active experts in VRAM, inactive in RAM.\n   - **CPU+GPU** -- VRAM insufficient, spills to system RAM with partial GPU offload.\n   - **CPU** -- No GPU. Model loaded entirely into system RAM.\n\n   **Fit levels:**\n   - **Perfect** -- Recommended memory met on GPU. Requires GPU acceleration.\n   - **Good** -- Fits with headroom. Best achievable for MoE offload or CPU+GPU.\n   - **Marginal** -- Tight fit, or CPU-only (CPU-only always caps here).\n   - **Too Tight** -- Not enough VRAM or system RAM anywhere.\n\n---\n\n## Model database\n\nThe model list is generated by `scripts\u002Fscrape_hf_models.py`, a standalone Python script (stdlib only, no pip dependencies) that queries the HuggingFace REST API. 
Hundreds models & providers including Meta Llama, Mistral, Qwen, Google Gemma, Microsoft Phi, DeepSeek, IBM Granite, Allen Institute OLMo, xAI Grok, Cohere, BigCode, 01.ai, Upstage, TII Falcon, HuggingFace, Zhipu GLM, Moonshot Kimi, Baidu ERNIE, and more. The scraper automatically detects MoE architectures via model config (`num_local_experts`, `num_experts_per_tok`) and known architecture mappings.\n\nModel categories span general purpose, coding (CodeLlama, StarCoder2, WizardCoder, Qwen2.5-Coder, Qwen3-Coder), reasoning (DeepSeek-R1, Orca-2), multimodal\u002Fvision (Llama 3.2 Vision, Llama 4 Scout\u002FMaverick, Qwen2.5-VL), chat, enterprise (IBM Granite), and embedding (nomic-embed, bge).\n\nSee [MODELS.md](MODELS.md) for the full list.\n\nTo refresh the model database:\n\n```sh\n# Automated update (recommended)\nmake update-models\n\n# Or run the script directly\n.\u002Fscripts\u002Fupdate_models.sh\n\n# Or manually\npython3 scripts\u002Fscrape_hf_models.py\ncargo build --release\n```\n\nThe scraper writes `data\u002Fhf_models.json`, which is baked into the binary via `include_str!`. The automated update script backs up existing data, validates JSON output, and rebuilds the binary.\n\nBy default, the scraper enriches models with known GGUF download sources from providers like [unsloth](https:\u002F\u002Fhuggingface.co\u002Funsloth) and [bartowski](https:\u002F\u002Fhuggingface.co\u002Fbartowski). Results are cached in `data\u002Fgguf_sources_cache.json` (7-day TTL) to avoid repeated API calls. 
Use `--no-gguf-sources` to skip enrichment for a faster scrape.\n\n---\n\n## Project structure\n\n```\nsrc\u002F\n  main.rs         -- CLI argument parsing, entrypoint, TUI launch\n  hardware.rs     -- System RAM\u002FCPU\u002FGPU detection (multi-GPU, backend identification)\n  models.rs       -- Model database, quantization hierarchy, dynamic quant selection\n  fit.rs          -- Multi-dimensional scoring (Q\u002FS\u002FF\u002FC), speed estimation, MoE offloading\n  providers.rs    -- Runtime provider integration (Ollama, llama.cpp, MLX, Docker Model Runner, LM Studio), install detection, pull\u002Fdownload\n  display.rs      -- Classic CLI table rendering + JSON output\n  tui_app.rs      -- TUI application state, filters, navigation\n  tui_ui.rs       -- TUI rendering (ratatui)\n  tui_events.rs   -- TUI keyboard event handling (crossterm)\ndata\u002F\n  hf_models.json  -- Model database (206 models)\nskills\u002F\n  llmfit-advisor\u002F -- OpenClaw skill for hardware-aware model recommendations\nscripts\u002F\n  scrape_hf_models.py        -- HuggingFace API scraper\n  update_models.sh            -- Automated database update script\n  install-openclaw-skill.sh   -- Install the OpenClaw skill\nMakefile           -- Build and maintenance commands\n```\n\n---\n\n## Publishing to crates.io\n\nThe `Cargo.toml` already includes the required metadata (description, license, repository). To publish:\n\n```sh\n# Dry run first to catch issues\ncargo publish --dry-run\n\n# Publish for real (requires a crates.io API token)\ncargo login\ncargo publish\n```\n\nBefore publishing, make sure:\n\n- The version in `Cargo.toml` is correct (bump with each release).\n- A `LICENSE` file exists in the repo root. Create one if missing:\n\n```sh\n# For MIT license:\ncurl -sL https:\u002F\u002Fopensource.org\u002Flicense\u002FMIT -o LICENSE\n# Or write your own. The Cargo.toml declares license = \"MIT\".\n```\n\n- `data\u002Fhf_models.json` is committed. 
It is embedded at compile time and must be present in the published crate.\n- The `exclude` list in `Cargo.toml` keeps `target\u002F`, `scripts\u002F`, and `https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FAlexsJones_llmfit_readme_bfe3c444a49a.gif` out of the published crate to keep the download small.\n\nTo publish updates:\n\n```sh\n# Bump version\n# Edit Cargo.toml: version = \"0.2.0\"\ncargo publish\n```\n\n---\n\n## Dependencies\n\n| Crate                  | Purpose                                          |\n|------------------------|--------------------------------------------------|\n| `clap`                 | CLI argument parsing with derive macros          |\n| `sysinfo`              | Cross-platform RAM and CPU detection             |\n| `serde` \u002F `serde_json` | JSON deserialization for model database          |\n| `tabled`               | CLI table formatting                             |\n| `colored`              | CLI colored output                               |\n| `ureq`                 | HTTP client for runtime\u002Fprovider API integration |\n| `ratatui`              | Terminal UI framework                            |\n| `crossterm`            | Terminal input\u002Foutput backend for ratatui        |\n\n---\n\n## Runtime provider integration\n\nllmfit supports multiple local runtime providers:\n\n- **Ollama** (daemon\u002FAPI based pulls)\n- **llama.cpp** (direct GGUF downloads from Hugging Face + local cache detection)\n- **MLX** (Apple Silicon \u002F mlx-community model cache + optional server)\n- **Docker Model Runner** (Docker Desktop's built-in model serving)\n- **LM Studio** (local model server with REST API for model management + downloads)\n\nWhen more than one compatible provider is available for a model, pressing `d` in the TUI opens a provider picker modal.\n\n### Ollama integration\n\nllmfit integrates with [Ollama](https:\u002F\u002Follama.com) to detect which models you already have installed and to download new ones 
directly from the TUI.\n\n### Requirements\n\n- **Ollama must be installed and running** (`ollama serve` or the Ollama desktop app)\n- llmfit connects to `http:\u002F\u002Flocalhost:11434` (Ollama's default API port)\n- No configuration needed — if Ollama is running, llmfit detects it automatically\n\n### Remote Ollama instances\n\nTo connect to Ollama running on a different machine or port, set the `OLLAMA_HOST` environment variable:\n\n```sh\n# Connect to Ollama on a specific IP and port\nOLLAMA_HOST=\"http:\u002F\u002F192.168.1.100:11434\" llmfit\n\n# Connect via hostname  \nOLLAMA_HOST=\"http:\u002F\u002Follama-server:666\" llmfit\n\n# Works with all TUI and CLI commands\nOLLAMA_HOST=\"http:\u002F\u002F192.168.1.100:11434\" llmfit --cli\nOLLAMA_HOST=\"http:\u002F\u002F192.168.1.100:11434\" llmfit fit --perfect -n 5\n```\n\nThis is useful for:\n- Running llmfit on one machine while Ollama serves from another (e.g., GPU server + laptop client)\n- Connecting to Ollama running in Docker containers with custom ports\n- Using Ollama behind reverse proxies or load balancers\n\n### How it works\n\nOn startup, llmfit queries `GET \u002Fapi\u002Ftags` to list your installed Ollama models. Each installed model gets a green **✓** in the **Inst** column of the TUI. The system bar shows `Ollama: ✓ (N installed)`.\n\nWhen you press `d` on a model, llmfit sends `POST \u002Fapi\u002Fpull` to Ollama to download it. The row highlights with an animated progress indicator showing download progress in real-time. 
Once complete, the model is immediately available for use with Ollama.\n\nIf Ollama is not running, Ollama-specific operations are skipped; the TUI still supports other providers like llama.cpp where available.\n\n### llama.cpp integration\n\nllmfit integrates with [llama.cpp](https:\u002F\u002Fgithub.com\u002Fggml-org\u002Fllama.cpp) as a runtime\u002Fdownload provider in both TUI and CLI.\n\nRequirements:\n\n- `llama-cli` or `llama-server` available in `PATH` (for runtime detection)\n- network access to Hugging Face for GGUF downloads\n\nHow it works:\n\n- llmfit maps HF models to known GGUF repos (with heuristic fallbacks)\n- downloads GGUF files into the local llama.cpp model cache\n- marks models installed when matching GGUF files are present locally\n\n### Docker Model Runner integration\n\nllmfit integrates with [Docker Model Runner](https:\u002F\u002Fdocs.docker.com\u002Fdesktop\u002Ffeatures\u002Fmodel-runner\u002F), Docker Desktop's built-in model serving feature.\n\nRequirements:\n\n- Docker Desktop with Model Runner enabled\n- Default endpoint: `http:\u002F\u002Flocalhost:12434`\n\nHow it works:\n\n- llmfit queries `GET \u002Fengines` to list models available in Docker Model Runner\n- models are matched to the HF database using Ollama-style tag mapping (Docker Model Runner uses `ai\u002F\u003Ctag>` naming)\n- pressing `d` in the TUI pulls via `docker model pull`\n\n### Remote Docker Model Runner instances\n\nTo connect to Docker Model Runner on a different host or port, set the `DOCKER_MODEL_RUNNER_HOST` environment variable:\n\n```sh\nDOCKER_MODEL_RUNNER_HOST=\"http:\u002F\u002F192.168.1.100:12434\" llmfit\n```\n\n### LM Studio integration\n\nllmfit integrates with [LM Studio](https:\u002F\u002Flmstudio.ai) as a local model server with built-in model download capabilities.\n\nRequirements:\n\n- LM Studio must be running with its local server enabled\n- Default endpoint: `http:\u002F\u002F127.0.0.1:1234`\n\nHow it works:\n\n- llmfit queries `GET 
\u002Fv1\u002Fmodels` to list models available in LM Studio\n- pressing `d` in the TUI triggers a download via `POST \u002Fapi\u002Fv1\u002Fmodels\u002Fdownload`\n- download progress is tracked by polling `GET \u002Fapi\u002Fv1\u002Fmodels\u002Fdownload-status`\n- LM Studio accepts HuggingFace model names directly, so no name mapping is needed\n\n### Remote LM Studio instances\n\nTo connect to LM Studio on a different host or port, set the `LMSTUDIO_HOST` environment variable:\n\n```sh\nLMSTUDIO_HOST=\"http:\u002F\u002F192.168.1.100:1234\" llmfit\n```\n\n### Model name mapping\n\nllmfit's database uses HuggingFace model names (e.g. `Qwen\u002FQwen2.5-Coder-14B-Instruct`) while Ollama uses its own naming scheme (e.g. `qwen2.5-coder:14b`). llmfit maintains an accurate mapping table between the two so that install detection and pulls resolve to the correct model. Each mapping is exact — `qwen2.5-coder:14b` maps to the Coder model, not the base `qwen2.5:14b`.\n\n---\n\n## Platform support\n\n- **Linux** -- Full support. GPU detection via `nvidia-smi` (NVIDIA), `rocm-smi` (AMD), sysfs\u002F`lspci` (Intel Arc) and `npu-smi` (Ascend).\n- **macOS (Apple Silicon)** -- Full support. Detects unified memory via `system_profiler`. VRAM = system RAM (shared pool). Models run via Metal GPU acceleration.\n- **macOS (Intel)** -- RAM and CPU detection works. Discrete GPU detection if `nvidia-smi` available.\n- **Windows** -- RAM and CPU detection works. NVIDIA GPU detection via `nvidia-smi` if installed.\n- **Android \u002F Termux \u002F PRoot** -- CPU and RAM detection usually work, but GPU autodetection is not currently supported. 
Mobile GPUs such as Adreno typically are not visible through the desktop\u002Fserver probing interfaces llmfit uses.\n\n### GPU support\n\n| Vendor                 | Detection method              | VRAM reporting                 |\n|------------------------|-------------------------------|--------------------------------|\n| NVIDIA                 | `nvidia-smi`                  | Exact dedicated VRAM           |\n| AMD                    | `rocm-smi`                    | Detected (VRAM may be unknown) |\n| Intel Arc (discrete)   | sysfs (`mem_info_vram_total`) | Exact dedicated VRAM           |\n| Intel Arc (integrated) | `lspci`                       | Shared system memory           |\n| Apple Silicon          | `system_profiler`             | Unified memory (= system RAM)  |\n| Ascend                 | `npu-smi`                     | Detected (VRAM may be unknown) |\n\nIf autodetection fails or reports incorrect values, use `--memory=\u003CSIZE>` to override (see [GPU memory override](#gpu-memory-override) above).\n\n### Android \u002F Termux note\n\nOn Android setups such as **Termux + PRoot**, llmfit usually cannot see mobile GPUs through the standard Linux detection paths (`nvidia-smi`, `rocm-smi`, DRM\u002Fsysfs, `lspci`, etc.). In those environments, \"no GPU detected\" is expected with the current implementation.\n\nIf you still want GPU-style recommendations on a unified-memory phone or tablet, use a manual memory override:\n\n```sh\nllmfit --memory=8G fit -n 20\nllmfit recommend --json --memory=8G --limit 10\n```\n\nThis is a workaround for recommendation\u002Fscoring only; it does not provide true Android GPU runtime detection.\n\n---\n\n## Contributing\n\nContributions are welcome, especially new models.\n\n### Adding a model\n\n1. Add the model's HuggingFace repo ID (e.g., `meta-llama\u002FLlama-3.1-8B`) to the `TARGET_MODELS` list in `scripts\u002Fscrape_hf_models.py`.\n2. 
If the model is gated (requires HuggingFace authentication to access metadata), add a fallback entry to the `FALLBACKS` list in the same script with the parameter count and context length.\n3. Run the automated update script:\n   ```sh\n   make update-models\n   # or: .\u002Fscripts\u002Fupdate_models.sh\n   ```\n4. Verify the updated model list: `.\u002Ftarget\u002Frelease\u002Fllmfit list`\n5. Update [MODELS.md](MODELS.md) by running: `python3 \u003C\u003C 'EOF' \u003C scripts\u002F...` (see commit history for the generator script)\n6. Open a pull request.\n\nSee [MODELS.md](MODELS.md) for the current list and [AGENTS.md](AGENTS.md) for architecture details.\n\n---\n\n## OpenClaw integration\n\nllmfit ships as an [OpenClaw](https:\u002F\u002Fgithub.com\u002Fopenclaw\u002Fopenclaw) skill that lets the agent recommend hardware-appropriate local models and auto-configure Ollama\u002FvLLM\u002FLM Studio providers.\n\n### Install the skill\n\n```sh\n# From the llmfit repo\n.\u002Fscripts\u002Finstall-openclaw-skill.sh\n\n# Or manually\ncp -r skills\u002Fllmfit-advisor ~\u002F.openclaw\u002Fskills\u002F\n```\n\nOnce installed, ask your OpenClaw agent things like:\n\n- \"What local models can I run?\"\n- \"Recommend a coding model for my hardware\"\n- \"Set up Ollama with the best models for my GPU\"\n\nThe agent will call `llmfit recommend --json` under the hood, interpret the results, and offer to configure your `openclaw.json` with optimal model choices.\n\n### How it works\n\nThe skill teaches the OpenClaw agent to:\n\n1. Detect your hardware via `llmfit --json system`\n2. Get ranked recommendations via `llmfit recommend --json`\n3. Map HuggingFace model names to Ollama\u002FvLLM\u002FLM Studio tags\n4. 
Configure `models.providers.ollama.models` in `openclaw.json`\n\nSee [skills\u002Fllmfit-advisor\u002FSKILL.md](skills\u002Fllmfit-advisor\u002FSKILL.md) for the full skill definition.\n\n---\n\n## Alternatives\n\nIf you're looking for a different approach, check out [llm-checker](https:\u002F\u002Fgithub.com\u002FPavelevich\u002Fllm-checker) -- a Node.js CLI tool with Ollama integration that can pull and benchmark models directly. It takes a more hands-on approach by actually running models on your hardware via Ollama, rather than estimating from specs. Good if you already have Ollama installed and want to test real-world performance. Note that it doesn't support MoE (Mixture-of-Experts) architectures -- all models are treated as dense, so memory estimates for models like Mixtral or DeepSeek-V3 will reflect total parameter count rather than the smaller active subset.\n\n---\n\n## License\n\nMIT\n","# llmfit\n\n\u003Cp align=\"center\">\n  \u003Cimg src=\"assets\u002Ficon.svg\" alt=\"llmfit 图标\" width=\"128\" height=\"128\">\n\u003C\u002Fp>\n\n\u003Cp align=\"center\">\n  \u003Cb>English\u003C\u002Fb> ·\n  \u003Ca href=\"README.zh.md\">中文\u003C\u002Fa>\n\u003C\u002Fp>\n\n\u003Cp align=\"center\">\n  \u003Ca href=\"https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Factions\u002Fworkflows\u002Fci.yml\">\u003Cimg src=\"https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Factions\u002Fworkflows\u002Fci.yml\u002Fbadge.svg\" alt=\"CI\">\u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fcrates.io\u002Fcrates\u002Fllmfit\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fcrates\u002Fv\u002Fllmfit.svg\" alt=\"Crates.io\">\u003C\u002Fa>\n  \u003Ca href=\"LICENSE\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002Flicense-MIT-blue.svg\" alt=\"License\">\u003C\u002Fa>\n\u003C\u002Fp>\n\n**数百种模型与提供商。一条命令即可找到适合您硬件的模型。**\n\n一款终端工具，可根据您的系统内存、CPU 和 GPU 资源，智能选择最合适的 LLM 
模型。它会检测您的硬件配置，从质量、速度、适配性和上下文处理能力等多个维度对每个模型进行评分，并告诉您哪些模型在您的机器上能够流畅运行。\n\n默认提供交互式 TUI 界面，同时也支持经典 CLI 模式。支持多 GPU 配置、MoE 架构、动态量化选择、速度估算以及本地运行时提供商（Ollama、llama.cpp、MLX、Docker Model Runner、LM Studio）。\n\n> **姊妹项目：**\n> - [sympozium](https:\u002F\u002Fgithub.com\u002Fsympozium-ai\u002Fsympozium\u002F) — 在 Kubernetes 中管理代理。\n> - [llmserve](https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmserve) — 一个用于部署本地 LLM 模型的简单 TUI 工具。选择模型、选择后端，即可开始服务。\n\n![demo](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FAlexsJones_llmfit_readme_bfe3c444a49a.gif)\n\n---\n\n## 安装\n\n### Windows\n```sh\nscoop install llmfit\n```\n\n如果尚未安装 Scoop，请参考 [Scoop 安装指南](https:\u002F\u002Fscoop.sh\u002F)。\n\n### macOS \u002F Linux\n\n#### Homebrew\n```sh\nbrew install llmfit\n```\n\n#### 快速安装\n```sh\ncurl -fsSL https:\u002F\u002Fllmfit.axjns.dev\u002Finstall.sh | sh\n```\n\n该命令会从 GitHub 下载最新版本的二进制文件，并将其安装到 `\u002Fusr\u002Flocal\u002Fbin`（如果没有 sudo 权限，则安装到 `~\u002F.local\u002Fbin`）。\n\n**无需 sudo 安装到 `~\u002F.local\u002Fbin`：**\n```sh\ncurl -fsSL https:\u002F\u002Fllmfit.axjns.dev\u002Finstall.sh | sh -s -- --local\n```\n\n### Docker \u002F Podman\n```sh\ndocker run ghcr.io\u002Falexsjones\u002Fllmfit\n```\n此命令会输出 `llmfit recommend` 命令的 JSON 结果。您可以使用 `jq` 对 JSON 进一步查询。\n```\npodman run ghcr.io\u002Falexsjones\u002Fllmfit recommend --use-case coding | jq '.models[].name'\n```\n\n### 从源码编译\n```sh\ngit clone https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit.git\ncd llmfit\ncargo build --release\n# 生成的二进制文件位于 target\u002Frelease\u002Fllmfit\n```\n\n---\n\n## 使用方法\n\n### TUI（默认）\n\n```sh\nllmfit\n```\n\n启动交互式终端界面。屏幕顶部会显示您的系统规格（CPU、内存、GPU 型号、显存大小、运行时后端）。下方是一个可滚动的表格，模型按综合评分排序。每行显示模型的评分、预估的 token\u002Fs 速度、最适合您硬件的量化方式、运行模式、内存占用以及适用场景分类。\n\n| 键                        | 功能                                                                |\n|----------------------------|-----------------------------------------------------------------------|\n| `Up` \u002F `Down` 或 `j` \u002F `k` | 导航浏览模型                   
                                    |\n| `\u002F`                        | 进入搜索模式（支持名称、提供商、参数、使用场景的部分匹配）         |\n| `Esc` 或 `Enter`           | 退出搜索模式                                                      |\n| `Ctrl-U`                   | 清除搜索内容                                                      |\n| `f`                        | 切换适配性筛选器：全部、可运行、完美、良好、勉强                  |\n| `a`                        | 切换可用性筛选器：全部、GGUF 可用、已安装                          |\n| `s`                        | 切换排序列：评分、参数量、内存占比、上下文长度、发布日期、使用场景 |\n| `v`                        | 进入视觉模式（可多选模型）                                        |\n| `V`                        | 进入列筛选模式                                                    |\n| `t`                        | 切换颜色主题（自动保存）                                          |\n| `p`                        | 打开所选模型的规划模式（硬件资源规划）                            |\n| `P`                        | 打开提供商筛选弹出窗口                                            |\n| `U`                        | 打开使用场景筛选弹出窗口                                          |\n| `C`                        | 打开功能特性筛选弹出窗口                                          |\n| `m`                        | 标记所选模型以供比较                                              |\n| `c`                        | 打开比较视图（已标记模型与当前选中模型对比）                      |\n| `x`                        | 清除比较标记                                                      |\n| `i`                        | 切换“已安装优先”排序（基于检测到的运行时提供商）                  |\n| `d`                        | 下载所选模型（当有多个提供商时会弹出选择框）                     |\n| `r`                        | 从运行时提供商处刷新已安装模型                                   |\n| `Enter`                    | 切换所选模型的详细信息视图                                       |\n| `PgUp` \u002F `PgDn`            | 每次滚动 10 行                                                    |\n| `g` \u002F `G`                  | 跳转到列表顶部 \u002F 底部                                             |\n| `q`                        | 退出程序                                  
                        |\n\n### 类 Vim 模式\n\nTUI 使用受 Vim 启发的模式，显示在左下角的状态栏中。当前模式决定了哪些键是可用的。\n\n#### 正常模式\n\n默认模式。用于导航、搜索、筛选和打开视图。上表中的所有按键在此模式下均适用。\n\n#### 可视模式 (`v`)\n\n选择连续的模型范围以进行批量比较。按 `v` 键将锚点定位到当前行，然后使用 `j`\u002F`k` 或方向键扩展选择范围。选中的行会高亮显示。\n\n| 键                 | 动作                                                 |\n|---------------------|--------------------------------------------------------|\n| `j` \u002F `k` 或箭头键 | 上下扩展选区                               |\n| `c`                 | 比较所有选中的模型（打开多模型比较视图） |\n| `m`                 | 标记当前模型以进行双模型比较               |\n| `Esc` 或 `v`        | 退出可视模式                                       |\n\n多模型比较视图显示一个表格，其中行代表属性（得分、tok\u002Fs、适配度、内存占比、参数量、运行模式、上下文长度、量化等），列则为不同的模型。最佳值会被高亮显示。如果选择的模型数量超过屏幕宽度，可使用 `h`\u002F`l` 或方向键水平滚动。\n\n#### 选择模式 (`V`)\n\n基于列的筛选模式。按 `V`（Shift-v）进入选择模式，然后使用 `h`\u002F`l` 或方向键在列标题间移动。当前活动的列会以视觉方式突出显示。按下 `Enter` 或 `空格` 键即可激活该列对应的筛选功能：\n\n| 列                        | 筛选操作                                                             |\n|-------------------------------|---------------------------------------------------------------------------|\n| 实例                          | 循环切换可用性筛选                                                     |\n| 模型                         | 进入搜索模式                                                         |\n| 提供者                      | 打开提供者弹出菜单                                                   |\n| 参数量                        | 打开参数规模分桶弹出菜单（\u003C3B、3-7B、7-14B、14-30B、30-70B、70B+） |\n| 得分、tok\u002Fs、内存占比、上下文长度、日期 | 按该列排序                                                           |\n| 量化                         | 打开量化弹出菜单                                                     |\n| 运行模式                      | 打开运行模式弹出菜单（GPU、MoE、CPU+GPU、CPU）                       |\n| 适配度                           | 循环切换适配度筛选                                                   |\n| 使用场景                      | 打开使用场景弹出菜单                                                 
|\n\n在选择模式下，行导航（`j`\u002F`k`）仍然有效，因此您可以在应用筛选器时查看其效果。按 `Esc` 键返回正常模式。\n\n### TUI 计划模式 (`p`)\n\n计划模式颠覆了常规的适配分析：它不是问“什么适合我的硬件？”，而是估算“这个模型配置需要什么样的硬件？”。\n\n在选中某一行后，按下 `p` 键，然后：\n\n| 键                    | 动作                                                    |\n|------------------------|-----------------------------------------------------------|\n| `Tab` \u002F `j` \u002F `k`      | 在可编辑字段之间移动（上下文长度、量化、目标 TPS） |\n| `Left` \u002F `Right`       | 在当前字段内移动光标                              |\n| 输入                   | 编辑当前字段                                        |\n| `Backspace` \u002F `Delete` | 删除字符                                            |\n| `Ctrl-U`               | 清除当前字段                                        |\n| `Esc` 或 `q`           | 退出计划模式                                            |\n\n计划模式会显示以下估算结果：\n- 最小和推荐的显存\u002F内存\u002FCPU 核心数\n- 可行的运行路径（GPU、CPU 卸载、纯 CPU）\n- 为达到更好的适配目标所需的升级幅度\n\n### 主题\n\n按 `t` 键可在 10 种内置颜色主题之间循环切换。您的选择会自动保存到 `~\u002F.config\u002Fllmfit\u002Ftheme` 文件中，并在下次启动时恢复。\n\n| 主题                    | 描述                                       |\n|--------------------------|---------------------------------------------------|\n| **默认**              | 原始 llmfit 颜色                            |\n| **Dracula**              | 深紫色背景搭配柔和色调                        |\n| **Solarized**            | 伊森·斯库诺弗的 Solarized Dark 调色板         |\n| **Nord**                 | 北极风，冷峻的蓝灰色调                      |\n| **Monokai**              | Monokai Pro 的温暖语法高亮颜色                |\n| **Gruvbox**              | 复古风格调色板，带有温暖的大地色调          |\n| **Catppuccin Latte**     | 🌻 浅色主题 — 和谐的粉彩色反转              |\n| **Catppuccin Frappé**    | 🪴 低对比度深色 — 沉稳、低调的美学           |\n| **Catppuccin Macchiato** | 🌺 中等对比度深色 — 温和、舒缓的色调        |\n| **Catppuccin Mocha**     | 🌿 最深的变体 — 舒适且色彩丰富的点缀        |\n\n### Web 控制面板\n\n当您以非 JSON 模式运行 `llmfit` 时，它会自动在 `0.0.0.0:8787` 启动一个后台 Web 
控制面板。在同一网络中的任何浏览器中打开即可：\n\n```\nhttp:\u002F\u002F\u003Cyour-machine-ip>:8787\n```\n\n您可以通过环境变量覆盖主机或端口：\n\n```sh\nLLMFIT_DASHBOARD_HOST=0.0.0.0 LLMFIT_DASHBOARD_PORT=9000 llmfit\n```\n\n| 变量 | 默认值 | 描述 |\n|---|---|---|\n| `LLMFIT_DASHBOARD_HOST` | `0.0.0.0` | 控制面板服务器绑定的接口 |\n| `LLMFIT_DASHBOARD_PORT` | `8787` | 控制面板服务器绑定的端口 |\n\n要禁用自动启动的控制面板，可以传递 `--no-dashboard` 参数：\n\n```sh\nllmfit --no-dashboard\n```\n\n### CLI 模式\n\n使用 `--cli` 或任何子命令可获得经典的表格输出：\n\n```sh\n# 按适配度排名的所有模型表格\nllmfit --cli\n\n# 完美适配的模型，前 5 名\nllmfit fit --perfect -n 5\n\n# 显示检测到的系统规格\nllmfit system\n\n# 列出数据库中的所有模型\nllmfit list\n\n# 按名称、提供商或参数量搜索\nllmfit search \"llama 8b\"\n\n# 单个模型的详细信息\nllmfit info \"Mistral-7B\"\n\n# 前 5 名推荐（JSON 格式，供代理或脚本使用）\nllmfit recommend --json --limit 5\n\n# 按使用场景筛选的推荐\nllmfit recommend --json --use-case coding --limit 3\n\n# 强制指定运行时（绕过 Apple Silicon 上的自动 MLX 选择）\nllmfit recommend --force-runtime llamacpp\nllmfit recommend --force-runtime llamacpp --use-case coding --limit 3\n\n# 为特定模型配置规划所需硬件\nllmfit plan \"Qwen\u002FQwen3-4B-MLX-4bit\" --context 8192\nllmfit plan \"Qwen\u002FQwen3-4B-MLX-4bit\" --context 8192 --quant mlx-4bit\nllmfit plan \"Qwen\u002FQwen3-4B-MLX-4bit\" --context 8192 --target-tps 25 --json\n\n# 作为节点级 REST API 运行（适用于集群调度器\u002F聚合器）\nllmfit serve --host 0.0.0.0 --port 8787\n```\n\n### REST API (`llmfit serve`)\n\n`llmfit serve` 启动一个 HTTP API，公开与 TUI\u002FCLI 相同的拟合\u002F评分数据，包括节点的筛选和顶级模型选择。\n\n```sh\n# 活性检查\ncurl http:\u002F\u002Flocalhost:8787\u002Fhealth\n\n# 节点硬件信息\ncurl http:\u002F\u002Flocalhost:8787\u002Fapi\u002Fv1\u002Fsystem\n\n# 带筛选条件的完整拟合列表\ncurl \"http:\u002F\u002Flocalhost:8787\u002Fapi\u002Fv1\u002Fmodels?min_fit=marginal&runtime=llamacpp&sort=score&limit=20\"\n\n# 关键调度端点：该节点可运行的顶级模型\ncurl \"http:\u002F\u002Flocalhost:8787\u002Fapi\u002Fv1\u002Fmodels\u002Ftop?limit=5&min_fit=good&use_case=coding\"\n\n# 按模型名称\u002F提供商文本搜索\ncurl 
\"http:\u002F\u002Flocalhost:8787\u002Fapi\u002Fv1\u002Fmodels\u002FMistral?runtime=any\"\n```\n\n`models`\u002F`models\u002Ftop` 支持的查询参数：\n\n- `limit`（或 `n`）：返回的最大行数\n- `perfect`：`true|false`（为 `true` 时强制仅显示完美匹配）\n- `min_fit`：`perfect|good|marginal|too_tight`\n- `runtime`：`any|mlx|llamacpp`\n- `use_case`：`general|coding|reasoning|chat|multimodal|embedding`\n- `provider`：提供商文本过滤器（子字符串）\n- `search`：跨名称\u002F提供商\u002F尺寸\u002F使用场景的自由文本过滤\n- `sort`：`score|tps|params|mem|ctx|date|use_case`\n- `include_too_tight`：是否包含不可运行的行（默认在 `\u002Ftop` 上为 `false`，在 `\u002Fmodels` 上为 `true`）\n- `max_context`：用于内存估算的每请求上下文上限\n- `force_runtime`：`mlx|llamacpp|vllm` — 在分析过程中覆盖自动运行时选择\n\n在本地验证 API 行为：\n\n```sh\n# 自动启动服务器并运行端点\u002F模式\u002F筛选断言\npython3 scripts\u002Ftest_api.py --spawn\n\n# 或测试已运行的服务器\npython3 scripts\u002Ftest_api.py --base-url http:\u002F\u002F127.0.0.1:8787\n```\n\n### GPU 内存覆盖\n\n在某些系统上，GPU 显存自动检测可能会失败（例如 `nvidia-smi` 出现问题、虚拟机、直通设置等）。可以使用 `--memory` 手动指定 GPU 的显存大小：\n\n```sh\n# 覆盖为 32 GB 显存\nllmfit --memory=32G\n\n# 也可以使用 MB 单位（32000 MB ≈ 31.25 GB）\nllmfit --memory=32000M\n\n# 适用于所有模式：TUI、CLI 和子命令\nllmfit --memory=24G --cli\nllmfit --memory=24G fit --perfect -n 5\nllmfit --memory=24G system\nllmfit --memory=24G info \"Llama-3.1-70B\"\nllmfit --memory=24G recommend --json\n```\n\n支持的后缀：`G`\u002F`GB`\u002F`GiB`（千兆字节）、`M`\u002F`MB`\u002F`MiB`（兆字节）、`T`\u002F`TB`\u002F`TiB`（太字节）。不区分大小写。如果未检测到 GPU，则覆盖会创建一个模拟的 GPU 条目，以便对模型进行 GPU 推理的评分。\n\n### 用于估算的上下文长度上限\n\n使用 `--max-context` 可以限制用于内存估算的上下文长度（而不改变每个模型所宣传的最大上下文长度）：\n\n```sh\n# 在 4K 上下文长度下估算内存占用\nllmfit --max-context 4096 --cli\n\n# 也适用于子命令\nllmfit --max-context 8192 fit --perfect -n 5\nllmfit --max-context 16384 recommend --json --limit 5\n```\n\n如果未设置 `--max-context`，llmfit 将在可用时使用 `OLLAMA_CONTEXT_LENGTH`。\n\n### JSON 输出\n\n在任何子命令中添加 `--json` 可获得机器可读的输出：\n\n```sh\nllmfit --json system     # 硬件规格以 JSON 格式输出\nllmfit --json fit -n 10  # 前 10 名拟合结果以 JSON 格式输出\nllmfit recommend --json  # 前 5 名推荐结果（recommend 默认为 JSON 格式）\nllmfit 
plan \"Qwen\u002FQwen2.5-Coder-0.5B-Instruct\" --context 8192 --json\n```\n\n`plan` 的 JSON 包含以下稳定字段：\n- 请求信息（上下文、量化、目标 TPS）\n- 估计的最低\u002F推荐硬件配置\n- 各路径的可行性（GPU、CPU 卸载、纯 CPU）\n- 升级差异\n\n---\n\n## 工作原理\n\n1. **硬件检测** -- 通过 `sysinfo` 读取总内存\u002F可用内存，统计 CPU 核心数，并探测 GPU：\n   - **NVIDIA** -- 通过 `nvidia-smi` 支持多 GPU。汇总所有检测到的 GPU 的显存。若报告失败，则回退到根据 GPU 型号名称估算显存。\n   - **AMD** -- 通过 `rocm-smi` 检测。\n   - **Intel Arc** -- 独立显存通过 sysfs 获取，集成显存则通过 `lspci` 获取。\n   - **Apple Silicon** -- 统一内存通过 `system_profiler` 获取。显存等于系统内存。\n   - **Ascend** -- 通过 `npu-smi` 检测。\n   - **后端检测** -- 自动识别加速后端（CUDA、Metal、ROCm、SYCL、CPU ARM、CPU x86、Ascend），用于速度估算。\n\n2. **模型数据库** -- 数百个模型来自 HuggingFace API，存储在 `data\u002Fhf_models.json` 中，并在编译时嵌入。内存需求根据参数量，结合量化层级（Q8_0 至 Q2_K）计算得出。显存在 GPU 推理中是主要限制；仅 CPU 运行时则以系统内存作为后备。\n\n   **MoE 支持** -- 具有专家混合架构的模型（Mixtral、DeepSeek-V2\u002FV3）会自动检测。每个 token 只激活部分专家，因此实际显存需求远低于总参数量所暗示的数值。例如，Mixtral 8x7B 总参数量为 467 亿，但每 token 只激活约 129 亿，通过专家卸载可将显存需求从 23.9 GB 降至约 6.6 GB。\n\n3. **动态量化** -- llmfit 不假设固定量化级别，而是尝试最适合您硬件的质量量化。它从 Q8_0（质量最高）开始，逐步降低到 Q2_K（压缩比最高），选择能在可用内存内运行的最高质量量化。如果全上下文都不满足，再尝试半上下文。\n\n4. **多维度评分** -- 每个模型在四个维度上进行评分（每个维度满分为 100 分）：\n\n   | 维度       | 衡量内容                                                               |\n   |------------|------------------------------------------------------------------------|\n   | **质量**   | 参数量、模型家族声誉、量化惩罚、任务匹配程度                         |\n   | **速度**   | 根据后端、参数量和量化估计的每秒生成 token 数                        |\n   | **契合度** | 内存使用效率（理想范围：50–80% 的可用内存）                           |\n   | **上下文** | 上下文窗口能力与用例目标的匹配程度                                     |\n\n   各维度按加权综合得分排序。权重因用例类别而异（通用、编码、推理、聊天、多模态、嵌入）。例如，聊天场景更注重速度（权重 0.35），而推理场景则更注重质量（权重 0.55）。无法运行的模型（过于紧张）始终排在最后。\n\n5. 
**速度估算** -- LLM 推理中的 token 生成受内存带宽限制：每个 token 需从显存中读取一次完整的模型权重。当 GPU 型号被识别后，llmfit 使用其实际内存带宽来估算吞吐量：\n\n   公式：`(带宽_GB_s \u002F 模型大小_GB) × 效率因子`\n\n   效率因子（0.55）考虑了内核开销、KV 缓存读取及内存控制器的影响。该方法已通过 llama.cpp 的公开基准测试（[Apple Silicon](https:\u002F\u002Fgithub.com\u002Fggml-org\u002Fllama.cpp\u002Fdiscussions\u002F4167)、[NVIDIA T4](https:\u002F\u002Fgithub.com\u002Fggml-org\u002Fllama.cpp\u002Fdiscussions\u002F4225)）以及实际测量结果验证。\n\n   带宽查找表覆盖了 NVIDIA（消费级 + 数据中心）、AMD（RDNA + CDNA）和 Apple Silicon 系列的约 80 款 GPU。\n\n   对于未识别的 GPU，llmfit 会回退到各后端的速度常数：\n\n   | 后端      | 速度常数 |\n   |-----------|----------|\n   | CUDA      | 220      |\n   | Metal     | 160      |\n   | ROCm      | 180      |\n   | SYCL      | 100      |\n   | CPU (ARM) | 90       |\n   | CPU (x86) | 70       |\n   | NPU (Ascend)| 390    |\n\n   回退公式：`K \u002F params_b × 量化速度倍增因子`，其中对 CPU 卸载（0.5×）、仅 CPU 运行（0.3×）以及 MoE 专家切换（0.8×）均设有惩罚。\n\n6. **契合度分析** -- 每个模型都会评估其内存兼容性：\n\n   **运行模式：**\n   - **GPU** -- 模型可完全容纳在显存中。推理速度快。\n   - **MoE** -- 专家混合架构，采用专家卸载策略。活跃专家位于显存，不活跃专家位于内存。\n   - **CPU+GPU** -- 显存不足，部分数据溢出至系统内存，同时进行部分 GPU 卸载。\n   - **CPU** -- 无 GPU。模型完全加载到系统内存中。\n\n   **契合度等级：**\n   - **完美** -- 在 GPU 上达到推荐内存要求。需要 GPU 加速。\n   - **良好** -- 有余量，适合专家卸载或 CPU+GPU 模式。\n   - **临界** -- 内存较为紧张，或仅限 CPU 运行（仅 CPU 模式一律归于此）。\n   - **过于紧张** -- 显存或系统内存均不足。\n\n---\n\n## 模型数据库\n\n模型列表由 `scripts\u002Fscrape_hf_models.py` 生成，这是一段独立的 Python 脚本（仅使用标准库，无需 pip 依赖），用于查询 HuggingFace REST API。涵盖数百个模型及提供商，包括 Meta Llama、Mistral、Qwen、Google Gemma、Microsoft Phi、DeepSeek、IBM Granite、Allen Institute OLMo、xAI Grok、Cohere、BigCode、01.ai、Upstage、TII Falcon、HuggingFace、Zhipu GLM、Moonshot Kimi、Baidu ERNIE 等。爬虫会自动通过模型配置文件（`num_local_experts`、`num_experts_per_tok`）及已知架构映射检测 MoE 架构。\n\n模型类别涵盖通用、编码（CodeLlama、StarCoder2、WizardCoder、Qwen2.5-Coder、Qwen3-Coder）、推理（DeepSeek-R1、Orca-2）、多模态\u002F视觉（Llama 3.2 Vision、Llama 4 Scout\u002FMaverick、Qwen2.5-VL）、聊天、企业级（IBM Granite）以及嵌入（nomic-embed、bge）等。\n\n完整列表请参阅 [MODELS.md](MODELS.md)。\n\n要刷新模型数据库：\n\n```sh\n# 自动更新（推荐）\nmake 
update-models\n\n# 或直接运行脚本\n.\u002Fscripts\u002Fupdate_models.sh\n\n# 或手动操作\npython3 scripts\u002Fscrape_hf_models.py\ncargo build --release\n```\n\n爬虫会将数据写入 `data\u002Fhf_models.json`，并通过 `include_str!` 将其嵌入二进制文件中。自动更新脚本会备份现有数据，验证 JSON 输出，并重新构建二进制文件。\n\n默认情况下，爬虫会为模型补充来自 unsloth 和 bartowski 等提供商的已知 GGUF 下载源。结果会缓存到 `data\u002Fgguf_sources_cache.json`（有效期 7 天），以避免重复调用 API。如需加快抓取速度，可使用 `--no-gguf-sources` 参数跳过补充下载源的步骤。\n\n---\n\n## 项目结构\n\n```\nsrc\u002F\n  main.rs         -- CLI 参数解析、入口点、TUI 启动\n  hardware.rs     -- 系统内存\u002FCPU\u002FGPU 检测（多 GPU、后端识别）\n  models.rs       -- 模型数据库、量化层级、动态量化选择\n  fit.rs          -- 多维度评分（Q\u002FS\u002FF\u002FC）、速度估算、MoE 分载\n  providers.rs    -- 运行时提供者集成（Ollama、llama.cpp、MLX、Docker Model Runner、LM Studio）、安装检测、拉取\u002F下载\n  display.rs      -- 经典 CLI 表格渲染 + JSON 输出\n  tui_app.rs      -- TUI 应用程序状态、过滤器、导航\n  tui_ui.rs       -- TUI 渲染（ratatui）\n  tui_events.rs   -- TUI 键盘事件处理（crossterm）\ndata\u002F\n  hf_models.json  -- 模型数据库（206 个模型）\nskills\u002F\n  llmfit-advisor\u002F -- OpenClaw 技能，用于硬件感知的模型推荐\nscripts\u002F\n  scrape_hf_models.py        -- HuggingFace API 爬虫\n  update_models.sh            -- 自动化数据库更新脚本\n  install-openclaw-skill.sh   -- 安装 OpenClaw 技能\nMakefile           -- 构建和维护命令\n```\n\n---\n\n## 发布到 crates.io\n\n`Cargo.toml` 已包含必要的元数据（描述、许可证、仓库）。发布步骤如下：\n\n```sh\n# 先进行干运行以捕获问题\ncargo publish --dry-run\n\n# 正式发布（需要 crates.io API token）\ncargo login\ncargo publish\n```\n\n在发布前，请确保：\n\n- `Cargo.toml` 中的版本号正确（每次发布都需递增）。\n- 仓库根目录下存在 `LICENSE` 文件。如果缺失，可使用以下命令创建：\n\n```sh\n# 对于 MIT 许可证：\ncurl -sL https:\u002F\u002Fopensource.org\u002Flicense\u002FMIT -o LICENSE\n# 或者自行编写许可证内容。`Cargo.toml` 中已声明 license = \"MIT\"。\n```\n\n- `data\u002Fhf_models.json` 已提交。该文件会在编译时嵌入，必须存在于发布的 crate 中。\n- `Cargo.toml` 中的 `exclude` 列表应将 `target\u002F`、`scripts\u002F` 和 `demo.gif` 排除在外，以减小下载包的大小。\n\n更新发布步骤如下：\n\n```sh\n# 递增版本号\n# 编辑 Cargo.toml：version = \"0.2.0\"\ncargo 
publish\n```\n\n---\n\n## 依赖项\n\n| Crate                  | 用途                                          |\n|------------------------|--------------------------------------------------|\n| `clap`                 | 使用 derive 宏进行 CLI 参数解析          |\n| `sysinfo`              | 跨平台内存和 CPU 检测             |\n| `serde` \u002F `serde_json` | 用于模型数据库的 JSON 反序列化          |\n| `tabled`               | CLI 表格格式化                             |\n| `colored`              | CLI 彩色输出                               |\n| `ureq`                 | 用于运行时\u002F提供者 API 集成的 HTTP 客户端 |\n| `ratatui`              | 终端 UI 框架                            |\n| `crossterm`            | ratatui 的终端输入输出后端        |\n\n---\n\n## 运行时提供者集成\n\nllmfit 支持多种本地运行时提供者：\n\n- **Ollama**（基于守护进程\u002FAPI 的拉取）\n- **llama.cpp**（直接从 Hugging Face 下载 GGUF 文件并检测本地缓存）\n- **MLX**（Apple Silicon \u002F mlx-community 模型缓存 + 可选服务器）\n- **Docker Model Runner**（Docker Desktop 内置的模型服务）\n- **LM Studio**（具有 REST API 的本地模型服务器，支持模型管理和下载）\n\n当一个模型有多个兼容的提供者可用时，在 TUI 中按下 `d` 键会打开提供者选择模态框。\n\n### Ollama 集成\n\nllmfit 与 [Ollama](https:\u002F\u002Follama.com) 集成，用于检测您已安装哪些模型，并直接从 TUI 下载新模型。\n\n### 要求\n\n- **Ollama 必须已安装并正在运行**（`ollama serve` 或 Ollama 桌面应用）。\n- llmfit 会连接到 `http:\u002F\u002Flocalhost:11434`（Ollama 的默认 API 端口）。\n- 无需额外配置——只要 Ollama 在运行，llmfit 就会自动检测到它。\n\n### 远程 Ollama 实例\n\n要连接到运行在不同机器或端口上的 Ollama，可以设置 `OLLAMA_HOST` 环境变量：\n\n```sh\n# 连接到特定 IP 和端口的 Ollama\nOLLAMA_HOST=\"http:\u002F\u002F192.168.1.100:11434\" llmfit\n\n# 通过主机名连接\nOLLAMA_HOST=\"http:\u002F\u002Follama-server:666\" llmfit\n\n# 适用于所有 TUI 和 CLI 命令\nOLLAMA_HOST=\"http:\u002F\u002F192.168.1.100:11434\" llmfit --cli\nOLLAMA_HOST=\"http:\u002F\u002F192.168.1.100:11434\" llmfit fit --perfect -n 5\n```\n\n这在以下场景中非常有用：\n- 在一台机器上运行 llmfit，而 Ollama 在另一台机器上提供服务（例如，GPU 服务器 + 笔记本客户端）。\n- 连接到运行在 Docker 容器中且使用自定义端口的 Ollama。\n- 在反向代理或负载均衡器后使用 Ollama。\n\n### 工作原理\n\n启动时，llmfit 会查询 `GET \u002Fapi\u002Ftags` 来列出您已安装的 Ollama 模型。每个已安装的模型在 TUI 的 **Inst** 列中都会显示一个绿色的 **✓**。系统栏会显示 
`Ollama: ✓ (N 已安装)`。\n\n当您对某个模型按下 `d` 键时，llmfit 会向 Ollama 发送 `POST \u002Fapi\u002Fpull` 请求来下载该模型。对应的行会高亮显示，并带有动画进度条实时显示下载进度。下载完成后，该模型即可立即通过 Ollama 使用。\n\n如果 Ollama 未运行，与 Ollama 相关的操作会被跳过；TUI 仍会支持其他提供者，例如 llama.cpp（如果可用）。\n\n### llama.cpp 集成\n\nllmfit 与 [llama.cpp](https:\u002F\u002Fgithub.com\u002Fggml-org\u002Fllama.cpp) 集成，作为 TUI 和 CLI 中的运行时\u002F下载提供者。\n\n要求：\n\n- `llama-cli` 或 `llama-server` 必须在 `PATH` 中可用（用于运行时检测）。\n- 需要有网络访问权限，以便从 Hugging Face 下载 GGUF 文件。\n\n工作原理：\n\n- llmfit 会将 HF 模型映射到已知的 GGUF 仓库（并提供启发式回退方案）。\n- 将 GGUF 文件下载到本地 llama.cpp 模型缓存中。\n- 当本地存在匹配的 GGUF 文件时，会标记该模型已安装。\n\n### Docker Model Runner 集成\n\nllmfit 与 [Docker Model Runner](https:\u002F\u002Fdocs.docker.com\u002Fdesktop\u002Ffeatures\u002Fmodel-runner\u002F) 集成，后者是 Docker Desktop 内置的模型服务功能。\n\n要求：\n\n- Docker Desktop 必须启用 Model Runner 功能。\n- 默认端点为 `http:\u002F\u002Flocalhost:12434`。\n\n工作原理：\n\n- llmfit 会查询 `GET \u002Fengines` 来列出 Docker Model Runner 中可用的模型。\n- 模型会通过 Ollama 式的标签映射与 HF 数据库匹配（Docker Model Runner 使用 `ai\u002F\u003Ctag>` 命名）。\n- 在 TUI 中按下 `d` 键会通过 `docker model pull` 进行拉取。\n\n### 远程 Docker Model Runner 实例\n\n要连接到运行在不同主机或端口上的 Docker Model Runner，可以设置 `DOCKER_MODEL_RUNNER_HOST` 环境变量：\n\n```sh\nDOCKER_MODEL_RUNNER_HOST=\"http:\u002F\u002F192.168.1.100:12434\" llmfit\n```\n\n### LM Studio 集成\n\nllmfit 与 [LM Studio](https:\u002F\u002Flmstudio.ai) 集成，作为本地模型服务器，并内置模型下载功能。\n\n要求：\n\n- LM Studio 必须运行且已启用本地服务器\n- 默认端点：`http:\u002F\u002F127.0.0.1:1234`\n\n工作原理：\n\n- llmfit 通过 `GET \u002Fv1\u002Fmodels` 查询 LM Studio 中可用的模型列表\n- 在 TUI 中按下 `d` 键会触发通过 `POST \u002Fapi\u002Fv1\u002Fmodels\u002Fdownload` 进行下载\n- 下载进度通过轮询 `GET \u002Fapi\u002Fv1\u002Fmodels\u002Fdownload-status` 来跟踪\n- LM Studio 直接接受 HuggingFace 模型名称，因此无需进行名称映射\n\n### 远程 LM Studio 实例\n\n若要连接到不同主机或端口上的 LM Studio，请设置 `LMSTUDIO_HOST` 环境变量：\n\n```sh\nLMSTUDIO_HOST=\"http:\u002F\u002F192.168.1.100:1234\" llmfit\n```\n\n### 模型名称映射\n\nllmfit 的数据库使用 HuggingFace 模型名称（例如 `Qwen\u002FQwen2.5-Coder-14B-Instruct`），而 Ollama 使用自己的命名方案（例如 
`qwen2.5-coder:14b`）。llmfit 维护了一个准确的映射表，确保安装检测和拉取操作能够解析为正确的模型。每个映射都是精确的——`qwen2.5-coder:14b` 映射的是 Coder 模型，而不是基础的 `qwen2.5:14b`。\n\n---\n\n## 平台支持\n\n- **Linux** -- 完全支持。可通过 `nvidia-smi`（NVIDIA）、`rocm-smi`（AMD）、sysfs\u002F`lspci`（Intel Arc）以及 `npu-smi`（Ascend）检测 GPU。\n- **macOS（Apple Silicon）** -- 完全支持。通过 `system_profiler` 检测统一内存。VRAM 即系统 RAM（共享池）。模型通过 Metal GPU 加速运行。\n- **macOS（Intel）** -- 可检测 RAM 和 CPU。如果安装了 `nvidia-smi`，则可检测独立 GPU。\n- **Windows** -- 可检测 RAM 和 CPU。如果已安装 `nvidia-smi`，则可检测 NVIDIA GPU。\n- **Android \u002F Termux \u002F PRoot** -- 通常可以检测 CPU 和 RAM，但目前不支持 GPU 自动检测。移动 GPU（如 Adreno）通常无法通过 llmfit 使用的桌面\u002F服务器探测接口被识别。\n\n### GPU 支持\n\n| 厂商                 | 检测方法              | VRAM 报告                 |\n|------------------------|-------------------------------|--------------------------------|\n| NVIDIA                 | `nvidia-smi`                  | 精确的专用 VRAM           |\n| AMD                    | `rocm-smi`                    | 已检测（VRAM 可能未知）    |\n| Intel Arc（独立）   | sysfs (`mem_info_vram_total`) | 精确的专用 VRAM           |\n| Intel Arc（集成） | `lspci`                       | 共享系统内存           |\n| Apple Silicon          | `system_profiler`             | 统一内存（等于系统 RAM）  |\n| Ascend                 | `npu-smi`                     | 已检测（VRAM 可能未知）    |\n\n如果自动检测失败或报告值不正确，可使用 `--memory=\u003CSIZE>` 参数进行覆盖（参见上文的 [GPU 内存覆盖]）。\n\n### Android \u002F Termux 注意事项\n\n在 Android 系统中，例如 **Termux + PRoot**，llmfit 通常无法通过标准的 Linux 检测路径（`nvidia-smi`、`rocm-smi`、DRM\u002Fsysfs、`lspci` 等）检测到移动 GPU。在这种环境下，当前实现下出现“未检测到 GPU”的情况是正常的。\n\n如果您仍然希望在使用统一内存的手机或平板上获得类似 GPU 的推荐，可以手动覆盖内存大小：\n\n```sh\nllmfit --memory=8G fit -n 20\nllmfit recommend --json --memory=8G --limit 10\n```\n\n这只是用于推荐和评分的临时解决方案，并不能真正检测 Android 设备上的 GPU 运行情况。\n\n---\n\n## 贡献\n\n欢迎贡献，尤其是新增模型。\n\n### 添加模型\n\n1. 将模型的 HuggingFace 仓库 ID（例如 `meta-llama\u002FLlama-3.1-8B`）添加到 `scripts\u002Fscrape_hf_models.py` 中的 `TARGET_MODELS` 列表。\n2. 如果模型受限制（需要 HuggingFace 认证才能访问元数据），请在同一脚本的 `FALLBACKS` 列表中添加一个备用条目，包含参数量和上下文长度。\n3. 
运行自动化更新脚本：\n   ```sh\n   make update-models\n   # 或：.\u002Fscripts\u002Fupdate_models.sh\n   ```\n4. 验证更新后的模型列表：`.\u002Ftarget\u002Frelease\u002Fllmfit list`\n5. 更新 [MODELS.md](MODELS.md)，运行：`python3 \u003C\u003C 'EOF' \u003C scripts\u002F...`（参见提交历史中的生成脚本）。\n6. 打开拉取请求。\n\n当前列表请参阅 [MODELS.md](MODELS.md)，架构详情请参阅 [AGENTS.md](AGENTS.md)。\n\n---\n\n## OpenClaw 集成\n\nllmfit 以 [OpenClaw](https:\u002F\u002Fgithub.com\u002Fopenclaw\u002Fopenclaw) 技能的形式提供，该技能可以让代理推荐适合硬件的本地模型，并自动配置 Ollama\u002FvLLM\u002FLM Studio 提供者。\n\n### 安装技能\n\n```sh\n# 从 llmfit 仓库\n.\u002Fscripts\u002Finstall-openclaw-skill.sh\n\n# 或手动\ncp -r skills\u002Fllmfit-advisor ~\u002F.openclaw\u002Fskills\u002F\n```\n\n安装完成后，您可以向 OpenClaw 代理询问以下问题：\n\n- “我可以运行哪些本地模型？”\n- “为我的硬件推荐一个编码模型”\n- “用最适合我 GPU 的模型来配置 Ollama”\n\n代理会在后台调用 `llmfit recommend --json`，解析结果，并提示您将最佳模型选择配置到 `openclaw.json` 文件中。\n\n### 工作原理\n\n该技能教会 OpenClaw 代理：\n\n1. 通过 `llmfit --json system` 检测您的硬件\n2. 通过 `llmfit recommend --json` 获取排名推荐\n3. 将 HuggingFace 模型名称映射到 Ollama\u002FvLLM\u002FLM Studio 标签\n4. 
配置 `openclaw.json` 中的 `models.providers.ollama.models`\n\n完整技能定义请参阅 [skills\u002Fllmfit-advisor\u002FSKILL.md](skills\u002Fllmfit-advisor\u002FSKILL.md)。\n\n---\n\n## 替代方案\n\n如果您正在寻找其他方法，可以查看 [llm-checker](https:\u002F\u002Fgithub.com\u002FPavelevich\u002Fllm-checker) —— 一个集成了 Ollama 的 Node.js 命令行工具，可以直接拉取和基准测试模型。它采用更直接的方式，通过 Ollama 在您的硬件上实际运行模型，而不是仅根据规格估算性能。如果您已经安装了 Ollama 并希望测试真实性能，这将是一个不错的选择。需要注意的是，它不支持 MoE（专家混合）架构——所有模型都被视为密集型模型，因此像 Mixtral 或 DeepSeek-V3 这样的模型的内存估算将反映总参数量，而非较小的活跃子集。\n\n---\n\n## 许可证\n\nMIT","# llmfit 快速上手指南\n\n**llmfit** 是一款终端工具，旨在根据你的硬件配置（RAM、CPU、GPU）智能推荐最适合运行的大语言模型（LLM）。它能检测系统规格，从质量、速度、适配度和上下文长度等维度对模型进行评分，并告诉你哪些模型能在你的机器上流畅运行。\n\n## 环境准备\n\n*   **操作系统**：支持 Windows、macOS (Intel\u002FApple Silicon) 和 Linux。\n*   **硬件要求**：无特殊限制，工具会自动检测你的 CPU、内存、显卡及显存信息。\n*   **前置依赖**：\n    *   **Windows**: 建议安装 [Scoop](https:\u002F\u002Fscoop.sh\u002F) 包管理器。\n    *   **macOS**: 建议安装 [Homebrew](https:\u002F\u002Fbrew.sh\u002F)。\n    *   **Linux\u002FmacOS (源码编译)**: 需安装 Rust 工具链 (`cargo`)。\n    *   **运行时 (可选)**: 若需直接运行模型，建议预先安装 Ollama、llama.cpp 或 LM Studio 等后端，llmfit 会自动识别它们。\n\n## 安装步骤\n\n请选择适合你系统的安装方式：\n\n### Windows (使用 Scoop)\n```sh\nscoop install llmfit\n```\n*若未安装 Scoop，请先访问 https:\u002F\u002Fscoop.sh\u002F 进行安装。*\n\n### macOS \u002F Linux\n\n**方式一：使用 Homebrew (推荐 macOS 用户)**\n```sh\nbrew install llmfit\n```\n\n**方式二：一键脚本安装 (通用)**\n下载最新二进制文件并安装到 `\u002Fusr\u002Flocal\u002Fbin` (需要 sudo 权限)：\n```sh\ncurl -fsSL https:\u002F\u002Fllmfit.axjns.dev\u002Finstall.sh | sh\n```\n\n**方式三：免 sudo 安装到用户目录**\n将二进制文件安装到 `~\u002F.local\u002Fbin`：\n```sh\ncurl -fsSL https:\u002F\u002Fllmfit.axjns.dev\u002Finstall.sh | sh -s -- --local\n```\n*注意：如果选择此方式，请确保 `~\u002F.local\u002Fbin` 已添加到你的 `$PATH` 环境变量中。*\n\n### 使用 Docker \u002F Podman\n无需安装二进制文件，直接运行容器：\n```sh\ndocker run ghcr.io\u002Falexsjones\u002Fllmfit\n```\n\n### 从源码编译\n```sh\ngit clone https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit.git\ncd llmfit\ncargo build --release\n# 编译后的二进制文件位于 
target\u002Frelease\u002Fllmfit\n```\n\n## 基本使用\n\n### 1. 启动交互式界面 (TUI) - **推荐**\n直接运行命令即可启动带有图形界面的终端模式，自动检测硬件并列出推荐模型：\n\n```sh\nllmfit\n```\n\n**核心操作快捷键：**\n*   `j` \u002F `k` 或 `↑` \u002F `↓`：上下浏览模型列表。\n*   `\u002F`：进入搜索模式（支持按名称、提供商、参数量搜索）。\n*   `f`：切换适配度过滤（所有 \u002F 可运行 \u002F 完美适配 \u002F 良好 \u002F 勉强）。\n*   `p`：进入 **Plan 模式**，反向推算运行选定模型所需的硬件配置。\n*   `d`：下载选中的模型（需已安装对应的运行时后端）。\n*   `Enter`：查看模型详细信息。\n*   `q`：退出程序。\n\n> **提示**：启动 TUI 后，工具会自动在后台开启一个 Web 仪表盘，默认地址为 `http:\u002F\u002F0.0.0.0:8787`，你可以在浏览器中查看相同的分析结果。\n\n### 2. 命令行模式 (CLI)\n如果你需要在脚本中使用或仅需文本输出，可以使用 `--cli` 标志或子命令。\n\n**查看适配度排名列表：**\n```sh\nllmfit --cli\n```\n\n**仅显示“完美适配”的前 5 个模型：**\n```sh\nllmfit fit --perfect -n 5\n```\n\n**查看当前系统硬件检测结果：**\n```sh\nllmfit system\n```\n\n**搜索特定模型（如 llama 8b）：**\n```sh\nllmfit search \"llama 8b\"\n```\n\n**以 JSON 格式输出推荐结果（适合集成到 Agent 或脚本）：**\n```sh\nllmfit recommend --json --limit 5\n```\n\n**针对特定场景推荐（例如编程）：**\n```sh\nllmfit recommend --json --use-case coding --limit 3\n```","一名拥有 32GB 内存和单张 RTX 4090 显卡的开发者，正试图在本地搭建一个高效的代码辅助助手，却面对 Hugging Face 上成千上万个模型版本无从下手。\n\n### 没有 llmfit 时\n- **盲目试错成本高**：只能凭经验猜测哪些量化版本（如 Q4_K_M 或 Q8_0）能塞进显存，频繁下载数十 GB 的模型文件后才发现运行时报错“内存不足”。\n- **性能预估靠猜**：无法提前知道选定模型在当前硬件上的推理速度（tok\u002Fs），部署后才发现生成速度只有 2 tokens\u002Fs，完全无法满足实时编码需求。\n- **选型维度单一**：往往只关注参数量大小，忽略了上下文窗口（Context）是否足够长，导致模型无法读取完整的项目文件结构。\n- **环境配置繁琐**：需要手动查阅文档确认模型是否支持 Ollama 或 llama.cpp 后端，反复切换工具链进行兼容性测试。\n\n### 使用 llmfit 后\n- **一键精准匹配**：运行 `llmfit` 后，工具自动扫描硬件配置，直接过滤掉所有无法运行的模型，仅列出“完美适配”或“良好运行”的选项。\n- **量化与速度透明化**：界面清晰展示每个推荐模型的最佳量化格式及预估推理速度，开发者可立即锁定既能跑满显存又能保证流畅度的模型。\n- **多维场景筛选**：通过 `\u002F` 搜索 \"coding\" 并按 \"Ctx\" 排序，瞬间找到专为代码优化且支持长上下文的模型，无需人工比对参数表。\n- **后端无缝衔接**：直接在交互界面中查看模型对 Ollama 或 LM Studio 的支持情况，选中即可获取运行命令，省去了排查后端兼容性的时间。\n\nllmfit 将原本需要数小时的模型选型与兼容性测试过程，压缩为一次直观的终端交互，让开发者能立刻在本地硬件上运行最优的大语言模型。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FAlexsJones_llmfit_92a925a6.png","AlexsJones","Alex 
Jones","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002FAlexsJones_358a156e.jpg","Principal Engineer @aws\r\nBluesky: @axjns.dev","AWS","London",null,"AlexJonesax","https:\u002F\u002Fgithub.com\u002FAlexsJones",[82,86,90,94,98,102,106,110,114],{"name":83,"color":84,"percentage":85},"Rust","#dea584",76.1,{"name":87,"color":88,"percentage":89},"Python","#3572A5",12.2,{"name":91,"color":92,"percentage":93},"JavaScript","#f1e05a",6.6,{"name":95,"color":96,"percentage":97},"CSS","#663399",3.2,{"name":99,"color":100,"percentage":101},"Shell","#89e051",1.1,{"name":103,"color":104,"percentage":105},"HTML","#e34c26",0.3,{"name":107,"color":108,"percentage":109},"Makefile","#427819",0.2,{"name":111,"color":112,"percentage":113},"Nix","#7e7eff",0.1,{"name":115,"color":116,"percentage":113},"Dockerfile","#384d54",21484,1262,"2026-04-06T02:53:03","MIT",1,"Windows, macOS, Linux","非必需。支持多 GPU 设置、MoE 架构及动态量化选择。工具会自动检测 GPU 名称和显存 (VRAM) 以评估模型适配度，未指定具体型号或最低显存要求。","未说明具体数值。工具会根据系统 RAM 自动检测并评分，以确定哪些模型可以运行。",{"notes":126,"python":127,"dependencies":128},"1. 该工具主要提供二进制文件安装 (Scoop, Homebrew, curl 脚本) 或通过 Rust (cargo) 从源码编译，无需 Python 环境。\n2. 核心功能是检测本地硬件 (CPU\u002FRAM\u002FGPU) 并推荐适合的 LLM 模型及量化版本。\n3. 支持多种后端运行时提供者 (Ollama, llama.cpp, MLX, Docker Model Runner, LM Studio)。\n4. 包含交互式 TUI 界面和 Web 仪表盘 (默认端口 8787)。\n5. 
Windows 用户推荐使用 Scoop 安装，macOS\u002FLinux 用户推荐使用 Homebrew 或官方安装脚本。","未说明 (该工具基于 Rust 开发，通过 Cargo 编译，不依赖 Python 环境)",[129,130,131,132,133],"Rust\u002FCargo (编译源码时需)","Ollama (可选运行时)","llama.cpp (可选运行时)","MLX (可选运行时，针对 Apple Silicon)","LM Studio (可选运行时)",[35,14],[136,137,138,139,140,141],"llm","skill","localai","gguf","mlx","unsloth","2026-03-27T02:49:30.150509","2026-04-06T15:54:50.284462",[145,150,155,160,165,170],{"id":146,"question_zh":147,"answer_zh":148,"source_url":149},19485,"为什么在 NVIDIA DGX 或其他高性能机器上运行时，系统检测不到 GPU 且模型评分很低？","这通常是因为旧版本无法正确识别统一内存架构（如 GB10\u002FGB20）。该问题已在 PR #102 中修复，并包含在 v0.4.6 及更高版本中。请升级工具以启用统一内存检测功能。","https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fissues\u002F17",{"id":151,"question_zh":152,"answer_zh":153,"source_url":154},19486,"为什么安装了 llama.cpp 却提示“没有兼容的运行时”或不显示 llama.cpp 格式的模型？","这通常是因为模型格式不匹配。llama.cpp 原生仅支持 GGUF 格式。如果模型是 AWQ 或 GPTQ 格式，即使安装了 llama.cpp 也无法直接运行。新版本（v0.6.9+）已优化了错误提示，区分了“运行时”和“提供者”。若需运行非 GGUF 模型，请安装对应的运行时（如 vLLM），或将模型转换为 GGUF 格式。","https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fissues\u002F123",{"id":156,"question_zh":157,"answer_zh":158,"source_url":159},19487,"在 Windows 上安装 llmfit 是否必须安装庞大的 Visual Studio 和 Cargo？","不需要编译即可使用。Windows 用户可以直接下载发布页面（Releases）中的预编译可执行文件。请注意选择正确的架构版本：大多数现代 Windows 电脑应下载 `x86_64-pc-windows-msvc` 版本，而非 `aarch64`。此外，也可以通过 Scoop 包管理器安装：`scoop install llmfit`。","https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fissues\u002F90",{"id":161,"question_zh":162,"answer_zh":163,"source_url":164},19488,"为什么某些模型显示“此模型没有兼容的提供者（No compatible provider available）”？","这里的术语可能令人困惑。“提供者”在此处指代推理运行时（Runtime）。出现此错误通常是因为：虽然你安装了某个运行时（如 llama.cpp），但该特定模型版本（例如 AWQ 量化版）没有对应的可下载文件格式（如缺少 GGUF 文件）。简单来说，运行时已就绪，但缺乏适配该运行时的模型文件。建议尝试该模型的非 AWQ 版本（通常有 GGUF 格式），或安装支持该格式的其它运行时。","https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fissues\u002F198",{"id":166,"question_zh":167,"answer_zh":168,"source_url":169},19489,"如何在列表中看到最新的模型（例如 Qwen 3.5 
系列）？","如果你使用的是预编译版本，可能需要等待下一次正式发布。如果你想立即使用，可以通过以下两种方式更新本地模型数据库：\n1. 从源码构建：运行 `cargo build --release`。\n2. 更新模型列表：运行 `make update-models` 命令重新生成模型数据库。","https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fissues\u002F136",{"id":171,"question_zh":172,"answer_zh":173,"source_url":174},19490,"如何使用命令行参数手动覆盖检测到的内存大小？","可以使用 `--memory` 参数手动指定内存大小。注意参数格式：在较新版本中，Clap 解析器支持带或不带等号的写法，但为了兼容性，建议遵循帮助文档格式，例如：`llmfit --memory=24GB` 或 `llmfit --memory 24GB`。如果遇到解析错误，请检查数值与单位之间是否有不必要的空格。","https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fissues\u002F89",[176,181,186,191,196,201,206,211,216,221,226,231,236,241,246,251,256,261,266,271],{"id":177,"version":178,"summary_zh":179,"released_at":180},117525,"v0.9.1","**完整更新日志**: https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fcompare\u002Fv0.9.0...v0.9.1","2026-04-05T19:47:17",{"id":182,"version":183,"summary_zh":184,"released_at":185},117526,"v0.9.0","## 变更内容\n* 功能（Web）：通过 @sloweyyy 在 https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F313 中的提交，使仪表板功能与全 TUI 功能完全一致。\n* 修复：将默认估算上下文限制在 8192 个 token，由 @octo-patch 在 https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F311 中完成。\n\n## 新贡献者\n* @sloweyyy 在 https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F313 中完成了首次贡献。\n\n**完整变更日志**：https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fcompare\u002Fv0.8.9...v0.9.0","2026-04-05T13:21:57",{"id":187,"version":188,"summary_zh":189,"released_at":190},117527,"v0.8.9","**完整更新日志**: https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fcompare\u002Fv0.8.8...v0.8.9","2026-04-04T08:40:15",{"id":192,"version":193,"summary_zh":194,"released_at":195},117528,"v0.8.8","**完整更新日志**: https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fcompare\u002Fv0.8.7...v0.8.8","2026-04-04T08:10:10",{"id":197,"version":198,"summary_zh":199,"released_at":200},117529,"v0.8.7","**完整更新日志**: 
https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fcompare\u002Fv0.8.6...v0.8.7","2026-04-03T09:14:18",{"id":202,"version":203,"summary_zh":204,"released_at":205},117530,"v0.8.6","## 变更内容\n* chore(deps): 由 @dependabot[bot] 在 https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F297 中将 clap 从 4.5.61 升级至 4.6.0\n* chore(deps): 由 @dependabot[bot] 在 https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F299 中将 softprops\u002Faction-gh-release 从 2.5.0 升级至 2.6.1\n* feat: 由 @maskedsyntax 在 https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F300 中添加模型许可证过滤器\n* fix: 由 @AlexsJones 在 https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F305 中新增模型\n\n## 新贡献者\n* @maskedsyntax 在 https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F300 中完成了首次贡献\n\n**完整变更日志**: https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fcompare\u002Fv0.8.5...v0.8.6","2026-04-01T14:12:15",{"id":207,"version":208,"summary_zh":209,"released_at":210},117531,"v0.8.5","## 变更内容\n* chore(deps): 由 @dependabot[bot] 在 https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F285 中将 clap 从 4.5.60 升级至 4.5.61\n* 使用 'which' crate 替代 'which' 命令，由 @longhuan1999 在 https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F286 中完成\n* chore(deps): 由 @dependabot[bot] 在 https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F283 中将 sysinfo 从 0.38.3 升级至 0.38.4\n* feat: 添加多 GPU 模型训练的张量并行感知功能，由 @michaeljabbour 在 https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F277 中实现\n* chore(deps): 由 @dependabot[bot] 在 https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F284 中将 actions\u002Fdownload-artifact 从 8.0.0 升级至 8.0.1\n* feat: 添加从 HuggingFace 实时更新模型数据库的功能，由 @saiteja007-mv 在 https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F131 中完成\n* chore(deps): 由 @dependabot[bot] 在 
https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F282 中将 tokio 从 1.49.0 升级至 1.50.0\n* chore(deps): 由 @dependabot[bot] 在 https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F281 中将 actions\u002Fsetup-node 从 4 升级至 6\n\n## 新贡献者\n* @longhuan1999 在 https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F286 中完成了首次贡献\n* @michaeljabbour 在 https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F277 中完成了首次贡献\n* @saiteja007-mv 在 https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F131 中完成了首次贡献\n\n**完整变更日志**: https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fcompare\u002Fv0.8.4...v0.8.5","2026-03-27T14:57:45",{"id":212,"version":213,"summary_zh":214,"released_at":215},117532,"v0.8.4","## 变更内容\n* @commandlinetips 在 https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F280 中添加了将模型名称复制到剪贴板的功能\n\n## 新贡献者\n* @commandlinetips 在 https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F280 中完成了首次贡献\n\n**完整变更日志**: https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fcompare\u002Fv0.0.2...v0.8.4","2026-03-22T17:57:23",{"id":217,"version":218,"summary_zh":219,"released_at":220},117533,"v0.0.2","## 变更内容\n* 移除模型去重逻辑，由 @TobTobXX 在 https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F276 中完成\n\n## 新贡献者\n* @TobTobXX 在 https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F276 中完成了首次贡献\n\n**完整变更日志**: https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fcompare\u002Fv0.8.2...v0.0.2","2026-03-22T15:35:17",{"id":222,"version":223,"summary_zh":224,"released_at":225},117534,"v0.8.2","**完整更新日志**: https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fcompare\u002Fv0.8.1...v0.8.2","2026-03-21T13:50:35",{"id":227,"version":228,"summary_zh":229,"released_at":230},117535,"v0.8.1","## What's Changed\n* chore(deps): bump docker\u002Fbuild-push-action from 6.19.2 to 7.0.0 by @dependabot[bot] in 
https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F244\n\n\n**Full Changelog**: https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fcompare\u002Fv0.8.0...v0.8.1","2026-03-20T14:20:57",{"id":232,"version":233,"summary_zh":234,"released_at":235},117536,"v0.8.0","## What's Changed\n* Fix confusing runtime\u002Fprovider error messages by @AlexsJones in https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F267\n\n\n**Full Changelog**: https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fcompare\u002Fv0.7.9...v0.8.0","2026-03-19T09:08:34",{"id":237,"version":238,"summary_zh":239,"released_at":240},117537,"v0.7.9","**Full Changelog**: https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fcompare\u002Fv0.7.8...v0.7.9","2026-03-18T20:05:59",{"id":242,"version":243,"summary_zh":244,"released_at":245},117538,"v0.7.8","## What's Changed\n* Fix: filter models by GPU compute capability for vLLM quants by @AlexsJones in https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F266\n\n\n**Full Changelog**: https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fcompare\u002Fv0.7.7...v0.7.8","2026-03-18T20:00:23",{"id":247,"version":248,"summary_zh":249,"released_at":250},117539,"v0.7.7","## What's Changed\n* feat(dashboard): ship embedded web UI and auto-start from CLI by @AlexsJones in https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F265\n* chore(deps): bump sysinfo from 0.38.2 to 0.38.3 by @dependabot[bot] in https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F245\n* chore(deps): bump docker\u002Fsetup-qemu-action from 3 to 4 by @dependabot[bot] in https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F246\n\n\n**Full Changelog**: https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fcompare\u002Fv0.1.1...v0.7.7","2026-03-18T17:04:09",{"id":252,"version":253,"summary_zh":254,"released_at":255},117540,"v0.7.6","## What's 
Changed\n* feat: add MiniMax-M2.7 to curated model database by @octo-patch in https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F261\n\n## New Contributors\n* @octo-patch made their first contribution in https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F261\n\n**Full Changelog**: https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fcompare\u002Fv0.7.5...v0.7.6","2026-03-18T11:53:25",{"id":257,"version":258,"summary_zh":259,"released_at":260},117541,"v0.7.5","## What's Changed\n* Feat: Docs\u002Fchinese translation by @AlexsJones in https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F253\n* docs: add Chinese translation (README.zh.md) by @JasonYeYuhe in https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F252\n* chore(deps): bump docker\u002Fmetadata-action from 5.10.0 to 6.0.0 by @dependabot[bot] in https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F243\n* fix: prefer exact matches in info selection by @haosenwang1018 in https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F206\n* chore(deps): bump tauri-build from 2.5.5 to 2.5.6 by @dependabot[bot] in https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F250\n* chore(deps): bump docker\u002Fsetup-buildx-action from 3 to 4 by @dependabot[bot] in https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F249\n* Docs\u002Fchinese translation by @JasonYeYuhe in https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F256\n* Deduplicate duplicate model catalog entries by @jasperan in https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F254\n* chore(deps): bump tauri from 2.10.2 to 2.10.3 by @dependabot[bot] in https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F248\n* feat: add Catppuccin color themes to TUI by @tuxtof in https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F258\n\n## New 
Contributors\n* @JasonYeYuhe made their first contribution in https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F252\n* @jasperan made their first contribution in https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F254\n* @tuxtof made their first contribution in https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F258\n\n**Full Changelog**: https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fcompare\u002Fv0.7.4...v0.7.5","2026-03-18T07:45:28",{"id":262,"version":263,"summary_zh":264,"released_at":265},117542,"v0.7.4","**Full Changelog**: https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fcompare\u002Fv0.7.3...v0.7.4","2026-03-15T07:41:05",{"id":267,"version":268,"summary_zh":269,"released_at":270},117543,"v0.7.3","## What's Changed\n* fix: surface MoE offloaded RAM in JSON output by @AlexsJones in https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fpull\u002F235\n\n\n**Full Changelog**: https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fcompare\u002Fv0.7.2...v0.7.3","2026-03-14T17:49:09",{"id":272,"version":273,"summary_zh":274,"released_at":275},117544,"v0.7.2","**Full Changelog**: https:\u002F\u002Fgithub.com\u002FAlexsJones\u002Fllmfit\u002Fcompare\u002Fv0.7.1...v0.7.2","2026-03-13T04:41:56"]