[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-liguodongiot--llm-resource":3,"tool-liguodongiot--llm-resource":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 
代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",148568,2,"2026-04-09T23:34:24",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",108111,"2026-04-08T11:23:26",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 
协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[52,13,15,14],"插件",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":32,"last_commit_at":59,"category_tags":60,"status":17},4721,"markitdown","microsoft\u002Fmarkitdown","MarkItDown 是一款由微软 AutoGen 团队打造的轻量级 Python 工具，专为将各类文件高效转换为 Markdown 格式而设计。它支持 PDF、Word、Excel、PPT、图片（含 OCR）、音频（含语音转录）、HTML 乃至 YouTube 链接等多种格式的解析，能够精准提取文档中的标题、列表、表格和链接等关键结构信息。\n\n在人工智能应用日益普及的今天，大语言模型（LLM）虽擅长处理文本，却难以直接读取复杂的二进制办公文档。MarkItDown 恰好解决了这一痛点，它将非结构化或半结构化的文件转化为模型“原生理解”且 Token 效率极高的 Markdown 格式，成为连接本地文件与 AI 分析 pipeline 的理想桥梁。此外，它还提供了 MCP（模型上下文协议）服务器，可无缝集成到 Claude Desktop 等 LLM 应用中。\n\n这款工具特别适合开发者、数据科学家及 AI 研究人员使用，尤其是那些需要构建文档检索增强生成（RAG）系统、进行批量文本分析或希望让 AI 助手直接“阅读”本地文件的用户。虽然生成的内容也具备一定可读性，但其核心优势在于为机器",93400,"2026-04-06T19:52:38",[52,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":66,"readme_en":67,"readme_zh":68,"quickstart_zh":69,"use_case_zh":70,"hero_image_url":71,"owner_login":72,"owner_name":73,"owner_avatar_url":74,"owner_bio":75,"owner_company":76,"owner_location":77,"owner_email":78,"owner_twitter":76,"owner_website":79,"owner_url":80,"languages":81,"stars":86,"forks":87,"last_commit_at":88,"license":89,"difficulty_score":90,"env_os":91,"env_gpu":92,"env_ram":92,"env_deps":93,"category_tags":96,"github_topics":97,"view_count":32,"oss_zip_url":76,"oss_zip_packed_at":76,"status":17,"created_at":100,"updated_at":101,"faqs":102,"releases":103},6089,"liguodongiot\u002Fllm-resource","llm-resource","LLM全栈优质资源汇总","llm-resource 是一个专为大语言模型（LLM）领域打造的全栈资源导航库，被誉为\"LLM 百宝箱”。面对大模型技术迭代快、知识体系庞杂且资料分散的痛点，它将海量优质内容进行了系统化梳理与整合。\n\n该工具覆盖了从理论基础到工程落地的完整链路，内容涵盖 Transformer 等核心算法原理、模型训练与微调策略、推理加速、数据工程、模型压缩及评测体系。此外，它还深入涉及 AI 基础设施（如芯片、CUDA）、编译器优化、主流框架以及 LLMOps 等关键领域，并提供了丰富的源码解析链接和前沿技术文章集锦。\n\n无论是希望夯实基础的研究人员、需要快速定位技术方案的开发者，还是关注行业趋势的技术管理者，都能在这里找到极具价值的学习路径和实战参考。其独特亮点在于不仅罗列资源，更按技术栈逻辑构建了清晰的知识地图，帮助用户高效跨越从理论理解到代码实现的鸿沟，是探索大模型世界不可或缺的得力助手。","# llm-resource（LLM 百宝箱）\n\nLLM全栈优质资源汇总\n\n> 
非常欢迎大家也参与进来，收集更多优质大模型相关资源。\n\n## 目录\n\n- 🐼 [LLM算法](#llm算法)\n- 🐘 [LLM训练](#llm训练)\n\t- 🐘 [LLM微调](#llm微调)\n\t- 🐼 [LLM对齐](#llm对齐)\n- 🔥 [LLM推理](#llm推理)\n- :palm_tree: [LLM数据工程（Data Engineering）](#llm数据工程)\n- 📡 [LLM压缩](#llm压缩)\n- 🐰 [LLM测评](#llm测评)\n- 🐘 [AI基础知识](#ai基础知识)\n- 📡 [AI基础设施](#ai基础设施)\n\t- :palm_tree: [AI芯片](#ai芯片)\n\t- 🐰 [CUDA](#cuda)\n- 🐘 [AI编译器](#ai编译器)\n- 🐰 [AI框架](#ai框架)\n- 📡 [LLM应用开发](#llm应用开发)\n- 🐘 [LLMOps](#llmops)\n- 📡 [LLM实践](llm实践)\n- 📡[微信公众号文章集锦](#微信公众号文章集锦)\n\n\n\n## LLM算法\n\n\n### Transformer\n\n原理：\n- [Transformer模型详解（图解最完整版](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F338817680)\n- [OpenAI ChatGPT（一）：十分钟读懂 Transformer](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F600773858)\n- [Transformer的结构是什么样的？各个子模块各有什么作用？](https:\u002F\u002Fblog.csdn.net\u002Fm0_54929869\u002Farticle\u002Fdetails\u002F118881804)\n- [以Transformer结构为基础的大模型参数量、计算量、中间激活以及KV cache剖析](https:\u002F\u002Fmp.weixin.qq.com\u002Fs\u002F3JYz6yrLeBr5ujip3LZe6w)\n- [Transformer 一起动手编码学原理](https:\u002F\u002Fmp.weixin.qq.com\u002Fs\u002FNgUNuWhvp2SqG-XWYv2PGQ)\n- [为什么transformer(Bert)的多头注意力要对每一个head进行降维？](http:\u002F\u002Fwww.sniper97.cn\u002Findex.php\u002Fnote\u002Fdeep-learning\u002Fnote-deep-learning\u002F4002\u002F)\n- [Decoder-Only Transformers: The Workhorse of Generative LLMs](https:\u002F\u002Fcameronrwolfe.substack.com\u002Fp\u002Fdecoder-only-transformers-the-workhorse)\n\n\n源码：\n\n- [OpenAI ChatGPT（一）：Tensorflow实现Transformer](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F603243890)\n- [OpenAI ChatGPT（一）：十分钟读懂 Transformer](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F600773858)\n- [GPT （一）transformer原理和代码详解](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F632880248)\n- [Transformer源码详解（Pytorch版本）](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F398039366)\n- [搞懂Transformer结构，看这篇PyTorch实现就够了](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F339207092)\n\n\n\n### GPT1\n\n\n### GPT2\n\n\n- GPT2 
源码：https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Ftransformers\u002Fblob\u002Fmain\u002Fsrc\u002Ftransformers\u002Fmodels\u002Fgpt2\u002Fmodeling_gpt2.py\n- GPT2 源码解析：https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F630970209\n- nanoGPT：https:\u002F\u002Fgithub.com\u002Fkarpathy\u002FnanoGPT\u002Fblob\u002Fmaster\u002Fmodel.py\n\n\n- 7.3 GPT2模型深度解析：http:\u002F\u002F121.199.45.168:13013\u002F7_3.html\n- GPT（三）GPT2原理和代码详解: https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F637782385\n- GPT2参数量剖析: https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F640501114\n\n\n### ChatGPT\n\n- [State of GPT：大神Andrej揭秘OpenAI大模型原理和训练过程](https:\u002F\u002Fmp.weixin.qq.com\u002Fs\u002FzmEGzm1cdXupNoqZ65h7yg)\n- [OpenAI联合创始人亲自上场科普GPT，让技术小白也能理解最强AI](https:\u002F\u002Fmp.weixin.qq.com\u002Fs\u002FMD4WwwJLXm8rEm-sniX8Gw)\n\n\n\n\n\n### GLM\n\n- [预训练语言模型：GLM](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F641499380)\n\n\n### LLaMA\n\n\n\n### MOE 大模型\n\n- [Mixtral-8x7B MoE大模型微调实践，超越Llama2-65B](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247486657&idx=1&sn=c5a5e55b01243f477d063c9194d24f42&chksm=fd3be592ca4c6c84bf5eefff23dcc38eeb83624e9f53bbd9a72afba71e235dddf814549322ba&token=499509118&lang=zh_CN#rd)\n- [大模型分布式训练并行技术（八）-MOE并行](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247486145&idx=1&sn=299c28153b286465be26e18153c6db5d&chksm=fd3be392ca4c6a84be283dad80f584443302ea29fc95744f83727e7d9d68952d3a0f8b1b66d5&token=499509118&lang=zh_CN#rd)\n- [MoE架构模型爆发或将带飞国产AI芯片](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247488422&idx=1&sn=eeb18ec0f5b9e972df31d65e7db13f8f&chksm=fd3bfaf5ca4c73e38a696fe7b6f33a30af962fdddfabd92d74b1d06190442759aabe7b560f22&token=499509118&lang=zh_CN#rd)\n- [大模型的模型融合方法概述](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247487652&idx=1&sn=1bbf692b6e1dc6bae719c8e0a10293a0&chksm=fd3bf9f7ca4c70e16473a98d5408f6daea5e8c116a88cb3f41dfb00ffb7f6016874ee092224c&token=499509118&lang=zh_CN#rd)\n- 
[混合专家模型 (MoE) 详解](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F674698482)\n- [群魔乱舞：MoE大模型详解](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F677638939)\n- [大模型LLM之混合专家模型MoE（上-基础篇）](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F672712751)\n- [大模型LLM之混合专家模型MoE（下-实现篇）](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F673048264)\n\n\n### 下一代大模型\n\n- https:\u002F\u002Fgithub.com\u002FNExT-GPT\u002FNExT-GPT\n- https:\u002F\u002Fnext-gpt.github.io\u002F\n- [Introduction to NExT-GPT: Any-to-Any Multimodal Large Language Model](https:\u002F\u002Fwww.kdnuggets.com\u002Fintroduction-to-nextgpt-anytoany-multimodal-large-language-model)\n\n\n\n### 多模态大模型\n\nA Survey on Multimodal Large Language Models：https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.13549\nEfficient-Multimodal-LLMs-Survey：https:\u002F\u002Fgithub.com\u002Flijiannuist\u002FEfficient-Multimodal-LLMs-Survey\n\n\n### 其他\n\n- [大模型时代的归一化技术：解密Transformer架构中Pre-Norm与RMSNorm的黄金组合](https:\u002F\u002Fblog.csdn.net\u002Fqq_54445177\u002Farticle\u002Fdetails\u002F147096307)\n\n\n\n\n\n## LLM训练\n\n\n- [分布式训练 Playbook](https:\u002F\u002Fhuggingface.co\u002Fspaces\u002Fnanotron\u002Fultrascale-playbook)\n- [OPT-175B是如何炼成的](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F622061951)\n- [全网最全-混合精度训练原理](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F441591808)\n- [飞桨分布式训练4D混合并行可训千亿级AI模型](https:\u002F\u002Fai.baidu.com\u002Fforum\u002Ftopic\u002Fshow\u002F987996)\n- [Transformer Math 101](https:\u002F\u002Fblog.eleuther.ai\u002Ftransformer-math\u002F) - 如何计算显存消耗?\n- [Megatron-LM 第三篇Paper总结——Sequence Parallelism & Selective Checkpointing](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F522198082)\n- [大模型训练踩坑](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F660759033)\n\n\n- 学习率(warmup, decay)：\n\t- [模型调优，学习率设置（Warm Up、loss自适应衰减等），batch size调优技巧，基于方差放缩初始化方法](https:\u002F\u002Fblog.csdn.net\u002Fsinat_39620217\u002Farticle\u002Fdetails\u002F130236886)\n\t- 
[深度学习模型训练小技巧](https:\u002F\u002Fblog.csdn.net\u002Fsgyuanshi\u002Farticle\u002Fdetails\u002F108394444)\n\n\n### LLM微调\n\n- [Adapting P-Tuning to Solve Non-English Downstream Tasks](https:\u002F\u002Fdeveloper.nvidia.com\u002Fblog\u002Fadapting-p-tuning-to-solve-non-english-downstream-tasks\u002F)\n\n\n### LLM对齐\n\n- [MOSS-RLHF](https:\u002F\u002Fgithub.com\u002FOpenLMLab\u002FMOSS-RLHF)\n- [模型调优（RLHF\u002FDPO\u002FORPO）- 终极指南](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F692594519)\n- [DPO: Direct Preference Optimization 论文解读及代码实践](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F642569664)\n- [强化学习入门：基本思想和经典算法](https:\u002F\u002Fimzhanghao.com\u002F2022\u002F02\u002F10\u002Freinforcement-learning\u002F)\n- [人人都能看懂的PPO原理与源码解读](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F677607581)\n- [关于Instruct GPT复现的一些细节与想法](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F609078527)\n- [【RLHF】RL 究竟是如何与 LLM 做结合的？](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F675329917)\n- [【RLHF】想训练ChatGPT？得先弄明白Reward Model怎么训（附源码）](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F595579042)\n- [Reinforcement Learning from Human Feedback 全家桶（RL 侧）](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F700149886)\n\npaper:\n\n- [LLM对齐综述](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2407.16216)\n- [RLHF-PPO](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2203.02155)\n- [DPO](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.18290)\n- [ORPO](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2403.07691)\n\n\n## LLM推理\n\n\n- [使用HuggingFace的Accelerate库加载和运行超大模型](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F605640431) : device_map、no_split_module_classes、 offload_folder、 offload_state_dict\n- [借助 PyTorch，Accelerate 如何运行超大模型](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Faccelerate-large-models)\n- [使用 DeepSpeed 和 Accelerate 进行超快 BLOOM 模型推理](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Fzh\u002Fbloom-inference-pytorch-scripts)\n- [LLM七种推理服务框架总结](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F653352979)\n- 
[LLM投机采样（Speculative Sampling）为何能加速模型推理](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F653734659)\n- [大模型推理妙招—投机采样（Speculative Decoding）](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F651359908)\n- https:\u002F\u002Fgithub.com\u002Fflexflow\u002FFlexFlow\u002Ftree\u002Finference\n- [TensorRT-LLM(3)--架构](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F665595557)\n- NLP（十八）：LLM 的推理优化技术纵览：https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F642412124\n- ​揭秘NVIDIA大模型推理框架：TensorRT-LLM：https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F680808866\n- [如何生成文本: 通过 Transformers 用不同的解码方法生成文本](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Fzh\u002Fhow-to-generate) | [How to generate text: using different decoding methods for language generation with Transformers](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Fhow-to-generate)\n- [DeepSeek-V3 \u002F R1 推理系统概览](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F27181462601)\n\n\n\n### 大模型推理优化技术\n\n\nKV Cache：\n- [图解大模型推理优化：KV Cache](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247486956&idx=1&sn=cd5e36857bbd8ebd750d2c172550d2bd&chksm=fd3be4bfca4c6da9f2276310995c7d60a42c0d01a960a42a38226cf954bab0d2d2a5772905df&token=1409805983&lang=zh_CN#rd)\n- [大模型推理百倍加速之KV cache篇](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247487886&idx=1&sn=38d3cd36c6c5acb2fe5c80ceffcba2cf&chksm=fd3bf8ddca4c71cb243566b593dfa095926b003a4a06442cc96e8ce3f2c64171b34f0bca8428&token=1409805983&lang=zh_CN#rd)\n- [大模型推理加速：看图学KV Cache](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F662498827)\n- [大模型推理性能优化之KV Cache解读](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F630832593)\n\n\n解码优化：\n- [大模型推理妙招—投机采样（Speculative Decoding）](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F651359908)\n\n\n\n\n### vLLM\n\n- [vLLM（六）源码解读下 @HelloWorld](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F694442998)\n- [猛猿：图解大模型计算加速系列：vLLM源码解析1，整体架构](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F691045737)\n- [LLM推理2：vLLM源码学习 @ 
akaihaoshuai ](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F643336063)\n- [大模型推理框架 vLLM 源码解析（一）：框架概览](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F681402162)\n\n\n## LLM数据工程\n\n- [An Initial Exploration of Theoretical Support for Language Model Data Engineering. Part 1: Pretraining @\n符尧](https:\u002F\u002Fyaofu.notion.site\u002FAn-Initial-Exploration-of-Theoretical-Support-for-Language-Model-Data-Engineering-Part-1-Pretraini-dc480d9bf7ff4659afd8c9fb738086eb)\n\n\n\n## LLM压缩\n\n\n\n- [Awesome Model Quantization](https:\u002F\u002Fgithub.com\u002Fhtqin\u002Fawesome-model-quantization)\n- [Efficient-LLMs-Survey](https:\u002F\u002Fgithub.com\u002FAIoT-MLSys-Lab\u002FEfficient-LLMs-Survey)\n- [Awesome LLM Compression](https:\u002F\u002Fgithub.com\u002FHuangOwen\u002FAwesome-LLM-Compression)\n- [模型转换、模型压缩、模型加速工具汇总](https:\u002F\u002Fblog.csdn.net\u002FWZZ18191171661\u002Farticle\u002Fdetails\u002F99700992)\n- [AI 框架部署方案之模型转换](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F396781295)\n- [Pytorch 模型转 TensorRT (torch2trt 教程)](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F570822430)\n\n\n\n## LLM测评\n\n- [CLiB中文大模型能力评测榜单](https:\u002F\u002Fgithub.com\u002Fjeinlee1991\u002Fchinese-llm-benchmark)\n- [huggingface Open LLM Leaderboard](https:\u002F\u002Fhuggingface.co\u002Fspaces\u002FHuggingFaceH4\u002Fopen_llm_leaderboard)\n- HELM：https:\u002F\u002Fgithub.com\u002Fstanford-crfm\u002Fhelm\n- HELM：https:\u002F\u002Fcrfm.stanford.edu\u002Fhelm\u002Flatest\u002F\n- lm-evaluation-harness：https:\u002F\u002Fgithub.com\u002FEleutherAI\u002Flm-evaluation-harness\u002F\n- CLEVA：http:\u002F\u002Fwww.lavicleva.com\u002F#\u002Fhomepage\u002Foverview\n- CLEVA：https:\u002F\u002Fgithub.com\u002FLaVi-Lab\u002FCLEVA\u002Fblob\u002Fmain\u002FREADME_zh-CN.md\n\n\n\n## 提示工程\n\n\n- 
[做数据关键步骤：怎么写好prompt？](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247486771&idx=1&sn=359c029b010d7ad96fff33952ad634a8&chksm=fd3be460ca4c6d76b4996f971ff21080ca0a83f3042893bb6827752ad8af812b4afeb1151af1&token=1288418017&lang=zh_CN#rd)\n- [从1000+模板中总结出的10大提示工程方法助你成为提示词大师！](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247486174&idx=1&sn=97ddcd5fb44eb4e3143fa746b7d617c8&chksm=fd3be38dca4c6a9b94fb88bd3f7a5009dee53812412e6f62f9f0a5955d165dd0d5f6ce698208&scene=21#wechat_redirect)\n- [一文搞懂提示工程的原理及前世今生](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247485231&idx=1&sn=acfa77264da611983a49297ab8376e8f&chksm=fd3bee7cca4c676a3ccbc459e70a9e9920b08369a4d618c4ed550c96e9acd09b594cc04b21a6&scene=21#wechat_redirect)\n- [Effective Prompt: 编写高质量Prompt的14个有效方法](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247486087&idx=1&sn=118b82abd4b22975e9aeb9f23ed0c9c5&chksm=fd3be3d4ca4c6ac2b41f1c3e908b845d4497a84dc9741034d1e1a830cba93515439b60a835e5&scene=21#wechat_redirect)\n- [提示工程和提示构造技巧](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247487107&idx=1&sn=337325ee6a9a4d4c56821b1e759f1555&chksm=fd3be7d0ca4c6ec60b6394bf76282ee3eef6beccfe2c31885cbb111a5bdc32022ba346509681&token=1288418017&lang=zh_CN#rd)\n- [一文带你了解提示攻击！](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247485936&idx=1&sn=0bcc72e5bfeb50c437253626d763f67d&chksm=fd3be0a3ca4c69b52bba0e0f22730b497c56fad99444b23d437cf49262cd5e52489fb141d338&token=1288418017&lang=zh_CN#rd)\n\n\n\n## 综合\n\n- [通向AGI之路：大型语言模型（LLM）技术精要](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F597586623)\n- [大语言模型的涌现能力：现象与解释](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F621438653)\n- [NLP（十八）：LLM 的推理优化技术纵览](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F642412124)\n- [并行计算3：并行计算模型](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F568947162)\n- [大模型“幻觉”，看这一篇就够了 | 
哈工大华为出品](https:\u002F\u002Fwww.thepaper.cn\u002FnewsDetail_forward_25344873)\n- [深入理解语言模型的困惑度(perplexity)](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F686808564)\n\n\n\n**safetensors**：\n\n- [bin和safetensors区别是什么？](https:\u002F\u002Fwww.zhihu.com\u002Fquestion\u002F629624037\u002Fanswer\u002F3307818120)\n- [Safetensors：保存模型权重的新格式](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F691446249)\n- [github: safetensors](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fsafetensors)\n- [huggingface: safetensors](https:\u002F\u002Fhuggingface.co\u002Fdocs\u002Fsafetensors\u002Findex)\n- [Safetensors: a simple, safe and faster way to store and distribute tensors.](https:\u002F\u002Fmedium.com\u002F@mandalsouvik\u002Fsafetensors-a-simple-and-safe-way-to-store-and-distribute-tensors-d9ba1931ba04)\n- https:\u002F\u002Fhuggingface.co\u002Fdocs\u002Fsafetensors\u002Findex\n- https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fsafetensors\u002Ftree\u002Fv0.3.3\n- [手把手教你：LLama2原始权重转HF模型](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F669158180)\n\n\n## AI框架\n\n\n\n\n### PyTorch\n\n- [PyTorch 源码解读系列](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F328674159) @ OpenMMLab 团队\n- [[源码解析] PyTorch 分布式](https:\u002F\u002Fjuejin.cn\u002Fpost\u002F7026144707591815175) @ 罗西的思考\n- [PyTorch 分布式(18) --- 使用 RPC 的分布式流水线并行](https:\u002F\u002Fjuejin.cn\u002Fpost\u002F7043601075307282462) @ 罗西的思考\n- [【Pytorch】model.train() 和 model.eval() 原理与用法](https:\u002F\u002Fblog.csdn.net\u002Fweixin_44211968\u002Farticle\u002Fdetails\u002F123774649)\n\n### DeepSpeed\n\n- [DeepSpeed使用指南(简略版)](https:\u002F\u002Fblog.csdn.net\u002Fweixin_43301333\u002Farticle\u002Fdetails\u002F127237122)\n- [关于Deepspeed的一些总结与心得](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F650824387)\n\n\n### Megatron-LM\n\n- [Megatron-LM 近期的改动](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F651192295)\n- [深入理解 Megatron-LM（1）基础知识](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F650234985) @ 简枫\n- [深入理解 
Megatron-LM（2）原理介绍](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F650383289)\n- [[源码解析] 模型并行分布式训练Megatron (1) --- 论文 & 基础](https:\u002F\u002Fjuejin.cn\u002Fpost\u002F7057837676430360584) @ 罗西的思考\n- [[源码解析] 模型并行分布式训练Megatron (2) --- 整体架构](https:\u002F\u002Fjuejin.cn\u002Fpost\u002F7061942798957674504)\n- [[细读经典]Megatron论文和代码详细分析(1)](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F366906920) @迷途小书僮​\n- [[细读经典]Megatron论文和代码详细分析(2)](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F388830967)\n\n\n### Megatron-DeepSpeed\n\n\n### Huggingface Transformers\n\n\n\n\n## [AI基础知识](.\u002Fai-base.md)\n\n\n## AI基础设施\n\n### AI芯片\n\n- [业界AI加速芯片浅析（一）百度昆仑芯](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F593143821)\n- NVIDIA CUDA-X AI：https:\u002F\u002Fwww.nvidia.cn\u002Ftechnologies\u002Fcuda-x\u002F\n- [Intel，Nvidia，AMD三大巨头火拼GPU与CPU](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F629024100)\n- 处理器与AI芯片-Google-TPU：https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F646793355\n- [一文看懂国产AI芯片玩家](https:\u002F\u002Fwww.xckfsq.com\u002Fnews\u002Fshow.html?id=29187)\n- [深度 | 国产AI芯片，玩家几何](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzIwMzgzNTQ1Nw==&mid=2247599349&idx=1&sn=12459cbc418d3831d0c28e87ddb71b2f&scene=21#wechat_redirect)\n\n\n### CUDA\n\n- [CUDA编程入门（一）CUDA编程模型](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F97044592)\n- [CUDA编程入门（二）GPU硬件基础](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F97131966)\n- [GPU编程（CUDA）](https:\u002F\u002Fface2ai.com\u002Fprogram-blog\u002F)\n- [CUDA编程入门极简教程](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F34587739)\n\n\n\n\n## AI编译器\n\n- [TVM资料](https:\u002F\u002Fgithub.com\u002FBBuf\u002Ftvm_mlir_learn)\n- [AI编译器原理](https:\u002F\u002Fwww.bilibili.com\u002Fread\u002Fcv21242696\u002F?spm_id_from=333.999.0.0) @ZIMO酱\n\n\n## LLM应用开发\n\n- [动手学大模型应用开发](https:\u002F\u002Fgithub.com\u002Fdatawhalechina\u002Fllm-universe)\n- [langchain java](https:\u002F\u002Fgithub.com\u002FHamaWhiteGG\u002Flangchain-java)\n- 
[大模型主流应用RAG的介绍——从架构到技术细节](https:\u002F\u002Fluxiangdong.com\u002F2023\u002F09\u002F25\u002Fragone\u002F#\u002F%E5%86%99%E5%9C%A8%E5%89%8D%E9%9D%A2)\n- [基于检索的大语言模型和应用（陈丹琦）](https:\u002F\u002Facl2023-retrieval-lm.github.io\u002F)\n- [大模型bad case修复方案思考](https:\u002F\u002Fmp.weixin.qq.com\u002Fs\u002FxqFkfzHVnePf1ub_sCk9iw)\n- [《综述：全新大语言模型驱动的Agent》——4.5万字详细解读复旦NLP和米哈游最新Agent Survey](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F656676717)\n\n\n\n\n## LLMOps\n\n- [MLOps Landscape in 2023: Top Tools and Platforms](https:\u002F\u002Fneptune.ai\u002Fblog\u002Fmlops-tools-platforms-landscape)\n- [What Constitutes A Large Language Model Application?  ](https:\u002F\u002Fcobusgreyling.medium.com\u002Fwhat-constitutes-a-large-language-model-application-bacf81103475)：LLM Functionality Landscape\n- [AI System @吃果冻不吐果冻皮](https:\u002F\u002Fgithub.com\u002Fliguodongiot\u002Fai-system)\n\n\n\n\n## RAG\n\n- https:\u002F\u002Fgithub.com\u002Fhymie122\u002FRAG-Survey\n\n## 书籍\n\n- 大语言模型原理与工程 @杨青\n- [大语言模型从理论到实践](https:\u002F\u002Fintro-llm.github.io\u002Fchapter\u002FLLM-TAP.pdf) @张奇 ：https:\u002F\u002Fintro-llm.github.io\u002F\n- [动手学大模型](https:\u002F\u002Fgithub.com\u002FLordog\u002Fdive-into-llms?tab=readme-ov-file)\n\n## LLM实践\n\n- [minGPT @karpathy](https:\u002F\u002Fgithub.com\u002Fkarpathy\u002FminGPT)\n- [llm.c @karpathy](https:\u002F\u002Fgithub.com\u002Fkarpathy\u002Fllm.c): LLM training in simple, raw C\u002FCUDA\n- [LLM101n](https:\u002F\u002Fgithub.com\u002Fkarpathy\u002FLLM101n)\n- [llama2.c](https:\u002F\u002Fgithub.com\u002Fkarpathy\u002Fllama2.c): Inference Llama 2 in one file of pure C\n- [nanoGPT](https:\u002F\u002Fgithub.com\u002Fkarpathy\u002FnanoGPT)\n- [Baby-Llama2-Chinese](https:\u002F\u002Fgithub.com\u002FDLLXW\u002Fbaby-llama2-chinese)\n- [从0到1构建一个MiniLLM](https:\u002F\u002Fgithub.com\u002FTongjilibo\u002Fbuild_MiniLLM_from_scratch)\n- [gpt-fast](https:\u002F\u002Fgithub.com\u002Fpytorch-labs\u002Fgpt-fast) 
、[blog](https:\u002F\u002Fpytorch.org\u002Fblog\u002Faccelerating-generative-ai-2\u002F)\n- [CSE 234: Data Systems for Machine Learning](https:\u002F\u002Fhao-ai-lab.github.io\u002Fcse234-w25\u002F)\n- [DSC 291: Machine Learning Systems](https:\u002F\u002Fhao-ai-lab.github.io\u002Fdsc291-s24\u002F)\n\n\n## 大模型汇总资料\n\n- [Awesome-Chinese-LLM](https:\u002F\u002Fgithub.com\u002FHqWu-HITCS\u002FAwesome-Chinese-LLM)\n- [Awesome-LLM-Survey](https:\u002F\u002Fgithub.com\u002FHqWu-HITCS\u002FAwesome-LLM-Survey)\n- [Large Language Model Course](https:\u002F\u002Fgithub.com\u002Fmlabonne\u002Fllm-course)\n- [Awesome-Quantization-Papers](https:\u002F\u002Fgithub.com\u002FZhen-Dong\u002FAwesome-Quantization-Papers)\n- [Awesome Model Quantization (GitHub)](https:\u002F\u002Fgithub.com\u002Fhtqin\u002Fawesome-model-quantization)\n- [Awesome Transformer Attention (GitHub)](https:\u002F\u002Fgithub.com\u002Fcmhungsteve\u002FAwesome-Transformer-Attention)\n- [语言模型数据选择综述](https:\u002F\u002Fgithub.com\u002Falon-albalak\u002Fdata-selection-survey)\n- [Awesome Knowledge Distillation of LLM Papers](https:\u002F\u002Fgithub.com\u002FTebmer\u002FAwesome-Knowledge-Distillation-of-LLMs)\n- [Awasome-Pruning @ghimiredhikura](https:\u002F\u002Fgithub.com\u002Fghimiredhikura\u002FAwasome-Pruning)\n- [Awesome-Pruning @he-y](https:\u002F\u002Fgithub.com\u002Fhe-y\u002FAwesome-Pruning)\n- [awesome-pruning @hrcheng1066](https:\u002F\u002Fgithub.com\u002Fhrcheng1066\u002Fawesome-pruning)\n- [Awesome-LLM-Inference](https:\u002F\u002Fgithub.com\u002FDefTruth\u002FAwesome-LLM-Inference)\n\n\n## 微信公众号文章集锦\n\n- [2024年2月大模型文章集锦](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247487320&idx=2&sn=522fdf838d4ec03f24dbc7a11a3a5a65&chksm=fd3be60bca4c6f1d0c9b0643db0d7334940fb592dac3b5fbf286c7232f6bb08b968fbd237a20&scene=21#wechat_redirect)\n- 
[2024年1月大模型文章集锦](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247487067&idx=2&sn=33594e6a82cf79a7580272c064635d75&chksm=fd3be708ca4c6e1ece0e1f6cc22bfd286bf3e9073350b91369b1d0e7fb52b50fac8113288e43&scene=21#wechat_redirect)\n- [2023年12月大模型文章集锦](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247486824&idx=2&sn=4faaac42f983af46cce44b35dd416c5f&chksm=fd3be43bca4c6d2d6f5fd1cf3004c37782d0b829111ad5ecd155d6cd3adedd40655653271ba1&scene=21#wechat_redirect)\n- [2023年6-11月大模型文章集锦](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247486480&idx=2&sn=b6b504f9d67a3cdad5ba0eb68eee647b&chksm=fd3be543ca4c6c55e0c2fd335de92103a1aee4e5631be34f06d7557463bc7e339fb63680ad54&scene=21&poc_token=HCwA9WWjTC-CNeedW8iQ1lZwSAwg4fwWFAVcUnai)\n\n\n## 其他\n\n- [Hugging Face 博客](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fblog\u002Ftree\u002Fmain)\n\n\n\n","# llm-resource（LLM 百宝箱）\n\nLLM全栈优质资源汇总\n\n> 非常欢迎大家也参与进来，收集更多优质大模型相关资源。\n\n## 目录\n\n- 🐼 [LLM算法](#llm算法)\n- 🐘 [LLM训练](#llm训练)\n\t- 🐘 [LLM微调](#llm微调)\n\t- 🐼 [LLM对齐](#llm对齐)\n- 🔥 [LLM推理](#llm推理)\n- :palm_tree: [LLM数据工程（Data Engineering）](#llm数据工程)\n- 📡 [LLM压缩](#llm压缩)\n- 🐰 [LLM测评](#llm测评)\n- 🐘 [AI基础知识](#ai基础知识)\n- 📡 [AI基础设施](#ai基础设施)\n\t- :palm_tree: [AI芯片](#ai芯片)\n\t- 🐰 [CUDA](#cuda)\n- 🐘 [AI编译器](#ai编译器)\n- 🐰 [AI框架](#ai框架)\n- 📡 [LLM应用开发](#llm应用开发)\n- 🐘 [LLMOps](#llmops)\n- 📡 [LLM实践](llm实践)\n- 📡[微信公众号文章集锦](#微信公众号文章集锦)\n\n\n\n## LLM算法\n\n\n### Transformer\n\n原理：\n- [Transformer模型详解（图解最完整版](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F338817680)\n- [OpenAI ChatGPT（一）：十分钟读懂 Transformer](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F600773858)\n- [Transformer的结构是什么样的？各个子模块各有什么作用？](https:\u002F\u002Fblog.csdn.net\u002Fm0_54929869\u002Farticle\u002Fdetails\u002F118881804)\n- [以Transformer结构为基础的大模型参数量、计算量、中间激活以及KV cache剖析](https:\u002F\u002Fmp.weixin.qq.com\u002Fs\u002F3JYz6yrLeBr5ujip3LZe6w)\n- [Transformer 
一起动手编码学原理](https:\u002F\u002Fmp.weixin.qq.com\u002Fs\u002FNgUNuWhvp2SqG-XWYv2PGQ)\n- [为什么transformer(Bert)的多头注意力要对每一个head进行降维？](http:\u002F\u002Fwww.sniper97.cn\u002Findex.php\u002Fnote\u002Fdeep-learning\u002Fnote-deep-learning\u002F4002\u002F)\n- [Decoder-Only Transformers: The Workhorse of Generative LLMs](https:\u002F\u002Fcameronrwolfe.substack.com\u002Fp\u002Fdecoder-only-transformers-the-workhorse)\n\n\n源码：\n\n- [OpenAI ChatGPT（一）：Tensorflow实现Transformer](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F603243890)\n- [OpenAI ChatGPT（一）：十分钟读懂 Transformer](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F600773858)\n- [GPT （一）transformer原理和代码详解](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F632880248)\n- [Transformer源码详解（Pytorch版本）](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F398039366)\n- [搞懂Transformer结构，看这篇PyTorch实现就够了](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F339207092)\n\n\n\n### GPT1\n\n\n### GPT2\n\n\n- GPT2 源码：https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Ftransformers\u002Fblob\u002Fmain\u002Fsrc\u002Ftransformers\u002Fmodels\u002Fgpt2\u002Fmodeling_gpt2.py\n- GPT2 源码解析：https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F630970209\n- nanoGPT：https:\u002F\u002Fgithub.com\u002Fkarpathy\u002FnanoGPT\u002Fblob\u002Fmaster\u002Fmodel.py\n\n\n- 7.3 GPT2模型深度解析：http:\u002F\u002F121.199.45.168:13013\u002F7_3.html\n- GPT（三）GPT2原理和代码详解: https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F637782385\n- GPT2参数量剖析: https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F640501114\n\n\n### ChatGPT\n\n- [State of GPT：大神Andrej揭秘OpenAI大模型原理和训练过程](https:\u002F\u002Fmp.weixin.qq.com\u002Fs\u002FzmEGzm1cdXupNoqZ65h7yg)\n- [OpenAI联合创始人亲自上场科普GPT，让技术小白也能理解最强AI](https:\u002F\u002Fmp.weixin.qq.com\u002Fs\u002FMD4WwwJLXm8rEm-sniX8Gw)\n\n\n\n\n\n### GLM\n\n- [预训练语言模型：GLM](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F641499380)\n\n\n### LLaMA\n\n\n\n### MOE 大模型\n\n- [Mixtral-8x7B 
MoE大模型微调实践，超越Llama2-65B](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247486657&idx=1&sn=c5a5e55b01243f477d063c9194d24f42&chksm=fd3be592ca4c6c84bf5eefff23dcc38eeb83624e9f53bbd92d74b1d06190442759aabe7b560f22&token=499509118&lang=zh_CN#rd)\n- [大模型分布式训练并行技术（八）-MOE并行](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247486145&idx=1&sn=299c28153b286465be26e18153c6db5d&chksm=fd3be392ca4c6a84be283dad80f584443302ea29fc95744f837272759aabe7b560f22&token=499509118&lang=zh_CN#rd)\n- [MoE架构模型爆发或将带飞国产AI芯片](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247488422&idx=1&sn=eeb18ec0f5b9e972df31d65e7db13f8f&chksm=fd3bfaf5ca4c73e38a696fe7b6f33a30af962fdddfabd92d74b1d06190442759aabe7b560f22&token=499509118&lang=zh_CN#rd)\n- [大模型的模型融合方法概述](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247487652&idx=1&sn=1bbf692b6e1dc6bae719c8e0a10293a0&chksm=fd3bf9f7ca4c70e16473a98d5408f6daea5e8c116a88cb3f41dfb00ffb7f6016874ee092224c&token=499509118&lang=zh_CN#rd)\n- [混合专家模型 (MoE) 详解](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F674698482)\n- [群魔乱舞：MoE大模型详解](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F677638939)\n- [大模型LLM之混合专家模型MoE（上-基础篇）](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F672712751)\n- [大模型LLM之混合专家模型MoE（下-实现篇）](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F673048264)\n\n\n### 下一代大模型\n\n- https:\u002F\u002Fgithub.com\u002FNExT-GPT\u002FNExT-GPT\n- https:\u002F\u002Fnext-gpt.github.io\u002F\n- [Introduction to NExT-GPT: Any-to-Any Multimodal Large Language Model](https:\u002F\u002Fwww.kdnuggets.com\u002Fintroduction-to-nextgpt-anytoany-multimodal-large-language-model)\n\n\n\n### 多模态大模型\n\nA Survey on Multimodal Large Language Models：https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.13549\nEfficient-Multimodal-LLMs-Survey：https:\u002F\u002Fgithub.com\u002Flijiannuist\u002FEfficient-Multimodal-LLMs-Survey\n\n\n### 其他\n\n- 
[大模型时代的归一化技术：解密Transformer架构中Pre-Norm与RMSNorm的黄金组合](https:\u002F\u002Fblog.csdn.net\u002Fqq_54445177\u002Farticle\u002Fdetails\u002F147096307)\n\n\n\n\n\n## LLM训练\n\n\n- [分布式训练 Playbook](https:\u002F\u002Fhuggingface.co\u002Fspaces\u002Fnanotron\u002Fultrascale-playbook)\n- [OPT-175B是如何炼成的](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F622061951)\n- [全网最全-混合精度训练原理](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F441591808)\n- [飞桨分布式训练4D混合并行可训千亿级AI模型](https:\u002F\u002Fai.baidu.com\u002Fforum\u002Ftopic\u002Fshow\u002F987996)\n- [Transformer Math 101](https:\u002F\u002Fblog.eleuther.ai\u002Ftransformer-math\u002F) - 如何计算显存消耗?\n- [Megatron-LM 第三篇Paper总结——Sequence Parallelism & Selective Checkpointing](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F522198082)\n- [大模型训练踩坑](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F660759033)\n\n\n- 学习率(warmup, decay)：\n\t- [模型调优，学习率设置（Warm Up、loss自适应衰减等），batch size调优技巧，基于方差放缩初始化方法](https:\u002F\u002Fblog.csdn.net\u002Fsinat_39620217\u002Farticle\u002Fdetails\u002F130236886)\n\t- [深度学习模型训练小技巧](https:\u002F\u002Fblog.csdn.net\u002Fsgyuanshi\u002Farticle\u002Fdetails\u002F108394444)\n\n\n### LLM微调\n\n- [Adapting P-Tuning to Solve Non-English Downstream Tasks](https:\u002F\u002Fdeveloper.nvidia.com\u002Fblog\u002Fadapting-p-tuning-to-solve-non-english-downstream-tasks\u002F)\n\n\n### LLM对齐\n\n- [MOSS-RLHF](https:\u002F\u002Fgithub.com\u002FOpenLMLab\u002FMOSS-RLHF)\n- [模型调优（RLHF\u002FDPO\u002FORPO）- 终极指南](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F692594519)\n- [DPO: Direct Preference Optimization 论文解读及代码实践](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F642569664)\n- [强化学习入门：基本思想和经典算法](https:\u002F\u002Fimzhanghao.com\u002F2022\u002F02\u002F10\u002Freinforcement-learning\u002F)\n- [人人都能看懂的PPO原理与源码解读](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F677607581)\n- [关于Instruct GPT复现的一些细节与想法](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F609078527)\n- [【RLHF】RL 究竟是如何与 LLM 
做结合的？](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F675329917)\n- [【RLHF】想训练ChatGPT？得先弄明白Reward Model怎么训（附源码）](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F595579042)\n- [Reinforcement Learning from Human Feedback 全家桶（RL 侧）](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F700149886)\n\npaper:\n\n- [LLM对齐综述](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2407.16216)\n- [RLHF-PPO](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2203.02155)\n- [DPO](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.18290)\n- [ORPO](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2403.07691)\n\n## LLM推理\n\n\n- [使用HuggingFace的Accelerate库加载和运行超大模型](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F605640431) : device_map、no_split_module_classes、 offload_folder、 offload_state_dict\n- [借助 PyTorch，Accelerate 如何运行超大模型](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Faccelerate-large-models)\n- [使用 DeepSpeed 和 Accelerate 进行超快 BLOOM 模型推理](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Fzh\u002Fbloom-inference-pytorch-scripts)\n- [LLM七种推理服务框架总结](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F653352979)\n- [LLM投机采样（Speculative Sampling）为何能加速模型推理](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F653734659)\n- [大模型推理妙招—投机采样（Speculative Decoding）](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F651359908)\n- https:\u002F\u002Fgithub.com\u002Fflexflow\u002FFlexFlow\u002Ftree\u002Finference\n- [TensorRT-LLM(3)--架构](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F665595557)\n- NLP（十八）：LLM 的推理优化技术纵览：https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F642412124\n- ​揭秘NVIDIA大模型推理框架：TensorRT-LLM：https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F680808866\n- [如何生成文本: 通过 Transformers 用不同的解码方法生成文本](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Fzh\u002Fhow-to-generate) | [How to generate text: using different decoding methods for language generation with Transformers](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Fhow-to-generate)\n- [DeepSeek-V3 \u002F R1 
推理系统概览](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F27181462601)\n\n\n\n### 大模型推理优化技术\n\n\nKV Cache：\n- [图解大模型推理优化：KV Cache](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247486956&idx=1&sn=cd5e36857bbd8ebd750d2c172550d2bd&chksm=fd3be4bfca4c6da9f2276310995c7d60a42c0d01a960a42a38226cf954bab0d2d2bd&token=1409805983&lang=zh_CN#rd)\n- [大模型推理百倍加速之KV cache篇](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247487886&idx=1&sn=38d3cd36c6c5acb2fe5c80ceffcba2cf&chksm=fd3bf8ddca4c71cb243566b593dfa095926b0836e908b4af812b4afeb1151af1&token=1409805983&lang=zh_CN#rd)\n- [大模型推理加速：看图学KV Cache](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F662498827)\n- [大模型推理性能优化之KV Cache解读](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F630832593)\n\n\n解码优化：\n- [大模型推理妙招—投机采样（Speculative Decoding）](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F651359908)\n\n\n\n\n### vLLM\n\n- [vLLM（六）源码解读下 @HelloWorld](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F694442998)\n- [猛猿：图解大模型计算加速系列：vLLM源码解析1，整体架构](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F691045737)\n- [LLM推理2：vLLM源码学习 @ akaihaoshuai ](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F643336063)\n- [大模型推理框架 vLLM 源码解析（一）：框架概览](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F681402162)\n\n\n## LLM数据工程\n\n- [An Initial Exploration of Theoretical Support for Language Model Data Engineering. 
Part 1: Pretraining @\n符尧](https:\u002F\u002Fyaofu.notion.site\u002FAn-Initial-Exploration-of-Theoretical-Support-for-Language-Model-Data-Engineering-Part-1-Pretraini-dc480d9bf7ff4659afd8c9fb738086eb)\n\n\n\n## LLM压缩\n\n\n\n- [Awesome Model Quantization](https:\u002F\u002Fgithub.com\u002Fhtqin\u002Fawesome-model-quantization)\n- [Efficient-LLMs-Survey](https:\u002F\u002Fgithub.com\u002FAIoT-MLSys-Lab\u002FEfficient-LLMs-Survey)\n- [Awesome LLM Compression](https:\u002F\u002Fgithub.com\u002FHuangOwen\u002FAwesome-LLM-Compression)\n- [模型转换、模型压缩、模型加速工具汇总](https:\u002F\u002Fblog.csdn.net\u002FWZZ18191171661\u002Farticle\u002Fdetails\u002F99700992)\n- [AI 框架部署方案之模型转换](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F396781295)\n- [Pytorch 模型转 TensorRT (torch2trt 教程)](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F570822430)\n\n\n\n## LLM测评\n\n- [CLiB中文大模型能力评测榜单](https:\u002F\u002Fgithub.com\u002Fjeinlee1991\u002Fchinese-llm-benchmark)\n- [huggingface Open LLM Leaderboard](https:\u002F\u002Fhuggingface.co\u002Fspaces\u002FHuggingFaceH4\u002Fopen_llm_leaderboard)\n- HELM：https:\u002F\u002Fgithub.com\u002Fstanford-crfm\u002Fhelm\n- HELM：https:\u002F\u002Fcrfm.stanford.edu\u002Fhelm\u002Flatest\u002F\n- lm-evaluation-harness：https:\u002F\u002Fgithub.com\u002FEleutherAI\u002Flm-evaluation-harness\u002F\n- CLEVA：http:\u002F\u002Fwww.lavicleva.com\u002F#\u002Fhomepage\u002Foverview\n- CLEVA：https:\u002F\u002Fgithub.com\u002FLaVi-Lab\u002FCLEVA\u002Fblob\u002Fmain\u002FREADME_zh-CN.md\n\n\n\n## 提示工程\n\n\n- [做数据关键步骤：怎么写好prompt？](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247486771&idx=1&sn=359c029b010d7ad96fff33952ad634a8&chksm=fd3be460ca4c6d76b4996f971ff21080ca0a83f3042893bb6827752ad8af812b4afeb1151af1&token=1288418017&lang=zh_CN#rd)\n- 
[从1000+模板中总结出的10大提示工程方法助你成为提示词大师！](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247486174&idx=1&sn=97ddcd5fb44eb4e3143fa746b7d617c8&chksm=fd3be38dca4c6a9b94fb88bd3f7a5009dee53812412e6f62cd5e52489fb141d338&scene=21#wechat_redirect)\n- [一文搞懂提示工程的原理及前世今生](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247485231&idx=1&sn=acfa77264da611983a49297ab8376e8f&chksm=fd3bee7cca4c676a3ccbc459e70a9e99444b23d437cf49262cd5e52489fb141d338&scene=21#wechat_redirect)\n- [Effective Prompt: 编写高质量Prompt的14个有效方法](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247486087&idx=1&sn=118b82abd4b22975e9aeb9f23ed0c9c5&chksm=fd3be3d4ca4c6ac2b41f1c3e908b845d44977412e6f62cd5e52489fb141d338&token=1288418017&lang=zh_CN#rd)\n- [提示工程和提示构造技巧](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247487107&idx=1&sn=337325ee6a9a4d4c56821b1e759f1555&chksm=fd3be7d0ca4c6ec60b6394bf76282ee3eef6beccfe2c31885cbb111a5bdc32022ba346509681&token=1288418017&lang=zh_CN#rd)\n- [一文带你了解提示攻击！](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247485936&idx=1&sn=0bcc72e5bfeb50c437253626d763f67d&chksm=fd3be0a3ca4c69b52bba0e0f22730b497c56fad99444b23d437cf49262cd5e52489fb141d338&token=1288418017&lang=zh_CN#rd)\n\n\n\n## 综合\n\n- [通向AGI之路：大型语言模型（LLM）技术精要](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F597586623)\n- [大语言模型的涌现能力：现象与解释](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F621438653)\n- [NLP（十八）：LLM 的推理优化技术纵览](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F642412124)\n- [并行计算3：并行计算模型](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F568947162)\n- [大模型“幻觉”，看这一篇就够了 | 哈工大华为出品](https:\u002F\u002Fwww.thepaper.cn\u002FnewsDetail_forward_25344873)\n- [深入理解语言模型的困惑度(perplexity)](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F686808564)\n\n\n\n**safetensors**：\n\n- [bin和safetensors区别是什么？](https:\u002F\u002Fwww.zhihu.com\u002Fquestion\u002F629624037\u002Fanswer\u002F3307818120)\n- 
[Safetensors：保存模型权重的新格式](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F691446249)\n- [github: safetensors](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fsafetensors)\n- [huggingface: safetensors](https:\u002F\u002Fhuggingface.co\u002Fdocs\u002Fsafetensors\u002Findex)\n- [Safetensors: a simple, safe and faster way to store and distribute tensors.](https:\u002F\u002Fmedium.com\u002F@mandalsouvik\u002Fsafetensors-a-simple-and-safe-way-to-store-and-distribute-tensors-d9ba1931ba04)\n- https:\u002F\u002Fhuggingface.co\u002Fdocs\u002Fsafetensors\u002Findex\n- https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fsafetensors\u002Ftree\u002Fv0.3.3\n- [手把手教你：LLama2原始权重转HF模型](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F669158180)\n\n\n## AI框架\n\n\n\n\n### PyTorch\n\n- [PyTorch 源码解读系列](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F328674159) @ OpenMMLab 团队\n- [[源码解析] PyTorch 分布式](https:\u002F\u002Fjuejin.cn\u002Fpost\u002F7026144707591815175) @ 罗西的思考\n- [PyTorch 分布式(18) --- 使用 RPC 的分布式流水线并行](https:\u002F\u002Fjuejin.cn\u002Fpost\u002F7043601075307282462) @ 罗西的思考\n- [【Pytorch】model.train() 和 model.eval() 原理与用法](https:\u002F\u002Fblog.csdn.net\u002Fweixin_44211968\u002Farticle\u002Fdetails\u002F123774649)\n\n### DeepSpeed\n\n- [DeepSpeed使用指南(简略版)](https:\u002F\u002Fblog.csdn.net\u002Fweixin_43301333\u002Farticle\u002Fdetails\u002F127237122)\n- [关于Deepspeed的一些总结与心得](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F650824387)\n\n### Megatron-LM\n\n- [Megatron-LM 近期的改动](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F651192295)\n- [深入理解 Megatron-LM（1）基础知识](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F650234985) @ 简枫\n- [深入理解 Megatron-LM（2）原理介绍](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F650383289)\n- [[源码解析] 模型并行分布式训练Megatron (1) --- 论文 & 基础](https:\u002F\u002Fjuejin.cn\u002Fpost\u002F7057837676430360584) @ 罗西的思考\n- [[源码解析] 模型并行分布式训练Megatron (2) --- 整体架构](https:\u002F\u002Fjuejin.cn\u002Fpost\u002F7061942798957674504)\n- 
[[细读经典]Megatron论文和代码详细分析(1)](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F366906920) @迷途小书僮​\n- [[细读经典]Megatron论文和代码详细分析(2)](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F388830967)\n\n\n### Megatron-DeepSpeed\n\n\n### Huggingface Transformers\n\n\n\n\n## [AI基础知识](.\u002Fai-base.md)\n\n\n## AI基础设施\n\n### AI芯片\n\n- [业界AI加速芯片浅析（一）百度昆仑芯](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F593143821)\n- NVIDIA CUDA-X AI：https:\u002F\u002Fwww.nvidia.cn\u002Ftechnologies\u002Fcuda-x\u002F\n- [Intel，Nvidia，AMD三大巨头火拼GPU与CPU](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F629024100)\n- 处理器与AI芯片-Google-TPU：https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F646793355\n- [一文看懂国产AI芯片玩家](https:\u002F\u002Fwww.xckfsq.com\u002Fnews\u002Fshow.html?id=29187)\n- [深度 | 国产AI芯片，玩家几何](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzIwMzgzNTQ1Nw==&mid=2247599349&idx=1&sn=12459cbc418d3831d0c28e87ddb71b2f&scene=21#wechat_redirect)\n\n\n### CUDA\n\n- [CUDA编程入门（一）CUDA编程模型](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F97044592)\n- [CUDA编程入门（二）GPU硬件基础](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F97131966)\n- [GPU编程（CUDA）](https:\u002F\u002Fface2ai.com\u002Fprogram-blog\u002F)\n- [CUDA编程入门极简教程](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F34587739)\n\n\n\n\n## AI编译器\n\n- [TVM资料](https:\u002F\u002Fgithub.com\u002FBBuf\u002Ftvm_mlir_learn)\n- [AI编译器原理](https:\u002F\u002Fwww.bilibili.com\u002Fread\u002Fcv21242696\u002F?spm_id_from=333.999.0.0) @ZIMO酱\n\n\n## LLM应用开发\n\n- [动手学大模型应用开发](https:\u002F\u002Fgithub.com\u002Fdatawhalechina\u002Fllm-universe)\n- [langchain java](https:\u002F\u002Fgithub.com\u002FHamaWhiteGG\u002Flangchain-java)\n- [大模型主流应用RAG的介绍——从架构到技术细节](https:\u002F\u002Fluxiangdong.com\u002F2023\u002F09\u002F25\u002Fragone\u002F#\u002F%E5%86%99%E5%9C%A8%E5%89%8D%E9%9D%A2)\n- [基于检索的大语言模型和应用（陈丹琦）](https:\u002F\u002Facl2023-retrieval-lm.github.io\u002F)\n- [大模型bad case修复方案思考](https:\u002F\u002Fmp.weixin.qq.com\u002Fs\u002FxqFkfzHVnePf1ub_sCk9iw)\n- 
[《综述：全新大语言模型驱动的Agent》——4.5万字详细解读复旦NLP和米哈游最新Agent Survey](https:\u002F\u002Fzhuanlan.zhihu.com\u002Fp\u002F656676717)\n\n\n\n\n## LLMOps\n\n- [MLOps Landscape in 2023: Top Tools and Platforms](https:\u002F\u002Fneptune.ai\u002Fblog\u002Fmlops-tools-platforms-landscape)\n- [What Constitutes A Large Language Model Application?  ](https:\u002F\u002Fcobusgreyling.medium.com\u002Fwhat-constitutes-a-large-language-model-application-bacf81103475)：LLM Functionality Landscape\n- [AI System @吃果冻不吐果冻皮](https:\u002F\u002Fgithub.com\u002Fliguodongiot\u002Fai-system)\n\n\n\n\n## RAG\n\n- https:\u002F\u002Fgithub.com\u002Fhymie122\u002FRAG-Survey\n\n## 书籍\n\n- 大语言模型原理与工程 @杨青\n- [大语言模型从理论到实践](https:\u002F\u002Fintro-llm.github.io\u002Fchapter\u002FLLM-TAP.pdf) @张奇 ：https:\u002F\u002Fintro-llm.github.io\u002F\n- [动手学大模型](https:\u002F\u002Fgithub.com\u002FLordog\u002Fdive-into-llms?tab=readme-ov-file)\n\n## LLM实践\n\n- [minGPT @karpathy](https:\u002F\u002Fgithub.com\u002Fkarpathy\u002FminGPT)\n- [llm.c @karpathy](https:\u002F\u002Fgithub.com\u002Fkarpathy\u002Fllm.c): LLM training in simple, raw C\u002FCUDA\n- [LLM101n](https:\u002F\u002Fgithub.com\u002Fkarpathy\u002FLLM101n)\n- [llama2.c](https:\u002F\u002Fgithub.com\u002Fkarpathy\u002Fllama2.c): Inference Llama 2 in one file of pure C\n- [nanoGPT](https:\u002F\u002Fgithub.com\u002Fkarpathy\u002FnanoGPT)\n- [Baby-Llama2-Chinese](https:\u002F\u002Fgithub.com\u002FDLLXW\u002Fbaby-llama2-chinese)\n- [从0到1构建一个MiniLLM](https:\u002F\u002Fgithub.com\u002FTongjilibo\u002Fbuild_MiniLLM_from_scratch)\n- [gpt-fast](https:\u002F\u002Fgithub.com\u002Fpytorch-labs\u002Fgpt-fast) 、[blog](https:\u002F\u002Fpytorch.org\u002Fblog\u002Faccelerating-generative-ai-2\u002F)\n- [CSE 234: Data Systems for Machine Learning](https:\u002F\u002Fhao-ai-lab.github.io\u002Fcse234-w25\u002F)\n- [DSC 291: Machine Learning Systems](https:\u002F\u002Fhao-ai-lab.github.io\u002Fdsc291-s24\u002F)\n\n\n## 大模型汇总资料\n\n- 
[Awesome-Chinese-LLM](https:\u002F\u002Fgithub.com\u002FHqWu-HITCS\u002FAwesome-Chinese-LLM)\n- [Awesome-LLM-Survey](https:\u002F\u002Fgithub.com\u002FHqWu-HITCS\u002FAwesome-LLM-Survey)\n- [Large Language Model Course](https:\u002F\u002Fgithub.com\u002Fmlabonne\u002Fllm-course)\n- [Awesome-Quantization-Papers](https:\u002F\u002Fgithub.com\u002FZhen-Dong\u002FAwesome-Quantization-Papers)\n- [Awesome Model Quantization (GitHub)](https:\u002F\u002Fgithub.com\u002Fhtqin\u002Fawesome-model-quantization)\n- [Awesome Transformer Attention (GitHub)](https:\u002F\u002Fgithub.com\u002Fcmhungsteve\u002FAwesome-Transformer-Attention)\n- [语言模型数据选择综述](https:\u002F\u002Fgithub.com\u002Falon-albalak\u002Fdata-selection-survey)\n- [Awesome Knowledge Distillation of LLM Papers](https:\u002F\u002Fgithub.com\u002FTebmer\u002FAwesome-Knowledge-Distillation-of-LLMs)\n- [Awasome-Pruning @ghimiredhikura](https:\u002F\u002Fgithub.com\u002Fghimiredhikura\u002FAwasome-Pruning)\n- [Awesome-Pruning @he-y](https:\u002F\u002Fgithub.com\u002Fhe-y\u002FAwesome-Pruning)\n- [awesome-pruning @hrcheng1066](https:\u002F\u002Fgithub.com\u002Fhrcheng1066\u002Fawesome-pruning)\n- [Awesome-LLM-Inference](https:\u002F\u002Fgithub.com\u002FDefTruth\u002FAwesome-LLM-Inference)\n\n\n## 微信公众号文章集锦\n\n- [2024年2月大模型文章集锦](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247487320&idx=2&sn=522fdf838d4ec03f24dbc7a11a3a5a65&chksm=fd3be60bca4c6f1d0c9b0643db0d7334940fb592dac3b5fbf286c7232f6bb08b968fbd237a20&scene=21#wechat_redirect)\n- [2024年1月大模型文章集锦](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247487067&idx=2&sn=33594e6a82cf79a7580272c064635d75&chksm=fd3be708ca4c6e1ece0e1f6cc22bfd286bf3e9073350b91369b1d0e7fb52b50fac8113288e43&scene=21#wechat_redirect)\n- 
[2023年12月大模型文章集锦](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247486824&idx=2&sn=4faaac42f983af46cce44b35dd416c5f&chksm=fd3be43bca4c6d2d6f5fd1cf3004c37782d0b829111ad5ecd155d6cd3adedd40655653271ba1&scene=21#wechat_redirect)\n- [2023年6-11月大模型文章集锦](https:\u002F\u002Fmp.weixin.qq.com\u002Fs?__biz=MzU3Mzg5ODgxMg==&mid=2247486480&idx=2&sn=b6b504f9d67a3cdad5ba0eb68eee647b&chksm=fd3be543ca4c6c55e0c2fd335de92103a1aee4e5631be34f06d7557463bc7e339fb63680ad54&scene=21&poc_token=HCwA9WWjTC-CNeedW8iQ1lZwSAwg4fwWFAVcUnai)\n\n\n## 其他\n\n- [Hugging Face 博客](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fblog\u002Ftree\u002Fmain)","# llm-resource 快速上手指南\n\n`llm-resource`（LLM 百宝箱）并非一个可安装的软件库或框架，而是一个**大模型全栈优质资源汇总清单**。它旨在为开发者提供从算法原理、训练微调、推理优化到应用开发的全链路学习路径和资源索引。\n\n本指南将指导你如何高效利用该资源库进行学习和开发准备。\n\n## 1. 环境准备\n\n由于本项目是资源索引，无需特定的系统环境即可浏览。但为了实践资源中涉及的代码（如 Transformer 实现、模型微调、推理部署等），建议准备以下开发环境：\n\n*   **操作系统**: Linux (推荐 Ubuntu 20.04+) 或 macOS。Windows 用户建议使用 WSL2。\n*   **编程语言**: Python 3.8+\n*   **核心依赖**:\n    *   PyTorch 或 TensorFlow (根据具体资源选择)\n    *   Transformers (Hugging Face)\n    *   CUDA Toolkit (如需使用 GPU 加速，版本需与显卡驱动匹配)\n*   **网络环境**: 部分资源链接托管于 GitHub 或 Hugging Face，国内访问可能较慢，建议配置科学上网环境或使用国内镜像源。\n\n## 2. 获取资源\n\n你可以通过以下方式获取并浏览该资源列表：\n\n### 方式一：直接在线浏览（推荐）\n直接访问项目的 README 页面（通常在 GitHub 或相关文档平台），利用目录跳转快速查找所需领域的文章、论文或代码库链接。\n\n### 方式二：克隆仓库到本地\n如果你希望离线查阅或贡献内容，可以将项目克隆到本地：\n\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Fliguodongiot\u002Fllm-resource.git\ncd llm-resource\n```\n\n## 3. 基本使用指南\n\n`llm-resource` 的核心价值在于其分类清晰的目录结构。以下是针对不同需求的使用路径：\n\n### 场景 A：初学者入门原理\n如果你想理解大模型的基础架构：\n1.  打开目录中的 **🐼 [LLM 算法](#llm 算法)** 章节。\n2.  阅读 **Transformer** 子栏目下的“原理”类文章（如《Transformer 模型详解》）。\n3.  结合“源码”栏目中的 PyTorch\u002FTensorFlow 实现教程进行代码复现。\n\n### 场景 B：模型训练与微调\n如果你需要训练或微调自己的模型：\n1.  前往 **🐘 [LLM 训练](#llm 训练)** 章节。\n2.  **全量训练**: 参考“分布式训练 Playbook\"和显存计算相关文章。\n3.  **微调 (Fine-tuning)**: 查看 **LLM 微调** 子栏目，学习 P-Tuning 等技术。\n4.  
**对齐 (Alignment)**: 在 **LLM 对齐** 子栏目中查找 RLHF、DPO 的论文解读及代码实践（如 MOSS-RLHF）。\n\n### 场景 C：推理加速与部署\n如果你关注模型上线后的性能优化：\n1.  进入 **🔥 [LLM 推理](#llm 推理)** 章节。\n2.  学习 **KV Cache**、**投机采样 (Speculative Decoding)** 等优化技术原理。\n3.  参考 **vLLM**、**TensorRT-LLM** 等框架的源码解析和架构介绍，选择合适的推理引擎。\n4.  查看 **📡 [LLM 压缩](#llm 压缩)** 章节，了解量化（Quantization）和模型转换工具。\n\n### 场景 D：跟进前沿技术\n如果你想了解最新的技术动态：\n1.  查阅 **MOE 大模型**、**多模态大模型** 及 **下一代大模型** 章节。\n2.  阅读其中收录的最新论文（ArXiv）和技术博客，了解 Mixtral、NExT-GPT 等前沿架构。\n\n## 4. 参与贡献\n\n该项目鼓励社区共同参与建设。如果你发现了优质的新资源：\n1.  遵循现有的 Markdown 目录结构。\n2.  将新的链接添加到对应的分类下（例如将新的微调教程放入 `LLM 微调`）。\n3.  提交 Pull Request 至项目仓库。","某初创公司算法团队正计划基于 MoE 架构微调开源大模型以构建垂直领域客服助手，但在技术选型与资料搜集阶段陷入困境。\n\n### 没有 llm-resource 时\n- **资料分散难检索**：团队成员需分别在 GitHub、知乎、公众号及论文库中碎片化搜索\"MoE 并行训练”或\"LLM 量化”教程，耗费数天仍难以拼凑完整知识图谱。\n- **源码实现无头绪**：面对复杂的 Transformer 变体，缺乏从原理图解到 PyTorch\u002FTensorFlow 逐行源码解析的连贯指引，导致复现经典模型（如 GPT-2）时频频报错。\n- **前沿动态易遗漏**：难以及时获取如 NExT-GPT 等多模态最新进展或国产芯片适配方案，技术栈规划滞后于社区发展。\n- **试错成本高昂**：因缺乏系统的测评基准与数据工程指南，团队在数据清洗格式和推理加速方案上反复踩坑，严重拖慢项目进度。\n\n### 使用 llm-resource 后\n- **全栈资源一站式获取**：直接通过目录定位到\"MOE 大模型”与\"LLM 推理”板块，瞬间获得从理论详解到分布式并行技术的精选文章列表，调研时间缩短 80%。\n- **原理代码无缝衔接**：利用\"Transformer 源码详解”与\"nanoGPT\"等链接，成员可对照高质量博客快速理解注意力机制并跑通基线代码，大幅降低入门门槛。\n- **技术视野同步前沿**：借助“下一代大模型”与“多模态”专栏，团队迅速掌握 Any-to-Any 架构趋势，及时调整技术路线以兼容未来需求。\n- **避坑指南精准导航**：参考\"LLM 测评”与“数据工程”中的最佳实践，快速选定高效的数据处理流水线与推理后端，显著减少无效实验次数。\n\nllm-resource 将散落的珍珠串成项链，让开发者从“大海捞针”的信息焦虑中解脱，专注于核心算法的创新与落地。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fliguodongiot_llm-resource_dea90f22.png","liguodongiot","Jelly Lee","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fliguodongiot_05ceedb1.png","LLM\u002FLLMOps，博客：吃果冻不吐果冻皮\r\n",null,"Chengdu, 
China","liguodongiot@foxmail.com","https:\u002F\u002Fliguodong.blog.csdn.net","https:\u002F\u002Fgithub.com\u002Fliguodongiot",[82],{"name":83,"color":84,"percentage":85},"Shell","#89e051",100,697,79,"2026-04-09T13:57:38","Apache-2.0",1,"","未说明",{"notes":94,"python":92,"dependencies":95},"该仓库（llm-resource）并非一个可执行的软件工具或框架，而是一个大模型（LLM）全栈优质资源的汇总列表（README）。它主要收集了关于算法原理、训练、微调、推理、数据工程、压缩、测评等领域的文章、论文和开源项目链接。因此，该仓库本身没有特定的操作系统、GPU、内存、Python 版本或依赖库要求。用户需根据列表中引用的具体子项目（如 vLLM, DeepSpeed, Transformers 等）去查阅各自的环境需求。",[],[14,35],[98,99],"llm","llmops","2026-03-27T02:49:30.150509","2026-04-10T11:24:48.483225",[],[]]