[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-ghimiresunil--LLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing":3,"tool-ghimiresunil--LLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 
艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",150037,2,"2026-04-10T23:33:47",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",108322,"2026-04-10T11:39:34",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 
人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[52,13,15,14],"插件",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":32,"last_commit_at":59,"category_tags":60,"status":17},4721,"markitdown","microsoft\u002Fmarkitdown","MarkItDown 是一款由微软 AutoGen 团队打造的轻量级 Python 工具，专为将各类文件高效转换为 Markdown 格式而设计。它支持 PDF、Word、Excel、PPT、图片（含 OCR）、音频（含语音转录）、HTML 乃至 YouTube 链接等多种格式的解析，能够精准提取文档中的标题、列表、表格和链接等关键结构信息。\n\n在人工智能应用日益普及的今天，大语言模型（LLM）虽擅长处理文本，却难以直接读取复杂的二进制办公文档。MarkItDown 恰好解决了这一痛点，它将非结构化或半结构化的文件转化为模型“原生理解”且 Token 效率极高的 Markdown 格式，成为连接本地文件与 AI 分析 pipeline 的理想桥梁。此外，它还提供了 MCP（模型上下文协议）服务器，可无缝集成到 Claude Desktop 等 LLM 应用中。\n\n这款工具特别适合开发者、数据科学家及 AI 研究人员使用，尤其是那些需要构建文档检索增强生成（RAG）系统、进行批量文本分析或希望让 AI 助手直接“阅读”本地文件的用户。虽然生成的内容也具备一定可读性，但其核心优势在于为机器",93400,"2026-04-06T19:52:38",[52,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":66,"readme_en":67,"readme_zh":68,"quickstart_zh":69,"use_case_zh":70,"hero_image_url":71,"owner_login":72,"owner_name":73,"owner_avatar_url":74,"owner_bio":75,"owner_company":76,"owner_location":77,"owner_email":78,"owner_twitter":79,"owner_website":80,"owner_url":81,"languages":82,"stars":98,"forks":99,"last_commit_at":100,"license":101,"difficulty_score":32,"env_os":102,"env_gpu":103,"env_ram":103,"env_deps":104,"category_tags":107,"github_topics":108,"view_count":32,"oss_zip_url":79,"oss_zip_packed_at":79,"status":17,"created_at":118,"updated_at":119,"faqs":120,"releases":121},6465,"ghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing","LLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing","LLM-PowerHouse: Unleash LLMs' potential through curated tutorials, best practices, and ready-to-use code for custom 
training and inferencing.","LLM-PowerHouse 是一个专为大型语言模型（LLM）定制训练与推理打造的开源指南库。它致力于解决开发者在从理论入门到落地生产全过程中面临的资源分散、实践门槛高以及最佳实践缺失等痛点。通过整合精心策划的教程、行业公认的最佳实践以及开箱即用的代码示例，该项目帮助用户充分释放大模型的潜力。\n\n无论是希望夯实数学与神经网络基础的学习者，还是专注于模型微调、对齐及压缩优化的研究人员，亦或是需要构建包含 RAG（检索增强生成）、部署与安全机制的生产级应用的工程师，都能在此找到针对性的指导。其独特亮点在于结构化的学习路径设计：用户可根据目标快速导航，从底层原理直达可运行的代码库；同时涵盖了从数据集探索、评估指标分析到成本优化策略的深度文章。LLM-PowerHouse 不仅提供了丰富的技术文档，更通过实战案例连接了理论与应用，是构建智能自然语言处理应用的理想资源站。","\u003Cdiv align=\"center\">\n  \u003Ch1>🗣️ LLM PowerHouse\u003C\u002Fh1>\n  \u003Cp>\n    \u003Cp>\n  \u003Ca href=\"https:\u002F\u002Ftwitter.com\u002FGhimire12Sunil\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002Ftwitter-%231DA1F2.svg?&style=for-the-badge&logo=twitter&logoColor=white\" height=25>\u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fwww.linkedin.com\u002Fin\u002Fghimiresunil\u002F\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002Flinkedin-%230077B5.svg?&style=for-the-badge&logo=linkedin&logoColor=white\" height=25>\u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fwww.instagram.com\u002F_tech_tutor\u002F\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002Finstagram-%23E4405F.svg?&style=for-the-badge&logo=instagram&logoColor=white\" height=25>\u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fsunilghimire.com.np\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002Fwebsite-25D366?style=for-the-badge&logo=website&logoColor=white\" height=25>\u003C\u002Fa>\n  \u003C\u002Fp>\n   \u003Cp>\u003Cem>Unleash LLMs' potential through curated tutorials, best practices, and ready-to-use code for custom training and inferencing.\u003C\u002Fem>\u003C\u002Fp>\n\u003C\u002Fdiv>\n\n# Overview\nWelcome to LLM-PowerHouse, your ultimate resource for unleashing the full potential of Large Language Models (LLMs) with custom training and inferencing. 
This GitHub repository is a comprehensive and curated guide designed to empower developers, researchers, and enthusiasts to harness the true capabilities of LLMs and build intelligent applications that push the boundaries of natural language understanding.\n\n# Quick Navigation\n\n## Start by goal\n- 🧠 Learn fundamentals → [Foundations of LLMs](#foundations-of-llms)\n- 🧪 Train & align models → [Unlock the Art of LLM Science](#unlock-the-art-of-llm-science)\n- 🏭 Build production apps (RAG, deployment, security) → [Building Production-Ready LLM Applications](#building-production-ready-llm-applications)\n- 📚 Browse all topic guides → [In-Depth Articles](#in-depth-articles)\n- 💻 Jump to runnable examples → [Codebase Mastery: Building with Perfection](#codebase-mastery-building-with-perfection)\n- 🗂️ Explore datasets quickly → [LLM Datasets](#llm-datasets)\n\n## Repository map\n- [Articles](.\u002FArticles)\n- [Example codebase](.\u002Fexample_codebase)\n- [Dataset](.\u002Fdataset)\n- [License](.\u002FLICENSE)\n\n## Full Table of Contents\n- [Foundations of LLMs](#foundations-of-llms)\n- [Unlock the Art of LLM Science](#unlock-the-art-of-llm-science)\n- [Building Production-Ready LLM Applications](#building-production-ready-llm-applications)\n- [In-Depth Articles](#in-depth-articles)\n    - [NLP](#nlp)\n    - [Models](#models)\n    - [Training](#training)\n    - [Enhancing Model Compression: Inference and Training Optimization Strategies](#enhancing-model-compression-inference-and-training-optimization-strategies)\n    - [Evaluation Metrics](#evaluation-metrics)\n    - [Open LLMs](#open-llms)\n    - [Resources for cost analysis and network visualization](#resources-for-cost-analysis-and-network-visualization)\n- [Codebase Mastery: Building with Perfection](#codebase-mastery-building-with-perfection)\n- [LLM PlayLab](#llm-playlab)\n- [LLM Datasets](#llm-datasets)\n- [LLM Alignment](#llm-alignment)\n- [Data Generation](#data-generation)\n- [What I am 
learning](#what-i-am-learning)\n- [Contributing](#contributing)\n- [License](#license)\n- [About The Author](#about-the-author)\n\n## Foundations of LLMs\n\nThis section offers fundamental insights into mathematics, Python, and neural networks. It may not be the ideal starting point, but you can consult it whenever necessary.\n\n\u003Cdetails>\n\u003Csummary>⬇️ Ready to Embrace Foundations of LLMs? ⬇️ \u003C\u002Fsummary>\n\n```mermaid\ngraph LR\n    Foundations[\"📚 Foundations of Large Language Models (LLMs)\"] --> ML[\"1️⃣ Mathematics for Machine Learning\"]\n    Foundations[\"📚 Foundations of Large Language Models (LLMs)\"] --> Python[\"2️⃣ Python for Machine Learning\"]\n    Foundations[\"📚 Foundations of Large Language Models (LLMs)\"] --> NN[\"3️⃣ Neural Networks\"]\n    Foundations[\"📚 Foundations of Large Language Models (LLMs)\"] --> NLP[\"4️⃣ Natural Language Processing (NLP)\"]\n    \n    ML[\"1️⃣ Mathematics for Machine Learning\"] --> LA[\"📐 Linear Algebra\"]\n    ML[\"1️⃣ Mathematics for Machine Learning\"] --> Calculus[\"📏 Calculus\"]\n    ML[\"1️⃣ Mathematics for Machine Learning\"] --> Probability[\"📊 Probability & Statistics\"]\n    \n    Python[\"2️⃣ Python for Machine Learning\"] --> PB[\"🐍 Python Basics\"]\n    Python[\"2️⃣ Python for Machine Learning\"] --> DS[\"📊 Data Science Libraries\"]\n    Python[\"2️⃣ Python for Machine Learning\"] --> DP[\"🔄 Data Preprocessing\"]\n    Python[\"2️⃣ Python for Machine Learning\"] --> MLL[\"🤖 Machine Learning Libraries\"]\n    \n    NN[\"3️⃣ Neural Networks\"] --> Fundamentals[\"🔧 Fundamentals\"]\n    NN[\"3️⃣ Neural Networks\"] --> TO[\"⚙️ Training & Optimization\"]\n    NN[\"3️⃣ Neural Networks\"] --> Overfitting[\"📉 Overfitting\"]\n    NN[\"3️⃣ Neural Networks\"] --> MLP[\"🧠 Implementation of MLP\"]\n    \n    NLP[\"4️⃣ Natural Language Processing (NLP)\"] --> TP[\"📝 Text Preprocessing\"]\n    NLP[\"4️⃣ Natural Language Processing (NLP)\"] --> FET[\"🔍 Feature Extraction Techniques\"]\n    NLP[\"4️⃣ 
Natural Language Processing (NLP)\"] --> WE[\"🌐 Word Embedding\"]\n    NLP[\"4️⃣ Natural Language Processing (NLP)\"] --> RNN[\"🔄 Recurrent Neural Network\"]\n\n```\n\n### 1. Mathematics for Machine Learning\n\nBefore mastering machine learning, it's essential to grasp the fundamental mathematical concepts that underpin these algorithms.\n\n| Concept                    | Description |\n|----------------------------|-------------|\n| **Linear Algebra**         | Crucial for understanding many algorithms, especially in deep learning. Key concepts include vectors, matrices, determinants, eigenvalues, eigenvectors, vector spaces, and linear transformations. |\n| **Calculus**               | Important for optimizing continuous functions in many machine learning algorithms. Essential topics include derivatives, integrals, limits, series, multivariable calculus, and gradients. |\n| **Probability and Statistics** | Vital for understanding how models learn from data and make predictions. Key concepts encompass probability theory, random variables, probability distributions, expectations, variance, covariance, correlation, hypothesis testing, confidence intervals, maximum likelihood estimation, and Bayesian inference. |\n\n#### Further Exploration\n\n| Reference | Description | Link |\n| --- | --- | :---: |\n| 3Blue1Brown - The Essence of Linear Algebra | Offers a series of videos providing geometric intuition to fundamental linear algebra concepts. | [🔗](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=fNk_zzaMoSs&list=PLZHQObOWTQDPD3MizzM2xVFitgF8hE_ab) |\n| StatQuest with Josh Starmer - Statistics Fundamentals | Provides clear and straightforward explanations for various statistical concepts through video tutorials. | [🔗](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=qBigTkBLU6g&list=PLblh5JKOoLUK0FLuzwntyYI10UQFUhsY9) |\n| AP Statistics Intuition by Ms Aerin | Curates a collection of Medium articles offering intuitive insights into different probability distributions. 
| [🔗](https:\u002F\u002Fautomata88.medium.com\u002Flist\u002Fcacc224d5e7d) |\n| Immersive Linear Algebra | Presents an alternative visual approach to understanding linear algebra concepts. | [🔗](https:\u002F\u002Fimmersivemath.com\u002Fila\u002Flearnmore.html) |\n| Khan Academy - Linear Algebra | Tailored for beginners, this resource provides intuitive explanations for fundamental linear algebra topics. | [🔗](https:\u002F\u002Fwww.khanacademy.org\u002Fmath\u002Flinear-algebra) |\n| Khan Academy - Calculus | Delivers an interactive course covering the essentials of calculus comprehensively. | [🔗](https:\u002F\u002Fwww.khanacademy.org\u002Fmath\u002Fcalculus-1) |\n| Khan Academy - Probability and Statistics | Offers easy-to-follow material for learning probability and statistics concepts. | [🔗](https:\u002F\u002Fwww.khanacademy.org\u002Fmath\u002Fstatistics-probability) |\n\n### 2. Python for Machine Learning\n\n| Concept | Description |\n| --- | --- |\n| **Python Basics** | Mastery of Python programming entails understanding its basic syntax, data types, error handling, and object-oriented programming principles. |\n| **Data Science Libraries** | Familiarity with essential libraries such as NumPy for numerical operations, Pandas for data manipulation, and Matplotlib and Seaborn for data visualization is crucial for effective data analysis. |\n| **Data Preprocessing** | This phase involves crucial tasks such as feature scaling, handling missing data, outlier detection, categorical data encoding, and data partitioning into training, validation, and test sets to ensure data quality and model performance. |\n| **Machine Learning Libraries** | Proficiency with Scikit-learn, a comprehensive library for machine learning, is indispensable. Understanding and implementing algorithms like linear regression, logistic regression, decision trees, random forests, k-nearest neighbors (K-NN), and K-means clustering are essential for building predictive models. 
Additionally, familiarity with dimensionality reduction techniques like PCA and t-SNE aids in visualizing complex data structures effectively. |\n\n#### Further Exploration\n\n| Reference | Description | Link |\n| --- | --- | :---: |\n| Real Python | A comprehensive resource offering articles and tutorials for both beginner and advanced Python concepts. | [🔗](https:\u002F\u002Frealpython.com\u002F) |\n| freeCodeCamp - Learn Python | A lengthy video providing a thorough introduction to all core Python concepts. | [🔗](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=rfscVS0vtbw) |\n| Python Data Science Handbook | A free digital book that is an excellent resource for learning pandas, NumPy, Matplotlib, and Seaborn. | [🔗](https:\u002F\u002Fjakevdp.github.io\u002FPythonDataScienceHandbook\u002F) |\n| freeCodeCamp - Machine Learning for Everybody | A practical introduction to various machine learning algorithms for beginners. | [🔗](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=i_LwzRVP7bg) |\n| Udacity - Intro to Machine Learning | An introductory course on machine learning for beginners, covering fundamental algorithms. | [🔗](https:\u002F\u002Fwww.udacity.com\u002Fcourse\u002Fintro-to-machine-learning--ud120) |\n\n\n### 3. Neural Networks\n\n| Concept                    | Description |\n|----------------------------|-------------|\n| Fundamentals | Understand the basic structure of a neural network, including layers, weights, biases, and activation functions like sigmoid, tanh, and ReLU. |\n| Training and Optimization | Learn about backpropagation and various loss functions such as Mean Squared Error (MSE) and Cross-Entropy. Become familiar with optimization algorithms like Gradient Descent, Stochastic Gradient Descent, RMSprop, and Adam. 
|\n| Overfitting | Grasp the concept of overfitting, where a model performs well on training data but poorly on unseen data, and explore regularization techniques like dropout, L1\u002FL2 regularization, early stopping, and data augmentation to mitigate it. |\n| Implement a Multilayer Perceptron (MLP) | Build a Multilayer Perceptron (MLP), also known as a fully connected network, using PyTorch. |\n\n#### Further Exploration\n\n| Reference | Description | Link |\n| --- | --- | :---: |\n| 3Blue1Brown - But what is a Neural Network? | This video provides an intuitive explanation of neural networks and their inner workings. | [🔗](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=aircAruvnKk) |\n| freeCodeCamp - Deep Learning Crash Course | This video efficiently introduces the most important concepts in deep learning. | [🔗](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=CS4cs9xVecg) |\n| Fast.ai - Practical Deep Learning | A free course designed for those with coding experience who want to learn about deep learning. | [🔗](https:\u002F\u002Fcourse.fast.ai\u002F) |\n| Patrick Loeber - PyTorch Tutorials | A series of videos for complete beginners to learn about PyTorch. | [🔗](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PLqnslRFeH2UrcDBWF5mfPGpqQDSta6VK4) |\n\n### 4. Natural Language Processing (NLP)\n\n| Concept | Description |\n|----------------------------|-------------|\n| Text Preprocessing | Learn various text preprocessing steps such as tokenization (splitting text into words or sentences), stemming (reducing words to their root form), lemmatization (similar to stemming but considers the context), and stop word removal. |\n| Feature Extraction Techniques | Become familiar with techniques to convert text data into a format understandable by machine learning algorithms. Key methods include Bag-of-Words (BoW), Term Frequency-Inverse Document Frequency (TF-IDF), and n-grams. 
|\n| Word Embeddings | Understand word embeddings, a type of word representation that allows words with similar meanings to have similar representations. Key methods include Word2Vec, GloVe, and FastText. |\n| Recurrent Neural Networks (RNNs) | Learn about RNNs, a type of neural network designed to work with sequence data, and explore LSTMs and GRUs, two RNN variants capable of learning long-term dependencies. |\n\n\n#### Further Exploration\n\n| Reference | Description | Link |\n| --- | --- | :---: |\n| RealPython - NLP with spaCy in Python | An exhaustive guide on using the spaCy library for NLP tasks in Python. | [🔗](https:\u002F\u002Frealpython.com\u002Fnatural-language-processing-spacy-python\u002F) |\n| Kaggle - NLP Guide | A collection of notebooks and resources offering a hands-on explanation of NLP in Python. | [🔗](https:\u002F\u002Fwww.kaggle.com\u002Flearn\u002Fnatural-language-processing) |\n| Jay Alammar - The Illustrated Word2Vec | A detailed reference for understanding the Word2Vec architecture. | [🔗](https:\u002F\u002Fjalammar.github.io\u002Fillustrated-word2vec\u002F) |\n| Jake Tae - PyTorch RNN from Scratch | A practical and straightforward implementation of RNN, LSTM, and GRU models in PyTorch. | [🔗](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PLhhyoLH6IjfwU8t8_eElRxa2GvGv8vV_h) |\n| colah's blog - Understanding LSTM Networks | A theoretical article explaining LSTM networks. | [🔗](https:\u002F\u002Fcolah.github.io\u002Fposts\u002F2015-08-Understanding-LSTMs\u002F) |\n\n\u003C\u002Fdetails>\n\u003Cbr>\n\n\n## Unlock the Art of LLM Science\n\nIn this segment of the curriculum, participants delve into mastering the creation of top-notch LLMs through cutting-edge methodologies.\n\n\u003Cdetails>\n\u003Csummary>⬇️ Ready to Embrace LLM Science? 
⬇️ \u003C\u002Fsummary>\n\n```mermaid\ngraph LR\n    Scientist[\"Art of LLM Science 👩‍🔬\"] --> Architecture[\"The LLM architecture 🏗️\"]\n    Scientist[\"Art of LLM Science 👩‍🔬\"] --> Instruction[\"Building an instruction dataset 📚\"]\n    Scientist[\"Art of LLM Science 👩‍🔬\"] --> Pretraining[\"Pretraining models 🛠️\"]\n    Scientist[\"Art of LLM Science 👩‍🔬\"] --> FineTuning[\"Supervised Fine-Tuning 🎯\"]\n    Scientist[\"Art of LLM Science 👩‍🔬\"] --> RLHF[\"RLHF 🔍\"]\n    Scientist[\"Art of LLM Science 👩‍🔬\"] --> Evaluation[\"Evaluation 📊\"]\n    Scientist[\"Art of LLM Science 👩‍🔬\"] --> Quantization[\"Quantization ⚖️\"]\n    Scientist[\"Art of LLM Science 👩‍🔬\"] --> Trends[\"New Trends 📈\"]\n    Architecture[\"The LLM architecture 🏗️\"] --> HLV[\"High Level View 🔍\"]\n    Architecture[\"The LLM architecture 🏗️\"] --> Tokenization[\"Tokenization 🔠\"]\n    Architecture[\"The LLM architecture 🏗️\"] --> Attention[\"Attention Mechanisms 🧠\"]\n    Architecture[\"The LLM architecture 🏗️\"] --> Generation[\"Text Generation ✍️\"]\n    Instruction[\"Building an instruction dataset 📚\"] --> Alpaca[\"Alpaca-like dataset 🦙\"]\n    Instruction[\"Building an instruction dataset 📚\"] --> Advanced[\"Advanced Techniques 📈\"]\n    Instruction[\"Building an instruction dataset 📚\"] --> Filtering[\"Filtering Data 🔍\"]\n    Instruction[\"Building an instruction dataset 📚\"] --> Prompt[\"Prompt Templates 📝\"]\n    Pretraining[\"Pretraining models 🛠️\"] --> Pipeline[\"Data Pipeline 🚀\"]\n    Pretraining[\"Pretraining models 🛠️\"] --> CLM[\"Casual Language Modeling 📝\"]\n    Pretraining[\"Pretraining models 🛠️\"] --> Scaling[\"Scaling Laws 📏\"]\n    Pretraining[\"Pretraining models 🛠️\"] --> HPC[\"High-Performance Computing 💻\"]\n    FineTuning[\"Supervised Fine-Tuning 🎯\"] --> Full[\"Full fine-tuning 🛠️\"]\n    FineTuning[\"Supervised Fine-Tuning 🎯\"] --> Lora[\"Lora and QLoRA 🌀\"]\n    FineTuning[\"Supervised Fine-Tuning 🎯\"] --> Axoloti[\"Axoloti 🦠\"]\n    FineTuning[\"Supervised 
Fine-Tuning 🎯\"] --> DeepSpeed[\"DeepSpeed ⚡\"]\n    RLHF[\"RLHF 🔍\"] --> Preference[\"Preference Datasets 📝\"]\n    RLHF[\"RLHF 🔍\"] --> Optimization[\"Proximal Policy Optimization 🎯\"]\n    RLHF[\"RLHF 🔍\"] --> DPO[\"Direct Preference Optimization 📈\"]\n    Evaluation[\"Evaluation 📊\"] --> Traditional[\"Traditional Metrics 📏\"]\n    Evaluation[\"Evaluation 📊\"] --> General[\"General Benchmarks 📈\"]\n    Evaluation[\"Evaluation 📊\"] --> Task[\"Task-specific Benchmarks 📋\"]\n    Evaluation[\"Evaluation 📊\"] --> HF[\"Human Evaluation 👩‍🔬\"]\n    Quantization[\"Quantization ⚖️\"] --> Base[\"Base Techniques 🛠️\"]\n    Quantization[\"Quantization ⚖️\"] --> GGUF[\"GGUF and llama.cpp 🐐\"]\n    Quantization[\"Quantization ⚖️\"] --> GPTQ[\"GPTQ and EXL2 🤖\"]\n    Quantization[\"Quantization ⚖️\"] --> AWQ[\"AWQ 🚀\"]\n    Trends[\"New Trends 📈\"] --> Positional[\"Positional Embeddings 🎯\"]\n    Trends[\"New Trends 📈\"] --> Merging[\"Model Merging 🔄\"]\n    Trends[\"New Trends 📈\"] --> MOE[\"Mixture of Experts 🎭\"]\n    Trends[\"New Trends 📈\"] --> Multimodal[\"Multimodal Models 📷\"]\n```    \n\n### 1. The LLM architecture 🏗️\n\nAn overview of the Transformer architecture, with emphasis on inputs (tokens) and outputs (logits), and the importance of understanding the vanilla attention mechanism and its improved versions.\n\n| Concept | Description |\n| --- | --- |\n| Transformer Architecture (High-Level) | Review encoder-decoder Transformers, specifically the decoder-only GPT architecture used in modern LLMs. |\n| Tokenization | Understand how raw text is converted into tokens (words or subwords) for the model to process. |\n| Attention Mechanisms | Grasp the theory behind attention, including self-attention and scaled dot-product attention, which allows the model to focus on relevant parts of the input during output generation.|\n| Text Generation | Learn different methods the model uses to generate output sequences. 
Common strategies include greedy decoding, beam search, top-k sampling, and nucleus sampling.|\n\n#### Further Exploration\n\n| Reference | Description | Link |\n| --- | --- | :---: |\n| The Illustrated Transformer by Jay Alammar | A visual and intuitive explanation of the Transformer model | [🔗](https:\u002F\u002Fjalammar.github.io\u002Fillustrated-transformer\u002F) |\n| The Illustrated GPT-2 by Jay Alammar | Focuses on the GPT architecture, similar to Llama's. | [🔗](https:\u002F\u002Fjalammar.github.io\u002Fillustrated-gpt2\u002F) |\n| Visual intro to Transformers by 3Blue1Brown | Simple visual intro to Transformers | [🔗](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=wjZofJX0v4M&t=187s) |\n| LLM Visualization by Brendan Bycroft | 3D visualization of LLM internals | [🔗](https:\u002F\u002Fbbycroft.net\u002Fllm) |\n| nanoGPT by Andrej Karpathy | Reimplementation of GPT from scratch (for programmers) | [🔗](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=kCc8FmEb1nY) |\n| Decoding Strategies in LLMs | Provides code and visuals for decoding strategies | [🔗](https:\u002F\u002Fmlabonne.github.io\u002Fblog\u002Fposts\u002F2023-06-07-Decoding_strategies.html) |\n\n\n### 2. Building an instruction dataset 📚\n\nWhile it's easy to find raw data from Wikipedia and other websites, it's difficult to collect pairs of instructions and answers in the wild. Like in traditional machine learning, the quality of the dataset will directly influence the quality of the model, which is why it might be the most important component in the fine-tuning process.\n\n| Concept | Description |\n| --- | --- |\n| [Alpaca](https:\u002F\u002Fcrfm.stanford.edu\u002F2023\u002F03\u002F13\u002Falpaca.html)-like dataset | This dataset generation method utilizes the OpenAI API (GPT) to synthesize data from scratch, allowing for the specification of seeds and system prompts to foster diversity within the dataset. 
|\n| Advanced techniques | Delve into methods for enhancing existing datasets with [Evol-Instruct](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.12244), and explore approaches for generating top-tier synthetic data akin to those outlined in the [Orca](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.02707) and [phi-1](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.11644) research papers.|\n| Filtering data | Employ traditional techniques such as regex, near-duplicate removal, and prioritizing answers with substantial token counts to refine datasets.|\n| Prompt templates  | Recognize the absence of a definitive standard for structuring instructions and responses, underscoring the importance of familiarity with various chat templates like [ChatML](https:\u002F\u002Flearn.microsoft.com\u002Fen-us\u002Fazure\u002Fai-services\u002Fopenai\u002Fhow-to\u002Fchatgpt?tabs=python&pivots=programming-language-chat-ml) and [Alpaca](https:\u002F\u002Fcrfm.stanford.edu\u002F2023\u002F03\u002F13\u002Falpaca.html).|\n\n#### Further Exploration\n\n| Reference | Description | Link |\n| --- | --- | :---:|\n| Preparing a Dataset for Instruction tuning by Thomas Capelle | Explores the Alpaca and Alpaca-GPT4 datasets and discusses formatting methods. | [🔗](https:\u002F\u002Fwandb.ai\u002Fcapecape\u002Falpaca_ft\u002Freports\u002FHow-to-Fine-Tune-an-LLM-Part-1-Preparing-a-Dataset-for-Instruction-Tuning--Vmlldzo1NTcxNzE2) |\n| Generating a Clinical Instruction Dataset by Solano Todeschini | Provides a tutorial on creating a synthetic instruction dataset using GPT-4. | [🔗](https:\u002F\u002Fmedium.com\u002Fmlearning-ai\u002Fgenerating-a-clinical-instruction-dataset-in-portuguese-with-langchain-and-gpt-4-6ee9abfa41ae) |\n| GPT 3.5 for news classification by Kshitiz Sahay | Demonstrates using GPT 3.5 to create an instruction dataset for fine-tuning Llama 2 in news classification. 
| [🔗](https:\u002F\u002Fmedium.com\u002F@kshitiz.sahay26\u002Fhow-i-created-an-instruction-dataset-using-gpt-3-5-to-fine-tune-llama-2-for-news-classification-ed02fe41c81f) |\n| Dataset creation for fine-tuning LLM | Notebook containing techniques to filter a dataset and upload the result. | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FDataset_creation_for_fine_tuning_LLM.ipynb) |\n| Chat Template by Matthew Carrigan | Hugging Face's page about prompt templates | [🔗](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Fchat-templates) |\n\n### 3. Pretraining models 🛠️\n\nPre-training, being both lengthy and expensive, is not the primary focus of this course. While it's beneficial to grasp the fundamentals of pre-training, practical experience in this area is not mandatory.\n\n| Concept | Description |\n| --- | --- |\n| Data pipeline | Pre-training involves handling vast datasets, such as the 2 trillion tokens used in [Llama 2](https:\u002F\u002Farxiv.org\u002Fabs\u002F2307.09288), which necessitates tasks like filtering, tokenization, and vocabulary preparation. |\n| Causal language modeling  | Understand the distinction between causal and masked language modeling, including insights into the corresponding loss functions. Explore efficient pre-training techniques through resources like [Megatron-LM](https:\u002F\u002Fgithub.com\u002FNVIDIA\u002FMegatron-LM) or [gpt-neox](https:\u002F\u002Fgithub.com\u002FEleutherAI\u002Fgpt-neox). |\n| Scaling laws  | Delve into the [scaling laws](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2001.08361), which elucidate the anticipated model performance based on factors like model size, dataset size, and computational resources utilized during training. 
|\n| High-Performance Computing  | While beyond the scope of this discussion, a deeper understanding of HPC becomes essential for those considering building their own LLMs from scratch, encompassing aspects like hardware selection and distributed workload management. |\n\n#### Further Exploration\n\n| Reference | Description | Link |\n| --- | --- | :---:|\n| LLMDataHub by Junhao Zhao | Offers a carefully curated collection of datasets tailored for pre-training, fine-tuning, and RLHF. | [🔗](https:\u002F\u002Fgithub.com\u002FZjh-819\u002FLLMDataHub) |\n| Training a causal language model from scratch by Hugging Face | Guides users through the process of pre-training a GPT-2 model from the ground up using the transformers library. | [🔗](https:\u002F\u002Fhuggingface.co\u002Flearn\u002Fnlp-course\u002Fchapter7\u002F6?fw=pt) |\n| TinyLlama by Zhang et al. | Provides insights into the training process of a Llama model from scratch, offering a comprehensive understanding. | [🔗](https:\u002F\u002Fgithub.com\u002Fjzhang38\u002FTinyLlama) |\n| Causal language modeling by Hugging Face | Explores the distinctions between causal and masked language modeling, alongside a tutorial on efficiently fine-tuning a DistilGPT-2 model. | [🔗](https:\u002F\u002Fhuggingface.co\u002Fdocs\u002Ftransformers\u002Ftasks\u002Flanguage_modeling) |\n| Chinchilla's wild implications by nostalgebraist | Delves into the scaling laws and their implications for LLMs, offering valuable insights into their broader significance. | [🔗](https:\u002F\u002Fwww.lesswrong.com\u002Fposts\u002F6Fpvch8RR29qLEWNH\u002Fchinchilla-s-wild-implications) |\n| BLOOM by BigScience | Provides a comprehensive overview of the BLOOM model's construction, offering valuable insights into its engineering aspects and encountered challenges. 
| [🔗](https:\u002F\u002Fbigscience.notion.site\u002FBLOOM-BigScience-176B-Model-ad073ca07cdf479398d5f95d88e218c4) |\n| OPT-175 Logbook by Meta | Offers research logs detailing the successes and failures encountered during the pre-training of a large language model with 175B parameters. | [🔗](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fmetaseq\u002Fblob\u002Fmain\u002Fprojects\u002FOPT\u002Fchronicles\u002FOPT175B_Logbook.pdf) |\n| LLM 360 | Presents a comprehensive framework for open-source LLMs, encompassing training and data preparation code, datasets, evaluation metrics, and models. | [🔗](https:\u002F\u002Fwww.llm360.ai\u002F) |\n\n### 4. Supervised Fine-Tuning 🎯\nPre-trained models are trained to predict the next word, so they're not great as assistants. But with SFT, you can adjust them to follow instructions. Plus, you can fine-tune them on different data, even private stuff GPT-4 hasn't seen, and use them without needing paid APIs like OpenAI's.\n\n| Concept | Description |\n| --- | --- |\n| Full fine-tuning | Full fine-tuning involves training all parameters in the model, though it's not the most efficient approach, it can yield slightly improved results. |\n| [LoRA](https:\u002F\u002Farxiv.org\u002Fabs\u002F2106.09685) | LoRA, a parameter-efficient technique (PEFT) based on low-rank adapters, focuses on training only these adapters rather than all model parameters. |\n| [QLoRA](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.14314) | QLoRA, another PEFT stemming from LoRA, also quantizes model weights to 4 bits and introduces paged optimizers to manage memory spikes efficiently. |\n| [Axolotl](https:\u002F\u002Fgithub.com\u002FOpenAccess-AI-Collective\u002Faxolotl) | Axolotl stands as a user-friendly and potent fine-tuning tool, extensively utilized in numerous state-of-the-art open-source models. 
|\n| [DeepSpeed](https:\u002F\u002Fwww.deepspeed.ai\u002F) | DeepSpeed facilitates efficient pre-training and fine-tuning of large language models across multi-GPU and multi-node settings, often integrated within Axolotl for enhanced performance. |\n\n#### Further Exploration\n| Reference | Description | Link |\n| --- | --- | :---: |\n| The Novice's LLM Training Guide by Alpin | Provides an overview of essential concepts and parameters for fine-tuning LLMs. | [🔗](https:\u002F\u002Frentry.org\u002Fllm-training)|\n| LoRA insights by Sebastian Raschka | Offers practical insights into LoRA and guidance on selecting optimal parameters. | [🔗](https:\u002F\u002Flightning.ai\u002Fpages\u002Fcommunity\u002Flora-insights\u002F)|\n| Fine-Tune Your Own Llama 2 Model | Presents a hands-on tutorial on fine-tuning a Llama 2 model using Hugging Face libraries. | [🔗](https:\u002F\u002Fmlabonne.github.io\u002Fblog\u002Fposts\u002FFine_Tune_Your_Own_Llama_2_Model_in_a_Colab_Notebook.html)|\n| Padding Large Language Models by Benjamin Marie | Outlines best practices for padding training examples in causal LLMs. | [🔗](https:\u002F\u002Ftowardsdatascience.com\u002Fpadding-large-language-models-examples-with-llama-2-199fb10df8ff)|\n\n### 5. RLHF 🔍\nFollowing supervised fine-tuning, RLHF serves as a crucial step in harmonizing the LLM's responses with human expectations. This entails acquiring preferences from human or artificial feedback, thereby mitigating biases, implementing model censorship, or fostering more utilitarian behavior. RLHF is notably more intricate than SFT and is frequently regarded as discretionary.\n\n| Concept | Description |\n| --- | --- |\n| Preference datasets | Typically containing several answers with some form of ranking, these datasets are more challenging to produce than instruction datasets. 
| \n| [Proximal Policy Optimization](https:\u002F\u002Farxiv.org\u002Fabs\u002F1707.06347) | This algorithm utilizes a reward model to predict whether a given text is highly ranked by humans. It then optimizes the SFT model using a penalty based on KL divergence. |\n| [Direct Preference Optimization](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.18290) | DPO simplifies the process by framing it as a classification problem. It employs a reference model instead of a reward model (requiring no training) and only necessitates one hyperparameter, rendering it more stable and efficient. |\n\n#### Further Exploration\n\n| Reference | Description | Link |\n| --- | --- | :---: |\n| An Introduction to Training LLMs using RLHF by Ayush Thakur | Explains why RLHF is desirable to reduce bias and increase performance in LLMs.| [🔗](https:\u002F\u002Fwandb.ai\u002Fayush-thakur\u002FIntro-RLAIF\u002Freports\u002FAn-Introduction-to-Training-LLMs-Using-Reinforcement-Learning-From-Human-Feedback-RLHF---VmlldzozMzYyNjcy)|\n| Illustrating RLHF by Hugging Face | Introduction to RLHF with reward model training and fine-tuning with reinforcement learning. | [🔗](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Frlhf)|\n| StackLLaMA by Hugging Face | Tutorial to efficiently align a LLaMA model with RLHF using the transformers library | [🔗](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Fstackllama)|\n| LLM Training RLHF and Its Alternatives by Sebastian Raschka | Overview of the RLHF process and alternatives like RLAIF. | [🔗](https:\u002F\u002Fmagazine.sebastianraschka.com\u002Fp\u002Fllm-training-rlhf-and-its-alternatives)|\n| Fine-tune Llama2 with DPO | Tutorial to fine-tune a Llama2 model with DPO| [🔗](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Fdpo-trl)|\n\n### 6. Evaluation 📊\n\nAssessing LLMs is an often overlooked aspect of the pipeline, characterized by its time-consuming nature and moderate reliability. 
Your evaluation criteria should be tailored to your downstream task, while bearing in mind Goodhart's law: \"When a measure becomes a target, it ceases to be a good measure.\"\n\n| Concept | Description |\n| --- | --- |\n| Traditional metrics | Metrics like perplexity and BLEU score, while less favored now due to their contextual limitations, remain crucial for comprehension and determining their applicable contexts. |\n| General benchmarks | The primary benchmark for general-purpose LLMs, such as ChatGPT, is the [Open LLM Leaderboard](https:\u002F\u002Fhuggingface.co\u002Fspaces\u002FHuggingFaceH4\u002Fopen_llm_leaderboard), which is founded on the [Language Model Evaluation Harness](https:\u002F\u002Fgithub.com\u002FEleutherAI\u002Flm-evaluation-harness). Other notable benchmarks include BigBench and MT-Bench. |\n| Task-specific benchmarks | Tasks like summarization, translation, and question answering boast dedicated benchmarks, metrics, and even subdomains (e.g., medical, financial), exemplified by [PubMedQA](https:\u002F\u002Fpubmedqa.github.io\u002F) for biomedical question answering. |\n| Human evaluation | The most dependable evaluation method entails user acceptance rates or human-comparison metrics. Additionally, logging user feedback alongside chat traces, facilitated by tools like [LangSmith](https:\u002F\u002Fdocs.smith.langchain.com\u002Fold\u002Fevaluation), aids in pinpointing potential areas for enhancement. |\n\n#### Further Evaluation\n\n| Reference | Description | Link |\n| --- | --- | :---: |\n| Perplexity of fixed-length models by Hugging Face | Provides an overview of perplexity along with code to implement it using the transformers library. | [🔗](https:\u002F\u002Fhuggingface.co\u002Fdocs\u002Ftransformers\u002Fperplexity)|\n| BLEU at your own risk by Rachael Tatman | Offers insights into the BLEU score, highlighting its various issues through examples. 
| [🔗](https:\u002F\u002Ftowardsdatascience.com\u002Fevaluating-text-output-in-nlp-bleu-at-your-own-risk-e8609665a213?gi=2f135d0032ac)|\n| A Survey on Evaluation of LLMs by Chang et al. | Presents a comprehensive paper covering what to evaluate, where to evaluate, and how to evaluate language models. | [🔗](https:\u002F\u002Farxiv.org\u002Fabs\u002F2307.03109)|\n| Chatbot Arena Leaderboard by lmsys | Showcases an Elo rating system for general-purpose language models, based on comparisons made by humans. | [🔗](https:\u002F\u002Fhuggingface.co\u002Fspaces\u002Flmsys\u002Fchatbot-arena-leaderboard)|\n\n### 7. Quantization ⚖️\n\nQuantization involves converting the weights (and activations) of a model to lower precision. For instance, weights initially stored using 16 bits may be transformed into a 4-bit representation. This technique has gained significance in mitigating the computational and memory expenses linked with LLMs.\n\n| Concept | Description |\n| --- | --- |\n| Base techniques | Explore various levels of precision (FP32, FP16, INT8, etc.) and learn how to conduct naïve quantization using techniques like absmax and zero-point. | \n| GGUF and llama.cpp | Originally intended for CPU execution, [llama.cpp](https:\u002F\u002Fgithub.com\u002Fggerganov\u002Fllama.cpp) and the GGUF format have emerged as popular tools for running LLMs on consumer-grade hardware. |\n| GPTQ and EXL2 | [GPTQ](https:\u002F\u002Farxiv.org\u002Fabs\u002F2210.17323) and its variant, the [EXL2](https:\u002F\u002Fgithub.com\u002Fturboderp\u002Fexllamav2) format, offer remarkable speed but are limited to GPU execution. However, quantizing models using these formats can be time-consuming. |\n| AWQ | This newer format boasts higher accuracy compared to GPTQ, as indicated by lower perplexity, but demands significantly more VRAM and may not necessarily exhibit faster performance. 
|\n\n#### Further Exploration\n\n| Reference | Description | Link |\n| --- | --- | :---: |\n| Introduction to quantization | Offers an overview of quantization, including absmax and zero-point quantization, and demonstrates LLM.int8() with accompanying code. | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FModel%20Compression\u002FQuantization) |\n| Quantize Llama models with llama.cpp | Provides a tutorial on quantizing a Llama 2 model using llama.cpp and the GGUF format. | [🔗](https:\u002F\u002Fmlabonne.github.io\u002Fblog\u002Fposts\u002FQuantize_Llama_2_models_using_ggml.html) |\n| 4-bit LLM Quantization with GPTQ | Offers a tutorial on quantizing an LLM using the GPTQ algorithm with AutoGPTQ. | [🔗](https:\u002F\u002Fmlabonne.github.io\u002Fblog\u002Fposts\u002F4_bit_Quantization_with_GPTQ.html) |\n| ExLlamaV2 | Presents a guide on quantizing a Mistral model using the EXL2 format and running it with the ExLlamaV2 library, touted as the fastest library for LLMs. | [🔗](https:\u002F\u002Fmlabonne.github.io\u002Fblog\u002Fposts\u002FExLlamaV2_The_Fastest_Library_to_Run%C2%A0LLMs.html) |\n| Understanding Activation-Aware Weight Quantization by FriendliAI | Provides an overview of the AWQ technique and its associated benefits. | [🔗](https:\u002F\u002Fmedium.com\u002Ffriendliai\u002Funderstanding-activation-aware-weight-quantization-awq-boosting-inference-serving-efficiency-in-10bb0faf63a8) |\n\n\n### 8. New Trends 📈\n\n| Concept | Description |\n| --- | --- |\n| Positional embeddings | Explore how LLMs encode positions, focusing on relative positional encoding schemes like [RoPE](https:\u002F\u002Farxiv.org\u002Fabs\u002F2104.09864). 
Implement extensions to context length using techniques such as [YaRN](https:\u002F\u002Farxiv.org\u002Fabs\u002F2309.00071) (which multiplies the attention matrix by a temperature factor) or [ALiBi](https:\u002F\u002Farxiv.org\u002Fabs\u002F2108.12409) (applying attention penalty based on token distance). |\n| Model merging | Model merging has gained popularity as a method for creating high-performance models without additional fine-tuning. The widely-used [mergekit](https:\u002F\u002Fgithub.com\u002Farcee-ai\u002Fmergekit) library incorporates various merging methods including SLERP, [DARE](https:\u002F\u002Farxiv.org\u002Fabs\u002F2311.03099), and [TIES](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.01708). |\n| Mixture of Experts | The resurgence of the MoE architecture, exemplified by [Mixtral](https:\u002F\u002Farxiv.org\u002Fabs\u002F2401.04088), has led to the emergence of alternative approaches like frankenMoE, seen in community-developed models such as [Phixtral](https:\u002F\u002Fhuggingface.co\u002Fmlabonne\u002Fphixtral-2x2_8), offering cost-effective and high-performance alternatives. |\n| Multimodal models | These models, such as [CLIP](https:\u002F\u002Fopenai.com\u002Findex\u002Fclip\u002F), [Stable Diffusion](https:\u002F\u002Fstability.ai\u002Fstable-image), or [LLaVA](https:\u002F\u002Fllava-vl.github.io\u002F), process diverse inputs (text, images, audio, etc.) within a unified embedding space, enabling versatile applications like text-to-image generation. |\n| [glaive-function-calling-v2](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fglaiveai\u002Fglaive-function-calling-v2) | High-quality dataset with pairs of instructions and answers in different languages. \u003Cbr>See [Locutusque\u002Ffunction-calling-chatml](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FLocutusque\u002Ffunction-calling-chatml) for a variant without conversation tags. 
| Agent & Function calling |\n| [Agent-FLAN](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Finternlm\u002FAgent-FLAN)| Mix of AgentInstruct, ToolBench, and ShareGPT datasets.                                                                   | Agent & Function calling |\n\n#### Further Exploration\n\n| Reference | Description | Link |\n| --- | --- | :---: |\n| Extending the RoPE by EleutherAI | Article summarizing various position-encoding techniques. | [🔗](https:\u002F\u002Fblog.eleuther.ai\u002Fyarn\u002F) |\n| Understanding YaRN by Rajat Chawla | Introduction to YaRN. | [🔗](https:\u002F\u002Fmedium.com\u002F@rcrajatchawla\u002Funderstanding-yarn-extending-context-window-of-llms-3f21e3522465) |\n| Merge LLMs with mergekit | Tutorial on model merging using mergekit. | [🔗](https:\u002F\u002Fmlabonne.github.io\u002Fblog\u002Fposts\u002F2024-01-08_Merge_LLMs_with_mergekit.html) |\n| Mixture of Experts Explained by Hugging Face | Comprehensive guide on MoEs and their functioning. | [🔗](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Fmoe) |\n| Large Multimodal Models by Chip Huyen | Overview of multimodal systems and recent developments in the field. 
| [🔗](https:\u002F\u002Fhuyenchip.com\u002F2023\u002F10\u002F10\u002Fmultimodal.html) |\n\u003C\u002Fdetails>\n\u003Cbr>\n\n\n## Building Production-Ready LLM Applications\n\nLearn to create and deploy robust LLM-powered applications, focusing on model augmentation and practical deployment strategies for production environments.\n\n\u003Cdetails>\n\u003Csummary>⬇️ Ready to Build Production-Ready LLM Applications?⬇️ \u003C\u002Fsummary>\n\n```mermaid\ngraph LR\n    Scientist[\"Production-Ready LLM Applications 👩‍🔬\"] --> Architecture[\"Running LLMs 🏗️\"]\n    Scientist --> Storage[\"Building a Vector Storage 📦\"]\n    Scientist --> Retrieval[\"Retrieval Augmented Generation 🔍\"]\n    Scientist --> AdvancedRAG[\"Advanced RAG ⚙️\"]\n    Scientist --> Optimization[\"Inference Optimization ⚡\"]\n    Scientist --> Deployment[\"Deploying LLMs 🚀\"]\n    Scientist --> Secure[\"Securing LLMs 🔒\"]\n\n    Architecture --> APIs[\"LLM APIs 🌐\"]\n    Architecture --> OpenSource[\"Open Source LLMs 🌍\"]\n    Architecture --> PromptEng[\"Prompt Engineering 💬\"]\n    Architecture --> StructOutputs[\"Structure Outputs 🗂️\"]\n\n    Storage --> Ingest[\"Ingesting Documents 📥\"]\n    Storage --> Split[\"Splitting Documents ✂️\"]\n    Storage --> Embed[\"Embedding Models 🧩\"]\n    Storage --> VectorDB[\"Vector Databases 📊\"]\n\n    Retrieval --> Orchestrators[\"Orchestrators 🎼\"]\n    Retrieval --> Retrievers[\"Retrievers 🤖\"]\n    Retrieval --> Memory[\"Memory 🧠\"]\n    Retrieval --> Evaluation[\"Evaluation 📈\"]\n\n    AdvancedRAG --> Query[\"Query Construction 🔧\"]\n    AdvancedRAG --> Agents[\"Agents and Tools 🛠️\"]\n    AdvancedRAG --> PostProcess[\"Post Processing 🔄\"]\n    AdvancedRAG --> Program[\"Program LLMs 💻\"]\n\n    Optimization --> FlashAttention[\"Flash Attention ⚡\"]\n    Optimization --> KeyValue[\"Key-value Cache 🔑\"]\n    Optimization --> SpecDecoding[\"Speculative Decoding 🚀\"]\n\n    Deployment --> LocalDeploy[\"Local Deployment 🖥️\"]\n    Deployment --> 
DemoDeploy[\"Demo Deployment 🎤\"]\n    Deployment --> ServerDeploy[\"Server Deployment 🖧\"]\n    Deployment --> EdgeDeploy[\"Edge Deployment 🌐\"]\n\n    Secure --> PromptEngSecure[\"Prompt Engineering 🔐\"]\n    Secure --> Backdoors[\"Backdoors 🚪\"]\n    Secure --> Defensive[\"Defensive measures 🛡️\"]\n```\n\n### 1. Running LLMs\nRunning LLMs can be demanding due to significant hardware requirements. Based on your use case, you might opt to use a model through an API (like GPT-4) or run it locally. In either scenario, employing additional prompting and guidance techniques can improve and constrain the output for your applications.\n\n| **Category**            | **Details**                                                                                                                                                                                                                                                                                                        |\n|-------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n| **LLM APIs**            | APIs offer a convenient way to deploy LLMs. This space is divided between private LLMs ([OpenAI](https:\u002F\u002Fplatform.openai.com\u002F), [Google](https:\u002F\u002Fcloud.google.com\u002Fvertex-ai\u002Fdocs\u002Fgenerative-ai\u002Flearn\u002Foverview), [Anthropic](https:\u002F\u002Fdocs.anthropic.com\u002Fclaude\u002Freference\u002Fgetting-started-with-the-api), [Cohere](https:\u002F\u002Fdocs.cohere.com\u002Fdocs), etc.) and open-source LLMs ([OpenRouter](https:\u002F\u002Fopenrouter.ai\u002F), [Hugging Face](https:\u002F\u002Fhuggingface.co\u002Finference-api), [Together AI](https:\u002F\u002Fwww.together.ai\u002F), etc.). 
|\n| **Open-source LLMs**    | The [Hugging Face Hub](https:\u002F\u002Fhuggingface.co\u002Fmodels) is an excellent resource for finding LLMs. Some can be run directly in [Hugging Face Spaces](https:\u002F\u002Fhuggingface.co\u002Fspaces), or downloaded and run locally using apps like [LM Studio](https:\u002F\u002Flmstudio.ai\u002F) or through the command line interface with [llama.cpp](https:\u002F\u002Fgithub.com\u002Fggerganov\u002Fllama.cpp) or [Ollama](https:\u002F\u002Follama.ai\u002F).                     |\n| **Prompt Engineering**  | Techniques such as zero-shot prompting, few-shot prompting, chain of thought, and ReAct are commonly used in prompt engineering. These methods are more effective with larger models but can also be adapted for smaller ones.                                                                                       |\n| **Structuring Outputs** | Many tasks require outputs to be in a specific format, such as a strict template or JSON. Libraries like [LMQL](https:\u002F\u002Flmql.ai\u002F), [Outlines](https:\u002F\u002Fgithub.com\u002Foutlines-dev\u002Foutlines), and [Guidance](https:\u002F\u002Fgithub.com\u002Fguidance-ai\u002Fguidance) can help guide the generation process to meet these structural requirements.                       
|\n\n#### Further Exploration\n\n| **Reference**                                                                                                          | **Description**                                                                                                           | **Link**   |\n|------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------|------------|\n| Run an LLM locally with LM Studio by Nisha Arya                                                                        | A brief guide on how to use LM Studio for running a local LLM.                                                            | [🔗](https:\u002F\u002Fwww.kdnuggets.com\u002Frun-an-llm-locally-with-lm-studio) |\n| Prompt engineering guide by DAIR.AI                                                                                    | An extensive list of prompt techniques with examples.                                                                     | [🔗](https:\u002F\u002Fwww.promptingguide.ai\u002F)                              |\n| Outlines - Quickstart                                                                                                  | A quickstart guide detailing the guided generation techniques enabled by the Outlines library.                            | [🔗](https:\u002F\u002Foutlines-dev.github.io\u002Foutlines\u002Fquickstart\u002F)        |\n| LMQL - Overview                                                                                                        | An introduction to the LMQL language, explaining its features and usage.                                                  | [🔗](https:\u002F\u002Flmql.ai\u002Fdocs\u002Flanguage\u002Foverview.html)                |\n\n### 2. 
Building a Vector Storage\n\nCreating a vector storage is the first step in building a Retrieval Augmented Generation (RAG) pipeline. This involves loading and splitting documents, and then using the relevant chunks to produce vector representations (embeddings) that are stored for future use during inference.\n\n| **Category**           | **Details**                                                                                                                                                                                                                       |\n|------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n| **Ingesting Documents**| Document loaders are convenient wrappers that handle various formats such as PDF, JSON, HTML, Markdown, etc. They can also retrieve data directly from some databases and APIs (e.g., GitHub, Reddit, Google Drive).                 |\n| **Splitting Documents**| Text splitters break down documents into smaller, semantically meaningful chunks. Instead of splitting text after a certain number of characters, it's often better to split by header or recursively, with some additional metadata. |\n| **Embedding Models**   | Embedding models convert text into vector representations, providing a deeper and more nuanced understanding of language, which is essential for performing semantic search.                                                       |\n| **Vector Databases**   | Vector databases (like [Chroma](https:\u002F\u002Fwww.trychroma.com\u002F), [Pinecone](https:\u002F\u002Fwww.pinecone.io\u002F), [Milvus](https:\u002F\u002Fmilvus.io\u002F), [FAISS](https:\u002F\u002Ffaiss.ai\u002F), [Annoy](https:\u002F\u002Fgithub.com\u002Fspotify\u002Fannoy), etc.) 
store embedding vectors and enable efficient retrieval of data based on vector similarity. |\n\n#### Further Exploration\n\n| **Reference**                                                                                                           | **Description**                                                                                                           | **Link**   |\n|------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------|------------|\n| LangChain - Text splitters                                                                                             | A list of different text splitters implemented in LangChain.                                                              | [🔗](https:\u002F\u002Fpython.langchain.com\u002Fdocs\u002Fmodules\u002Fdata_connection\u002Fdocument_transformers\u002F) |\n| Sentence Transformers library                                                                                          | A popular library for embedding models.                                                                                   | [🔗](https:\u002F\u002Fwww.sbert.net\u002F)                              |\n| MTEB Leaderboard                                                                                                       | Leaderboard for evaluating embedding models.                                                                              | [🔗](https:\u002F\u002Fhuggingface.co\u002Fspaces\u002Fmteb\u002Fleaderboard)        |\n| The Top 5 Vector Databases by Moez Ali                                                                                 | A comparison of the best and most popular vector databases.                                                               
| [🔗](https:\u002F\u002Fwww.datacamp.com\u002Fblog\u002Fthe-top-5-vector-databases)                |\n\n### 3. Retrieval Augmented Generation\n\nUsing RAG, LLMs access relevant documents from a database to enhance the precision of their responses. This method is widely used to expand the model's knowledge base without the need for fine-tuning.\n\n| Category      | Details                                                                                                                                                                                                                                  |\n|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n| **Orchestrators** | Orchestrators (like [LangChain](https:\u002F\u002Fpython.langchain.com\u002Fdocs\u002Fget_started\u002Fintroduction), [LlamaIndex](https:\u002F\u002Fdocs.llamaindex.ai\u002Fen\u002Fstable\u002F), [FastRAG](https:\u002F\u002Fgithub.com\u002FIntelLabs\u002FfastRAG), etc.) are popular frameworks to connect your LLMs with tools, databases, memories, etc. and augment their abilities. |\n| **Retrievers**    | User instructions are not optimized for retrieval. Different techniques (e.g., multi-query retriever, [HyDE](https:\u002F\u002Farxiv.org\u002Fabs\u002F2212.10496), etc.) can be applied to rephrase\u002Fexpand them and improve performance.                                                      |\n| **Memory**        | To remember previous instructions and answers, LLMs and chatbots like ChatGPT add this history to their context window. This buffer can be improved with summarization (e.g., using a smaller LLM), a vector store + RAG, etc.                                           
|\n| **Evaluation**    | We need to evaluate both the document retrieval (context precision and recall) and generation stages (faithfulness and answer relevancy). It can be simplified with tools [Ragas](https:\u002F\u002Fgithub.com\u002Fexplodinggradients\u002Fragas\u002Ftree\u002Fmain) and [DeepEval](https:\u002F\u002Fgithub.com\u002Fconfident-ai\u002Fdeepeval).                 |\n\n\n#### Further Exploration\n\n| Reference                                       | Description                                                      | Link     |\n|-------------------------------------------------|------------------------------------------------------------------|----------|\n| Llamaindex - High-level concepts                | Main concepts to know when building RAG pipelines.               | [🔗](https:\u002F\u002Fdocs.llamaindex.ai\u002Fen\u002Fstable\u002Fgetting_started\u002Fconcepts.html) |\n| Pinecone - Retrieval Augmentation               | Overview of the retrieval augmentation process.                  | [🔗](https:\u002F\u002Fwww.pinecone.io\u002Flearn\u002Fseries\u002Flangchain\u002Flangchain-retrieval-augmentation\u002F) |\n| LangChain - Q&A with RAG                        | Step-by-step tutorial to build a typical RAG pipeline.           | [🔗](https:\u002F\u002Fpython.langchain.com\u002Fdocs\u002Fuse_cases\u002Fquestion_answering\u002Fquickstart) |\n| LangChain - Memory types                        | List of different types of memories with relevant usage.         | [🔗](https:\u002F\u002Fpython.langchain.com\u002Fdocs\u002Fmodules\u002Fmemory\u002Ftypes\u002F) |\n| RAG pipeline - Metrics                          | Overview of the main metrics used to evaluate RAG pipelines.     | [🔗](https:\u002F\u002Fdocs.ragas.io\u002Fen\u002Fstable\u002Fconcepts\u002Fmetrics\u002Findex.html) |\n\n### 4. Advanced RAG\n\nReal-world applications often demand intricate pipelines that utilize SQL or graph databases and dynamically choose the appropriate tools and APIs. 
These sophisticated methods can improve a basic solution and offer extra capabilities.\n\n| Category            | Details                                                                                                                                                                                                                                        |\n|---------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n| **Query construction** | Structured data stored in traditional databases requires a specific query language like SQL, Cypher, metadata, etc. We can directly translate the user instruction into a query to access the data with query construction.                        |\n| **Agents and tools**    | Agents augment LLMs by automatically selecting the most relevant tools to provide an answer. These tools can be as simple as using Google or Wikipedia, or more complex like a Python interpreter or Jira.                                         |\n| **Post-processing**     | The final step processes the inputs that are fed to the LLM. It enhances the relevance and diversity of documents retrieved with re-ranking, [RAG-fusion](https:\u002F\u002Fgithub.com\u002FRaudaschl\u002Frag-fusion), and classification.                           |\n| **Program LLMs**        | Frameworks like [DSPy](https:\u002F\u002Fgithub.com\u002Fstanfordnlp\u002Fdspy) allow you to optimize prompts and weights based on automated evaluations in a programmatic way.                                                                                      
|\n\n#### Further Exploration\n\n| Reference                                             | Description                                                            | Link     |\n|-------------------------------------------------------|------------------------------------------------------------------------|----------|\n| LangChain - Query Construction                        | Blog post about different types of query construction.                 | [🔗](https:\u002F\u002Fblog.langchain.dev\u002Fquery-construction\u002F) |\n| LangChain - SQL                                       | Tutorial on how to interact with SQL databases with LLMs, involving Text-to-SQL and an optional SQL agent. | [🔗](https:\u002F\u002Fpython.langchain.com\u002Fdocs\u002Fuse_cases\u002Fqa_structured\u002Fsql) |\n| Pinecone - LLM agents                                 | Introduction to agents and tools with different types.                 | [🔗](https:\u002F\u002Fwww.pinecone.io\u002Flearn\u002Fseries\u002Flangchain\u002Flangchain-agents\u002F) |\n| LLM Powered Autonomous Agents by Lilian Weng          | More theoretical article about LLM agents.                             | [🔗](https:\u002F\u002Flilianweng.github.io\u002Fposts\u002F2023-06-23-agent\u002F) |\n| LangChain - OpenAI's RAG                              | Overview of the RAG strategies employed by OpenAI, including post-processing. | [🔗](https:\u002F\u002Fblog.langchain.dev\u002Fapplying-openai-rag\u002F) |\n| DSPy in 8 Steps                                       | General-purpose guide to DSPy introducing modules, signatures, and optimizers. | [🔗](https:\u002F\u002Fdspy-docs.vercel.app\u002Fdocs\u002Fbuilding-blocks\u002Fsolving_your_task) |\n\n### 5. Inference Optimization\n\nText generation is an expensive process that requires powerful hardware. 
Besides quantization, various techniques have been proposed to increase throughput and lower inference costs.\n\n| Category            | Details                                                                                                                                            |\n|---------------------|----------------------------------------------------------------------------------------------------------------------------------------------------|\n| **Flash Attention** | Optimization of the attention mechanism to transform its complexity from quadratic to linear, speeding up both training and inference.             |\n| **Key-value cache** | Understanding the key-value cache and the improvements introduced in [Multi-Query Attention](https:\u002F\u002Farxiv.org\u002Fabs\u002F1911.02150) (MQA) and [Grouped-Query Attention](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.13245) (GQA). |\n| **Speculative decoding** | Using a small model to produce drafts that are then reviewed by a larger model to speed up text generation.                                      |\n\n#### Further Exploration\n\n| Reference                                             | Description                                                                                                 | Link     |\n|-------------------------------------------------------|-------------------------------------------------------------------------------------------------------------|----------|\n| GPU Inference by Hugging Face                         | Explain how to optimize inference on GPUs.                                                                  | [🔗](https:\u002F\u002Fhuggingface.co\u002Fdocs\u002Ftransformers\u002Fmain\u002Fen\u002Fperf_infer_gpu_one) |\n| LLM Inference by Databricks                           | Best practices for how to optimize LLM inference in production.                                             
| [🔗](https:\u002F\u002Fwww.databricks.com\u002Fblog\u002Fllm-inference-performance-engineering-best-practices) |\n| Optimizing LLMs for Speed and Memory by Hugging Face  | Explain three main techniques to optimize speed and memory, namely quantization, Flash Attention, and architectural innovations. | [🔗](https:\u002F\u002Fhuggingface.co\u002Fdocs\u002Ftransformers\u002Fmain\u002Fen\u002Fllm_tutorial_optimization) |\n| Assisted Generation by Hugging Face                   | HF's version of speculative decoding, it's an interesting blog post about how it works with code to implement it. | [🔗](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Fassisted-generation) |\n\n### 6. Deploying LLMs\n\nDeploying LLMs at scale is a complex engineering task that may require multiple GPU clusters. However, demos and local applications can often be achieved with significantly less complexity.\n\n| Category            | Details                                                                                                                                                                                                                       |\n|---------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n| **Local deployment** | Privacy is an important advantage that open-source LLMs have over private ones. Local LLM servers ([LM Studio](https:\u002F\u002Flmstudio.ai\u002F), [Ollama](https:\u002F\u002Follama.ai\u002F), [oobabooga](https:\u002F\u002Fgithub.com\u002Foobabooga\u002Ftext-generation-webui), [kobold.cpp](https:\u002F\u002Fgithub.com\u002FLostRuins\u002Fkoboldcpp), etc.) capitalize on this advantage to power local apps. 
|\n| **Demo deployment**  | Frameworks like [Gradio](https:\u002F\u002Fwww.gradio.app\u002F) and [Streamlit](https:\u002F\u002Fdocs.streamlit.io\u002F) are helpful to prototype applications and share demos. You can also easily host them online, for example using [Hugging Face Spaces](https:\u002F\u002Fhuggingface.co\u002Fspaces). |\n| **Server deployment** | Deploying LLMs at scale requires cloud infrastructure (see also [SkyPilot](https:\u002F\u002Fskypilot.readthedocs.io\u002Fen\u002Flatest\u002F)) or on-prem infrastructure and often leverages optimized text generation frameworks like [TGI](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Ftext-generation-inference), [vLLM](https:\u002F\u002Fgithub.com\u002Fvllm-project\u002Fvllm\u002Ftree\u002Fmain), etc. |\n| **Edge deployment**  | In constrained environments, high-performance frameworks like [MLC LLM](https:\u002F\u002Fgithub.com\u002Fmlc-ai\u002Fmlc-llm) and [mnn-llm](https:\u002F\u002Fgithub.com\u002Fwangzhaode\u002Fmnn-llm\u002Fblob\u002Fmaster\u002FREADME_en.md) can deploy LLMs in web browsers, Android, and iOS. |\n\n\n#### Further Exploration\n| Reference                                             | Description                                                                                                        | Link     |\n|-------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------|----------|\n| Streamlit - Build a basic LLM app                     | Tutorial to make a basic ChatGPT-like app using Streamlit.                                                         | [🔗](https:\u002F\u002Fdocs.streamlit.io\u002Fknowledge-base\u002Ftutorials\u002Fbuild-conversational-apps) |\n| HF LLM Inference Container                            | Deploy LLMs on Amazon SageMaker using Hugging Face's inference container.                                          
| [🔗](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Fsagemaker-huggingface-llm) |\n| Philschmid blog by Philipp Schmid                     | Collection of high-quality articles about LLM deployment using Amazon SageMaker.                                   | [🔗](https:\u002F\u002Fwww.philschmid.de\u002F) |\n| Optimizing latency by Hamel Husain                    | Comparison of TGI, vLLM, CTranslate2, and mlc in terms of throughput and latency.                                  | [🔗](https:\u002F\u002Fhamel.dev\u002Fnotes\u002Fllm\u002Finference\u002F03_inference.html) |\n\n\n### 7. Securing LLMs \n\nAlong with the usual security concerns of software, LLMs face distinct vulnerabilities arising from their training and prompting methods.\n\n| Category            | Details                                                                                                                                                                                                                                    |\n|---------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n| **Prompt hacking** | Techniques related to prompt engineering, including prompt injection (adding instructions to alter the model’s responses), data\u002Fprompt leaking (accessing original data or prompts), and jailbreaking (crafting prompts to bypass safety features). |\n| **Backdoors**       | Attack vectors targeting the training data itself, such as poisoning the training data with false information or creating backdoors (hidden triggers to alter the model’s behavior during inference).                                     
|\n| **Defensive measures** | Protecting LLM applications involves testing them for vulnerabilities (e.g., using red teaming and tools like [garak](https:\u002F\u002Fgithub.com\u002Fleondz\u002Fgarak\u002F)) and monitoring them in production (using a framework like [langfuse](https:\u002F\u002Fgithub.com\u002Flangfuse\u002Flangfuse)). |\n\n#### Further Exploration\n| Reference                                             | Description                                                                                                      | Link     |\n|-------------------------------------------------------|------------------------------------------------------------------------------------------------------------------|----------|\n| OWASP LLM Top 10 by HEGO Wiki                        | List of the 10 most critical vulnerabilities found in LLM applications.                                           | [🔗](https:\u002F\u002Fowasp.org\u002Fwww-project-top-10-for-large-language-model-applications\u002F) |\n| Prompt Injection Primer by Joseph Thacker            | Short guide dedicated to prompt injection techniques for engineers.                                               | [🔗](https:\u002F\u002Fgithub.com\u002Fjthack\u002FPIPE) |\n| LLM Security by @llm_sec                              | Extensive list of resources related to LLM security.                                                              | [🔗](https:\u002F\u002Fllmsecurity.net\u002F) |\n| Red teaming LLMs by Microsoft                         | Guide on how to perform red teaming assessments with LLMs.                                                          
| [🔗](https:\u002F\u002Flearn.microsoft.com\u002Fen-us\u002Fazure\u002Fai-services\u002Fopenai\u002Fconcepts\u002Fred-teaming) |\n\n\n\u003C\u002Fdetails>\n\n\u003Cbr>\n\n# In-Depth Articles \n## NLP\n\n| Article | Resources |\n| -------- | :---------: |\n| LLMs Overview | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FNLP\u002FLLMs%20Overview)|\n| NLP Embeddings | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FNLP\u002FNLP%20Embeddings)|\n| Preprocessing | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FNLP\u002FPreprocessing)|\n| Sampling | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002FArticles\u002FNLP\u002FSampling)| \n| Tokenization | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FNLP\u002FTokenization)|\n| Transformer | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FNLP\u002FTransformer\u002FAttention%20Is%20All%20You%20Need)|\n| Interview Preparation | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FInterview%20Preparation)|\n\n## Models \n\n| Article | Resources |\n| -------- | :---------: |\n| 
Generative Pre-trained Transformer (GPT) | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FModels\u002FGenerative%20Pre-trained%20Transformer%20(GPT))|\n\n## Training \n\n| Article | Resources |\n| -------- | :---------: |\n| Activation Function |  [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FTraining\u002FActivation%20Function)|\n| Fine Tuning Models | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FTraining\u002FFine%20Tuning%20Models)|\n| Enhancing Model Compression: Inference and Training Optimization Strategies | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FTraining\u002FModel%20Compression)|\n| Model Summary | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FTraining\u002FModel%20Summary)|\n| Splitting Datasets | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FTraining\u002FSplitting%20Datasets)|\n| Train Loss > Val Loss | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FTraining\u002FTrain%20Loss%20%3E%20Val%20Loss)|\n| Parameter Efficient Fine-Tuning | 
[🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FTraining\u002FParameter%20Efficient%20Fine-Tuning) |\n| Gradient Descent and Backprop | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FTraining\u002FGradient%20Descent%20and%20Backprop) |\n| Overfitting And Underfitting | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FTraining\u002FOverfitting%20And%20Underfitting)| \n| Gradient Accumulation and Checkpointing | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FTraining\u002FGradient%20Accumulation%20and%20Checkpointing)| \n| Flash Attention| [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FTraining\u002FFlash%20Attention)| \n\n## Enhancing Model Compression: Inference and Training Optimization Strategies\n\n| Article | Resources |\n| -------- | :---------: |\n| Quantization | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FModel%20Compression\u002FQuantization)|\n| Intro to Quantization | 
[🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FIntroduction_to_Weight_Quantization.ipynb)|\n| Knowledge Distillation | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FModel%20Compression\u002FKnowledge%20Distillation)|\n| Pruning | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FModel%20Compression\u002FPruning)|\n| DeepSpeed | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FModel%20Compression\u002FDeepSpeed)|\n| Sharding | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FModel%20Compression\u002FSharding)|\n| Mixed Precision Training | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FModel%20Compression\u002FMixed%20Precision%20Training)|\n| Inference Optimization | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FModel%20Compression\u002FInference%20Optimization)|\n\n## Evaluation Metrics \n| Article | Resources |\n| -------- | :---------: |\n| Classification | 
[🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FEvaluation%20Metrics\u002FClassification)|\n| Regression | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FEvaluation%20Metrics\u002FRegression)| \n| Generative Text Models | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FEvaluation%20Metrics\u002FGenerative%20Text%20Models)|\n\n## Open LLMs\n| Article | Resources |\n| -------- | :---------: |\n| Open Source LLM Space for Commercial Use | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FOpen%20LLMs\u002FCommercial%20Use)|\n| Open Source LLM Space for Research Use | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FOpen%20LLMs\u002FResearch%20Use)|\n| LLM Training Frameworks | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FOpen%20LLMs\u002FLLM%20Training%20Frameworks)|\n| Effective Deployment Strategies for Language Models | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FOpen%20LLMs\u002FDeployment)|\n| Tutorials about LLM | 
[🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FOpen%20LLMs\u002FTutorials)|\n| Courses about LLM | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FOpen%20LLMs\u002FCourses)|\n| Deployment | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FOpen%20LLMs\u002FDeployment)|\n\n## Resources for cost analysis and network visualization\n| Article | Resources |\n| -------- | :---------: |\n| Lambda Labs vs AWS Cost Analysis | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FResources)|\n| Neural Network Visualization | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FResources\u002FNeural%20Network%20Visualization)|\n\n# Codebase Mastery: Building with Perfection \n| Title | Repository | \n| ------- | :--------:|\n| Instruction-based data preparation using OpenAI | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002Fdata_generate_prepare)|\n| Optimal Fine-Tuning using the Trainer API: From Training to Model Inference| [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002Ftrain_inference)|\n| Efficient 
Fine-tuning and inference LLMs with PEFT and LoRA| [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002Ftrain_inference_peft_lora)|\n| Efficient Fine-tuning and inference LLMs with Accelerate| [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002Ftrain_inference_accelerate)|\n| Efficient Fine-tuning with T5 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002Ft5)|\n| Train Large Language Models with LoRA and Hugging Face | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FEfficiently_train_Large_Language_Models_with_LoRA_and_Hugging_Face.ipynb)|\n| Fine-Tune Your Own Llama 2 Model in a Colab Notebook | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FFine_Tune_Your_Own_Llama_2_Model_in_a_Colab_Notebook.ipynb)|\n| Guanaco Chatbot Demo with LLaMA-7B Model | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FGuanaco%20Chatbot%20Demo%20with%20LLaMA-7B%20Model.ipynb)|\n| PEFT Finetune-Bloom-560m-tagger | 
[🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FPEFT%20Finetune-Bloom-560m-tagger.ipynb)|\n| Finetune_Meta_OPT-6-1b_Model_bnb_peft | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FFinetune_Meta_OPT-6-1b_Model_bnb_peft.ipynb)|\n| Finetune Falcon-7b with BNB Self Supervised Training | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FFinetune%20Falcon-7b%20with%20BNB%20Self%20Supervised%20Training.ipynb)|\n| FineTune LLaMa2 with QLoRa | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FFineTune_LLAMA2_with_QLORA.ipynb)|\n| Stable_Vicuna13B_8bit_in_Colab | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FStable_Vicuna13B_8bit_in_Colab.ipynb)|\n| GPT-Neo-X-20B-bnb_4bit_training | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FGPT-neo-x-20B-bnb_4bit_training.ipynb)|\n| MPT-Instruct-30B Model Training | 
[🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FMPT_Instruct_30B.ipynb)|\n| RLHF_Training_for_CustomDataset_for_AnyModel | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FRLHF_Training_for_CustomDataset_for_AnyModel.ipynb)|\n| Fine_tuning_Microsoft_Phi_1_5b_on_custom_dataset(dialogstudio) | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FFine_tuning_Microsoft_Phi_1_5b_on_custom_dataset(dialogstudio).ipynb)|\n| Finetuning OpenAI GPT3.5 Turbo | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FFine_tuning_OpenAI_GPT_3_5_turbo.ipynb)|\n| Finetuning Mistral-7b Model using Autotrain-advanced| [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FFinetuning_Mistral_7b_Using_AutoTrain.ipynb)|\n| RAG LangChain Tutorial | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FRAG_LangChain.ipynb)|\n| Mistral DPO Trainer | 
[🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002Fmistral_trainer_dpo)|\n| LLM Sharding | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FLLM_Sharding.ipynb)|\n| Integrating Unstructured and Graph Knowledge with Neo4j and LangChain for Enhanced Question Answering | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FNeo4j_and_LangChain_for_Enhanced_Question_Answering.ipynb)|\n| vLLM Benchmarking | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002Fvllm_benchmark.py)|\n| Milvus Vector Database | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002Fvector_database)|\n| Decoding Strategies | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002Fdecoding_strategies)|\n| Peft QLora SageMaker Training | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002Fpeft_qlora_sm_training)|\n| Optimize Single Model SageMaker Endpoint | 
[🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002Foptimize_single_model_sm_endpoint)|\n| Multi Adapter Inference | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002Fmulti_adapter_inference)|\n| Inf2 LLM SM Deployment | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002FInf2%20LLM%20SM%20Deployment)|\n| Text Chunk Visualization `In Progress` | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002Ftext_chunk_visaulization)|\n| Fine-tune Llama 3 with ORPO | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FFine_tune_Llama_3_with_ORPO.ipynb)|\n| 4 bit LLM Quantization with GPTQ | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002F4_bit_LLM_Quantization_with_GPTQ.ipynb)|\n| Model Family Tree | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FModel_Family_Tree.ipynb)|\n| Create MoEs with MergeKit | 
[🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002Fmerge_model_mergekit.ipynb)|\n| Finetune Llama 3.1 on AWS, GCP or Azure | [🔗](https:\u002F\u002Fwww.zenml.io\u002Fblog\u002Fhow-to-finetune-llama-3-1-with-zenml)|\n| Finetune Phi 3.5 on AWS, GCP or Azure | [🔗](https:\u002F\u002Fwww.zenml.io\u002Fblog\u002Fhow-to-finetune-phi-3-5-with-zenml)|\n\n\n# LLM PlayLab\n| LLM Projects  |Repository|\n| ------------ | :------------: |\n|CSVQConnect   | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FCSVQConnect) |\n| AI_VIRTUAL_ASSISTANT  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FAI_VIRTUAL_ASSISTANT)|\n|  DocuBotMultiPDFConversationalAssistant | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FDocuBotMultiPDFConversationalAssistant)  |\n|  autogpt |  [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002Fautogpt)|\n|  meta_llama_2finetuned_text_generation_summarization | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002F-meta_llama_2finetuned_text_generation_summarization)  |\n| text_generation_using_Llama|[🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002Ftext_generation_using_Llama-2\u002Ftree\u002Fmain) |\n| llm_using_petals|[🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002Fllm_using_petals) |\n| llm_using_petals|[🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002Fllm_using_petals) |\n| Salesforce-xgen|[🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FSalesforce-xgen)|\n| text_summarization_using_open_llama_7b|[🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002Ftext_summarization_using_open_llama_7b)|\n| Text_summarization_using_GPT-J|[🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FText_summarization_using_GPT-J)|\n| codllama  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002Fcodllama) |\n| Image_to_text_using_LLaVA  | 
[🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FImage_to_text_using_LLaVA\u002Ftree\u002Fmain) |\n| Tabular_data_using_llamaindex  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FTabular_data_using_llamaindex) |\n| nextword_sentence_prediction  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002Fnextword_sentence_prediction) |\n| Text-Generation-using-DeciLM-7B-instruct  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FText-Generation-using-DeciLM-7B-instruct) |\n| Gemini-blog-creation  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FGemini-blog-creation\u002Ftree\u002Fmain) |\n| Prepare_holiday_cards_with_Gemini_and_Sheets  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FPrepare_holiday_cards_with_Gemini_and_Sheets\u002Ftree\u002Fmain) |\n| Code-Generattion_using_phi2_llm  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FCode-Generattion_using_phi2_llm) |\n| RAG-USING-GEMINI  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FRAG-USING-GEMINI) |\n| Resturant-Recommendation-Multi-Modal-RAG-using-Gemini  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FResturant-Recommendation-Multi-Modal-RAG-using-Gemini) |\n| slim-sentiment-tool  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002Fslim-sentiment-tool) |\n| Synthetic-Data-Generation-Using-LLM  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FCorporate-Presentations-Synthetic-Data-Generation-Using-LLM) |\n| Architecture-for-building-a-Chat-Assistant | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FDesign-an-Architecture-for-building-a-Chat-Assistant-for-an-ecommerce-platform) |\n| LLM-CHAT-ASSISTANT-WITH-DYNAMIC-CONTEXT-BASED-ON-QUERY | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FLLM-CHAT-ASSISTANT-WITH-DYNAMIC-CONTEXT-BASED-ON-QUERY) |\n| Text Classifier using LLM | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FAI-Powered-Text-Classifier-Harnessing-Large-Language-Models-for-Precise-Data-Categorization) |\n| Multiclass sentiment Analysis | 
[🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002Fmulti-class-sentiment-analysis-model-using-LLM) |\n| Text-Generation-Using-GROQ | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FText-Generation-Using-GROQ) |\n| DataAgents | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FDataAgents) |\n| PandasQuery_tabular_data | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FPandasQuery_tabular_data) |\n| Exploratory_Data_Analysis_using_LLM | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FExploratory_Data_Analysis_using_LLM\u002Ftree\u002Fmain) |\n\n# LLM Datasets\n\n| Dataset                                                                                                       | #     | Authors                      | Date     | Notes                                                                             | Category          |\n|------------------------------------------------------------------------------------------------------------- | ----- | ---------------------------- | -------- | --------------------------------------------------------------------------------- | ----------------- |\n| [Buzz](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FH-D-T\u002FBuzz)                                                          | 31.2M | Alignment Lab AI             | May 2024 | Huge collection of 435 datasets with data augmentation, deduplication, and other techniques. | General Purpose |\n| [WebInstructSub](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fchargoddard\u002FWebInstructSub-prometheus)                   | 2.39M | Yue et al.                   | May 2024 | Instructions created by retrieving document from Common Crawl, extracting QA pairs, and refining them. See the [MAmmoTH2 paper](https:\u002F\u002Farxiv.org\u002Fabs\u002F2405.03548) (this is a subset). | General Purpose |\n| [Bagel](https:\u002F\u002Fgithub.com\u002Fjondurbin\u002Fbagel)                                                                  | >2M?  
| Jon Durbin                   | Jan 2024 | Collection of datasets decontaminated with cosine similarity. | General Purpose |\n| [Hercules v4.5](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FLocutusque\u002Fhercules-v4.5)                                    | 1.72M | Sebastian Gabarain           | Apr 2024 | Large-scale general-purpose dataset with math, code, RP, etc. See [v4](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FLocutusque\u002Fhercules-v4.0) for the list of datasets. | General Purpose |\n| [Dolphin-2.9](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fcognitivecomputations\u002FDolphin-2.9)                              | 1.39M | Cognitive Computations      | Apr 2023 | Large-scale general-purpose dataset used by the Dolphin models. | General Purpose |\n| [WildChat-1M](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fallenai\u002FWildChat-1M)                                            | 1.04M | Zhao et al.                  | May 2023 | Real conversations between human users and GPT-3.5\u002F4, including metadata. See the [WildChat paper](https:\u002F\u002Farxiv.org\u002Fabs\u002F2405.01470). | General Purpose |\n| [OpenHermes-2.5](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fteknium\u002FOpenHermes-2.5)                                      | 1M    | Teknium                      | Nov 2023 | Another large-scale dataset used by the OpenHermes models. | General Purpose |\n| [SlimOrca](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FOpen-Orca\u002FSlimOrca)                                                | 518k  | Lian et al.                  | Sep 2023 | Curated subset of [OpenOrca](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FOpen-Orca\u002FOpenOrca) using GPT-4-as-a-judge to remove wrong answers. | General Purpose |\n| [Tulu V2 Mix](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fallenai\u002Ftulu-v2-sft-mixture)                                    | 326k  | Ivison et al.                
| Nov 2023 | Mix of high-quality datasets. See [Tulu 2 paper](https:\u002F\u002Farxiv.org\u002Fabs\u002F2311.10702). | General Purpose |\n| [UltraInteract SFT](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fopenbmb\u002FUltraInteract_sft)                                | 289k  | Yuan et al.                  | Apr 2024 | Focus on math, coding, and logic tasks with step-by-step answers. See [Eurus paper](https:\u002F\u002Farxiv.org\u002Fabs\u002F2404.02078). | General Purpose |\n| [NeurIPS-LLM-data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fupaya07\u002FNeurIPS-LLM-data)                                  | 204k  | Jindal et al.                | Nov 2023 | Winner of [NeurIPS LLM Efficiency Challenge](https:\u002F\u002Fllm-efficiency-challenge.github.io\u002F), with an interesting data preparation strategy. | General Purpose |\n| [UltraChat 200k](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FHuggingFaceH4\u002Fultrachat_200k)                                | 200k  | Tunstall et al., Ding et al. | Oct 2023 | Heavily filtered version of the [UItraChat](https:\u002F\u002Fgithub.com\u002Fthunlp\u002FUltraChat) dataset, consisting of 1.4M dialogues generated by ChatGPT. | General Purpose |\n| [WizardLM_evol_instruct_V2](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fmlabonne\u002FWizardLM_evol_instruct_v2_196K-ShareGPT) | 143k  | Xu et al.                    | Jun 2023 | Latest version of Evol-Instruct applied to Alpaca and ShareGPT data. See [WizardLM paper](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.12244). | General Purpose |\n| [sft_datablend_v1](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fnvidia\u002Fsft_datablend_v1)                                   | 128k  | NVIDIA                       | Jan 2024 | Blend of publicly available datasets: OASST, CodeContests, FLAN, T0, Open_Platypus, and GSM8K and others (45 total). 
| General Purpose |\n| [Synthia-v1.3](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fmigtissera\u002FSynthia-v1.3)                                       | 119k  | Migel Tissera                | Nov 2023 | High-quality synthetic data generated using GPT-4. | General Purpose |\n| [FuseChat-Mixture](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FFuseAI\u002FFuseChat-Mixture)                                   | 95k   | Wan et al.                   | Feb 2024 | Selection of samples from high-quality datasets. See [FuseChat paper](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.16107). | General Purpose |\n| [oasst1](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FOpenAssistant\u002Foasst1)                                                | 84.4k | Köpf et al.                  | Mar 2023 | Human-generated assistant-style conversation corpus in 35 different languages. See [OASST1 paper](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.07327) and [oasst2](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FOpenAssistant\u002Foasst2). | General Purpose |\n| [WizardLM_evol_instruct_70k](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fmlabonne\u002FWizardLM_evol_instruct_70k-ShareGPT) | 70k   | Xu et al.                    | Apr 2023 | Evol-Instruct applied to Alpaca and ShareGPT data. See [WizardLM paper](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.12244). | General Purpose |\n| [airoboros-3.2](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fjondurbin\u002Fairoboros-3.2)                                      | 58.7k | Jon Durbin                   | Dec 2023 | High-quality uncensored dataset.                                                                                                                                                                        
| General Purpose |\n| [ShareGPT_Vicuna_unfiltered](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fanon8231489123\u002FShareGPT_Vicuna_unfiltered)       | 53k   | anon823 1489123              | Mar 2023 | Filtered version of the ShareGPT dataset, consisting of real conversations between users and ChatGPT.                                                                                                   | General Purpose |\n| [lmsys-chat-1m-smortmodelsonly](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FNebulous\u002Flmsys-chat-1m-smortmodelsonly)       | 45.8k | Nebulous, Zheng et al.       | Sep 2023 | Filtered version of [lmsys-chat-1m](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Flmsys\u002Flmsys-chat-1m) with responses from GPT-4, GPT-3.5-turbo, Claude-2, Claude-1, and Claude-instant-1.                            | General Purpose |\n| [Open-Platypus](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fgarage-bAInd\u002FOpen-Platypus)                                   | 24.9k | Lee et al.                   | Sep 2023 | Collection of datasets that were deduplicated using Sentence Transformers (it contains an NC dataset). See [Platypus paper](https:\u002F\u002Farxiv.org\u002Fabs\u002F2308.07317).                                          | General Purpose |\n| [databricks-dolly-15k](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fdatabricks\u002Fdatabricks-dolly-15k)                       | 15k   | Conover et al.               | May 2023 | Generated by Databricks employees, prompt\u002Fresponse pairs in eight different instruction categories, including the seven outlined in the InstructGPT paper.  | General Purpose |\n| [OpenMathInstruct-1](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fnvidia\u002FOpenMathInstruct-1)                   | 5.75M | Toshniwal et al. | Feb 2024 | Problems from GSM8K and MATH, solutions generated by Mixtral-8x7B.                                                                      
| Math     |\n| [MetaMathQA](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fmeta-math\u002FMetaMathQA)                                | 395k  | Yu et al.       | Dec 2023 | Bootstrap mathematical questions by rewriting them from multiple perspectives. See [MetaMath paper](https:\u002F\u002Farxiv.org\u002Fabs\u002F2309.12284). | Math     |\n| [MathInstruct](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FTIGER-Lab\u002FMathInstruct)                            | 262k  | Yue et al.      | Sep 2023 | Compiled from 13 math rationale datasets, six of which are newly curated, and focuses on chain-of-thought and program-of-thought.      | Math     |\n| [Orca-Math](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fmicrosoft\u002Forca-math-word-problems-200k)               | 200k  | Mitra et al.    | Feb 2024 | Grade school math world problems generated using GPT4-Turbo. See [Orca-Math paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.14830.pdf).              | Math     |\n| [CodeFeedback-Filtered-Instruction](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fm-a-p\u002FCodeFeedback-Filtered-Instruction)     | 157k  | Zheng et al.    | Feb 2024   | Filtered version of Magicoder-OSS-Instruct, ShareGPT (Python), Magicoder-Evol-Instruct, and Evol-Instruct-Code.                                                                                                                                                                                                            | Code |\n| [Tested-143k-Python-Alpaca](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FVezora\u002FTested-143k-Python-Alpaca)                    | 143k  | Vezora          | Mar 2024   | Collection of generated Python code that passed automatic tests to ensure high quality.                                                                                                                                                                                                                                     
| Code |\n| [glaive-code-assistant](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fglaiveai\u002Fglaive-code-assistant)                          | 136k  | Glaive.ai       | Sep 2023   | Synthetic data of problems and solutions with ~60% Python samples. Also see the [v2](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fglaiveai\u002Fglaive-code-assistant-v2) version.                                                                                                                                                           | Code |\n| [Magicoder-Evol-Instruct-110K](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fise-uiuc\u002FMagicoder-Evol-Instruct-110K)            | 110k  | Wei et al.      | Nov 2023   | A decontaminated version of [evol-codealpaca-v1](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Ftheblackcat102\u002Fevol-codealpaca-v1). Decontamination is done in the same way as StarCoder ([bigcode decontamination process](https:\u002F\u002Fgithub.com\u002Fbigcode-project\u002Fbigcode-dataset\u002Ftree\u002Fmain\u002Fdecontamination)). See [Magicoder paper](https:\u002F\u002Farxiv.org\u002Fabs\u002F2312.02120). | Code |\n| [dolphin-coder](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fcognitivecomputations\u002Fdolphin-coder)                             | 109k  | Eric Hartford   | Nov 2023   | Dataset transformed from [leetcode-rosetta](https:\u002F\u002Fwww.kaggle.com\u002Fdatasets\u002Ferichartford\u002Fleetcode-rosetta).                                                                                                                                                                                                               | Code |\n| [synthetic_tex_to_sql](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fgretelai\u002Fsynthetic_text_to_sql)                           | 100k  | Gretel.ai       | Apr 2024   | Synthetic text-to-SQL samples (~23M tokens), covering diverse domains.                                                                 
                                                                                                                                                                                    | Code |\n| [sql-create-context](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fb-mc2\u002Fsql-create-context)                                   | 78.6k | b-mc2           | Apr 2023   | Cleansed and augmented version of the [WikiSQL](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fwikisql) and [Spider](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fspider) datasets.                                                                                                                                                                     | Code |\n| [Magicoder-OSS-Instruct-75K](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fise-uiuc\u002FMagicoder-OSS-Instruct-75K)                | 75k   | Wei et al.      | Nov 2023   | OSS-Instruct dataset generated by `gpt-3.5-turbo-1106`. See [Magicoder paper](https:\u002F\u002Farxiv.org\u002Fabs\u002F2312.02120).                                                                                                                                                                                                           | Code |\n| [Code-Feedback](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fm-a-p\u002FCode-Feedback)                                             | 66.4k | Zheng et al.    | Feb 2024   | Diverse Code Interpreter-like dataset with multi-turn dialogues and interleaved text and code responses. See [OpenCodeInterpreter paper](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.14658).                                                                                                                                                 
| Code |\n| [Open-Critic-GPT](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FVezora\u002FOpen-Critic-GPT)                                             | 55.1k | Vezora    | Jul 2024   | Use a local model to create, introduce, and identify bugs in code across multiple programming languages.                                                                                                                                                | Code |\n| [self-oss-instruct-sc2-exec-filter-50k](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fbigcode\u002Fself-oss-instruct-sc2-exec-filter-50k) | 50.7k | Lozhkov et al.  | Apr 2024   | Created in three steps with seed functions from TheStack v1, self-instruction with StarCoder2, and self-validation. See the [blog post](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Fsc2-instruct).                                                                                                                                           | Code |\n| [Bluemoon](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FSquish42\u002Fbluemoon-fandom-1-1-rp-cleaned)               | 290k  | Squish42                | Jun 2023 | Posts from the Blue Moon roleplaying forum cleaned and scraped by a third party.                              | Conversation & Role-Play |\n| [PIPPA](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fkingbri\u002FPIPPA-shareGPT)                                   | 16.8k | Gosling et al., kingbri | Aug 2023 | Deduped version of Pygmalion's [PIPPA](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FPygmalionAI\u002FPIPPA) in ShareGPT format. | Conversation & Role-Play |\n| [Capybara](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FLDJnr\u002FCapybara)                                        | 16k   | LDJnr                   | Dec 2023 | Strong focus on information diversity across a wide range of domains with multi-turn conversations.           
| Conversation & Role-Play |\n| [RPGPT_PublicDomain-alpaca](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fpractical-dreamer\u002FRPGPT_PublicDomain-alpaca) | 4.26k | practical dreamer       | May 2023 | Synthetic dataset of public domain character dialogue in roleplay format made with [build-a-dataset](https:\u002F\u002Fgithub.com\u002Fpractical-dreamer\u002Fbuild-a-dataset). | Conversation & Role-Play |\n| [Pure-Dove](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FLDJnr\u002FPure-Dove)                                      | 3.86k | LDJnr                   | Sep 2023 | Highly filtered multi-turn conversations between GPT-4 and real humans.                                       | Conversation & Role-Play |\n| [Opus Samantha](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fmacadeliccc\u002Fopus_samantha)                        | 1.85k | macadelicc              | Apr 2024 | Multi-turn conversations with Claude 3 Opus.                                                                  | Conversation & Role-Play |\n| [LimaRP-augmented](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fgrimulkan\u002FLimaRP-augmented)                    | 804   | lemonilia, grimulkan    | Jan 2024 | Augmented and cleansed version of LimaRP, consisting of human roleplaying conversations.                      | Conversation & Role-Play |\n| [glaive-function-calling-v2](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fglaiveai\u002Fglaive-function-calling-v2) | 113k  | Sahil Chaudhary | Sep 2023 | High-quality dataset with pairs of instructions and answers in different languages. \u003Cbr>See [Locutusque\u002Ffunction-calling-chatml](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FLocutusque\u002Ffunction-calling-chatml) for a variant without conversation tags. 
| Agent & Function calling |\n| [xlam-function-calling-60k](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FSalesforce\u002Fxlam-function-calling-60k) | 60k   | Salesforce      | Jun 2024 | Samples created using a data generation pipeline designed to produce verifiable data for function-calling applications. | Agent & Function calling |\n| [Agent-FLAN](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Finternlm\u002FAgent-FLAN)                                 | 34.4k | internlm        | Mar 2024 | Mix of AgentInstruct, ToolBench, and ShareGPT datasets.                                                                   | Agent & Function calling |\n\n\n# LLM Alignment\n\nAlignment is an emerging field of study where you ensure that an AI system performs exactly what you want it to perform. In the context of LLMs specifically, alignment is a process that trains an LLM to ensure that the generated outputs align with human values and goals. \n\nWhat are the current methods for LLM alignment? \n\nYou will find many alignment methods in research literature; we will stick to only 3 alignment methods for the sake of discussion.\n\n### 📌 RLHF: \n- Step 1 & 2: Train an LLM (pre-training for the base model + supervised\u002Finstruction fine-tuning for chat model) \n- Step 3: RLHF uses an ancillary language model (it could be much smaller than the main LLM) to learn human preferences. This can be done using a preference dataset - it contains a prompt, and a response\u002Fset of responses graded by expert human labelers. This is called a “reward model”. \n- Step 4: Use a reinforcement learning algorithm (e.g., PPO - proximal policy optimization), where the LLM is the agent, the reward model provides a positive or negative reward to the LLM based on how well its responses align with the “human preferred responses”. \nIn theory, it is as simple as that. However, implementation isn’t that easy - requiring a lot of human experts and compute resources. 
To overcome the “expense” of RLHF, researchers developed DPO.\n- RLHF : [RLHF: Reinforcement Learning from Human Feedback](https:\u002F\u002Fhuyenchip.com\u002F2023\u002F05\u002F02\u002Frlhf.html)\n\n### 📌 DPO:\n- Step 1&2 remain the same\n- Step 4: DPO eliminates the need for the training of a reward model (i.e., step 3). How? DPO defines an additional preference loss as a function of its policy and uses the language model directly as the reward model. The idea is simple: if you are already training such a powerful LLM, why not train it to distinguish between good and bad responses, instead of using another model?\n- DPO is shown to be more computationally efficient (in case of RLHF you also need to constantly monitor the behavior of the reward model) and has better performance than RLHF in several settings. \n- Blog on DPO : [Aligning LLMs with Direct Preference Optimization (DPO)— background, overview, intuition and paper summary](https:\u002F\u002Fmedium.com\u002F@ManishChablani\u002Faligning-llms-with-direct-preference-optimization-dpo-background-overview-intuition-and-paper-0a72b9dc539c)\n  \n### 📌 ORPO: \n- The newest method out of all 3, ORPO combines Step 2, 3 & 4 into a single step - so the dataset required for this method is a combination of a fine-tuning + preference dataset. \n- The supervised fine-tuning and alignment\u002Fpreference optimization are performed in a single step. This is because the fine-tuning step, while allowing the model to specialize to tasks and domains, can also increase the probability of undesired responses from the model. \n- ORPO combines the steps using a single objective function by incorporating an odds ratio (OR) term - rewarding preferred responses & penalizing rejected responses. 
\n- Blog on ORPO : [ORPO Outperforms SFT+DPO | Train Phi-2 with ORPO](https:\u002F\u002Fmedium.com\u002F@zaiinn440\u002Forpo-outperforms-sft-dpo-train-phi-2-with-orpo-3ee6bf18dbf2)\n\n# Data Generation\n\n## Data Filtering \n\n| Datasets | Descriptions | Link | \n| --------- | -------- | :-----: |\n| Rule-based filtering | Remove samples based on a list of unwanted words, like refusals and \"As an AI assistant\" | [🔗](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fcognitivecomputations\u002FWizardLM_alpaca_evol_instruct_70k_unfiltered\u002Fblob\u002Fmain\u002Fwizardlm_clean.py) |\n| SemHash | Fuzzy deduplication based on fast embedding generation with a distilled model. | [🔗](https:\u002F\u002Fgithub.com\u002FMinishLab\u002Fsemhash) |\n \n\n## SFT Datasets\n\n| Datasets | Descriptions | Link | \n| --------- | -------- | :-----: |\n|  Distilabel | General-purpose framework that can generate and augment data (SFT, DPO) with techniques like UltraFeedback and DEITA | [🔗](https:\u002F\u002Fgithub.com\u002Fargilla-io\u002Fdistilabel) |\n| Auto Data | Lightweight library to automatically generate fine-tuning datasets with API models.|  [🔗](https:\u002F\u002Fgithub.com\u002FItachi-Uchiha581\u002FAuto-Data) |\n| Bonito |  Library for generating synthetic instruction tuning datasets for your data without GPT (see also [AutoBonito](https:\u002F\u002Fcolab.research.google.com\u002Fdrive\u002F1l9zh_VX0X4ylbzpGckCjH5yEflFsLW04?usp=sharing)). |  [🔗](https:\u002F\u002Fgithub.com\u002FBatsResearch\u002Fbonito) | \n| Augmentoolkit | Framework to convert raw text into datasets using open-source and closed-source models. | [🔗](https:\u002F\u002Fgithub.com\u002Fe-p-armstrong\u002Faugmentoolkit)| \n| Magpie | Your efficient and high-quality synthetic data generation pipeline by prompting aligned LLMs with nothing. 
| [🔗](https:\u002F\u002Fgithub.com\u002Fmagpie-align\u002Fmagpie)|\n| Genstruct | An instruction generation model, which is designed to generate valid instructions from raw data | [🔗](https:\u002F\u002Fhuggingface.co\u002FNousResearch\u002FGenstruct-7B)|\n| DataDreamer | A Python library for prompting and synthetic data generation. | [🔗](https:\u002F\u002Fdatadreamer.dev\u002Fdocs\u002Flatest\u002F)|\n\n## Pre-training datasets\n\n| Datasets | Descriptions | Link | \n| --------- | -------- | :-----: |\n| llm-swarm |Generate synthetic datasets for pretraining or fine-tuning using either local LLMs or Inference Endpoints on the Hugging Face Hub| [🔗](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fllm-swarm)|\n| Cosmopedia | Hugging Face's code for generating the Cosmopedia dataset. | [🔗](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fcosmopedia)|\n| textbook_quality | A repository for generating textbook-quality data, mimicking the approach of Microsoft's Phi models. | [🔗](https:\u002F\u002Fgithub.com\u002FVikParuchuri\u002Ftextbook_quality)|\n\n## Data exploration\n| Datasets | Descriptions | Link | \n| --------- | -------- | :-----: |\n| sentence-transformers | A Python module for working with popular language embedding models. | [🔗](https:\u002F\u002Fsbert.net\u002F)|\n| Lilac | Tool to curate better data for LLMs, used by NousResearch, databricks, cohere, Alignment Lab AI. It can also apply filters.| [🔗](https:\u002F\u002Fgithub.com\u002Flilacai\u002Flilac)|\n| Nomic Atlas | Interact with instructed data to find insights and store embeddings. | [🔗](https:\u002F\u002Fgithub.com\u002Fnomic-ai\u002Fnomic)|\n| text-clustering | Easily embed, cluster and semantically label text datasets | [🔗](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Ftext-clustering)|\n\n## Data scraping\n| Datasets | Descriptions | Link | \n| --------- | -------- | :-----: |\n| Trafilatura | Python and command-line tool to gather text and metadata on the web. 
Used for the creation of [RefinedWeb](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.01116). | [🔗](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.01116)|\n| marker | Convert PDF to markdown + JSON quickly with high accuracy | [🔗](https:\u002F\u002Fgithub.com\u002FVikParuchuri\u002Fmarker)|\n\n## Understand LLM\n| Resources |  Link | \n| --------- |  :-----: |\n| Brown, Tom B. \"Language models are few-shot learners.\" arXiv preprint arXiv:2005.14165 (2020). | [🔗](https:\u002F\u002Frosanneliu.com\u002Fdlctfs\u002Fdlct_200724.pdf) |\n| Kambhampati, Subbarao, et al. \"LLMs can't plan, but can help planning in LLM-modulo frameworks.\" arXiv preprint arXiv:2402.01817 (2024). | [🔗](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.01817) | \n\n\n# What I am learning\n\nAfter immersing myself in the recent GenAI text-based language model hype for nearly a month, I have made several observations about its performance on my specific tasks.\n\nPlease note that these observations are subjective and specific to my own experiences, and your conclusions may differ.\n\n- We need a minimum of 7B parameter models (≥7B) for optimal natural language understanding performance. Models with fewer parameters result in a significant decrease in performance. However, using models with more than 7 billion parameters requires a GPU with greater than 24GB VRAM (>24GB).\n- Benchmarks can be tricky as different LLMs perform better or worse depending on the task. It is crucial to find the model that works best for your specific use case. In my experience, MPT-7B is still the superior choice compared to Falcon-7B.\n- Prompts change with each model iteration. Therefore, multiple reworks are necessary to adapt to these changes. While there are potential solutions, their effectiveness is still being evaluated.\n- For fine-tuning, you need at least one GPU with greater than 24GB VRAM (>24GB). 
A GPU with 32GB or 40GB VRAM is recommended.\n- Fine-tuning only the last few layers to speed up LLM training\u002Ffinetuning may not yield satisfactory results. I have tried this approach, but it didn't work well.\n- Loading 8-bit or 4-bit models can save VRAM. For a 7B model, instead of requiring 16GB, it takes approximately 10GB or less than 6GB, respectively. However, this reduction in VRAM usage comes at the cost of significantly decreased inference speed. It may also result in lower performance in text understanding tasks.\n- Those who are exploring LLM applications for their companies should be aware of licensing considerations. Training a model with another model as a reference and requiring original weights is not advisable for commercial settings.\n- There are three major types of LLMs: basic (like GPT-2\u002F3), chat-enabled, and instruction-enabled. Most of the time, basic models are not usable as they are and require fine-tuning. Chat versions tend to be the best, but they are often not open-source.\n- Not every problem needs to be solved with LLMs. Avoid forcing a solution around LLMs. Similar to the situation with deep reinforcement learning in the past, it is important to find the most appropriate approach.\n- I have tried but didn't use langchains and vector-dbs. I never needed them. Simple Python, embeddings, and efficient dot product operations worked well for me.\n- LLMs do not need to have complete world knowledge. Humans also don't possess comprehensive knowledge but can adapt. LLMs only need to know how to utilize the available knowledge. It might be possible to create smaller models by separating the knowledge component.\n- The next wave of innovation might involve simulating \"thoughts\" before answering, rather than simply predicting one word after another. This approach could lead to significant advancements.\n- The overparameterization of LLMs presents a significant challenge: they tend to memorize extensive amounts of training data. 
This becomes particularly problematic in RAG scenarios when the context conflicts with this \"implicit\" knowledge. However, the situation escalates further when the context itself contains contradictory information. A recent survey paper comprehensively analyzes these \"knowledge conflicts\" in LLMs, categorizing them into three distinct types:\n    - Context-Memory Conflicts: Arise when external context contradicts the LLM's internal knowledge.\n        - Solution:\n            - Fine-tune on counterfactual contexts to prioritize external information.\n            - Utilize specialized prompts to reinforce adherence to context.\n            - Apply decoding techniques to amplify context probabilities.\n            - Pre-train on diverse contexts across documents.\n\n    - Inter-Context Conflicts: Contradictions between multiple external sources.\n        - Solution:\n            - Employ specialized models for contradiction detection.\n            - Utilize fact-checking frameworks integrated with external tools.\n            - Fine-tune discriminators to identify reliable sources.\n            - Aggregate high-confidence answers from augmented queries.\n    - Intra-Memory Conflicts: The LLM gives inconsistent outputs for similar inputs due to conflicting internal knowledge.\n        - Solution:\n            - Fine-tune with consistency loss functions.\n            - Implement plug-in methods, retraining on word definitions.\n            - Ensemble one model's outputs with another's coherence scoring.\n            - Apply contrastive decoding, focusing on truthful layers\u002Fheads.\n- The difference between PPO and DPO: in DPO you don’t need to train a reward model anymore. Having good and bad data would be sufficient!\n- ORPO: “A straightforward and innovative reference model-free monolithic odds ratio preference optimization algorithm, ORPO, eliminating the necessity for an additional preference alignment phase. 
” [Hong, Lee, Thorne (2024)](https:\u002F\u002Farxiv.org\u002Fabs\u002F2403.07691)\n- KTO: “KTO does not need preferences -- only a binary signal of whether an output is desirable or undesirable for a given input. This makes it far easier to use in the real world, where preference data is scarce and expensive.” [Ethayarajh et al (2024)](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.01306)\n\n# Contributing\nContributions are welcome! If you'd like to contribute to this project, feel free to open an issue or submit a pull request.\n\n# License\nThis project is licensed under the [MIT License](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002FLICENSE).\n\n# About The Author\n[Sunil Ghimire](https:\u002F\u002Fsunilghimire.com.np\u002F) is an NLP Engineer passionate about literature. He believes that words and data are the two most powerful tools to change the world. \n\n---\n\u003Cp align=\"center\">\n  \u003Ca href=\"https:\u002F\u002Fstar-history.com\u002F#ghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing&Date\">\n    \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fghimiresunil_LLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing_readme_58147ad582c7.png\" alt=\"Star History Chart\">\n  \u003C\u002Fa>\n\u003C\u002Fp>\n\nCreated with ❤️ by [Sunil Ghimire](https:\u002F\u002Fsunilghimire.com.np\u002F)","\u003Cdiv align=\"center\">\n  \u003Ch1>🗣️ LLM 力量中心\u003C\u002Fh1>\n  \u003Cp>\n    \u003Cp>\n  \u003Ca href=\"https:\u002F\u002Ftwitter.com\u002FGhimire12Sunil\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002Ftwitter-%231DA1F2.svg?&style=for-the-badge&logo=twitter&logoColor=white\" height=25>\u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fwww.linkedin.com\u002Fin\u002Fghimiresunil\u002F\">\u003Cimg 
src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002Flinkedin-%230077B5.svg?&style=for-the-badge&logo=linkedin&logoColor=white\" height=25>\u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fwww.instagram.com\u002F_tech_tutor\u002F\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002Finstagram-%23E4405F.svg?&style=for-the-badge&logo=instagram&logoColor=white\" height=25>\u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fsunilghimire.com.np\">\u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002Fwebsite-25D366?style=for-the-badge&logo=website&logoColor=white\" height=25>\u003C\u002Fa>\n  \u003C\u002Fp>\n   \u003Cp>\u003Cem>通过精选教程、最佳实践以及可用于自定义训练和推理的现成代码，释放 LLM 的潜力。\u003C\u002Fem>\u003C\u002Fp>\n\u003C\u002Fdiv>\n\n# 概述\n欢迎来到 LLM-PowerHouse，您释放大型语言模型（LLMs）在自定义训练和推理方面全部潜力的终极资源。这个 GitHub 仓库是一个全面且精心策划的指南，旨在赋能开发者、研究人员和爱好者，帮助他们充分发挥 LLM 的真正能力，构建能够突破自然语言理解边界的智能应用。\n\n# 快速导航\n\n## 按目标开始\n- 🧠 学习基础知识 → [LLMs 的基础](#foundations-of-llms)\n- 🧪 训练与对齐模型 → [解锁 LLM 科学的艺术](#unlock-the-art-of-llm-science)\n- 🏭 构建生产级应用（RAG、部署、安全）→ [构建生产就绪的 LLM 应用程序](#building-production-ready-llm-applications)\n- 📚 浏览所有主题指南 → [深度文章](#in-depth-articles)\n- 💻 跳转到可运行示例 → [代码库精通：完美构建](#codebase-mastery-building-with-perfection)\n- 🗂️ 快速探索数据集 → [LLM 数据集](#llm-datasets)\n\n## 仓库结构\n- [文章](.\u002FArticles)\n- [示例代码库](.\u002Fexample_codebase)\n- [数据集](.\u002Fdataset)\n- [许可证](.\u002FLICENSE)\n\n## 完整目录\n- [LLMs 的基础](#foundations-of-llms)\n- [解锁 LLM 科学的艺术](#unlock-the-art-of-llm-science)\n- [构建生产就绪的 LLM 应用程序](#building-production-ready-llm-applications)\n- [深度文章](#in-depth-articles)\n    - [NLP](#nlp)\n    - [模型](#models)\n    - [训练](#training)\n    - [增强模型压缩：推理与训练优化策略](#enhancing-model-compression-inference-and-training-optimization-strategies)\n    - [评估指标](#evaluation-metrics)\n    - [开源 LLMs](#open-llms)\n    - [成本分析与网络可视化资源](#resources-for-cost-analysis-and-network-visualization)\n- [代码库精通：完美构建](#codebase-mastery-building-with-perfection)\n- [LLM 玩乐实验室](#llm-playlab)\n- [LLM 
数据集](#llm-datasets)\n- [LLM 对齐](#llm-alignment)\n- [数据生成](#data-generation)\n- [我正在学习的内容](#what-i-am-learning)\n- [贡献](#contributing)\n- [许可证](#license)\n- [关于作者](#about-the-author)\n\n## LLMs 的基础\n\n本节提供了关于数学、Python 和神经网络的基础知识。这可能不是理想的起点，但您可以在需要时随时查阅。\n\n\u003Cdetails>\n\u003Csummary>⬇️ 准备好掌握 LLMs 的基础了吗？ ⬇️ \u003C\u002Fsummary>\n\n```mermaid\ngraph LR\n    Foundations[\"📚 大型语言模型（LLMs）的基础\"] --> ML[\"1️⃣ 机器学习中的数学\"]\n    Foundations[\"📚 大型语言模型（LLMs）的基础\"] --> Python[\"2️⃣ 用于机器学习的 Python\"]\n    Foundations[\"📚 大型语言模型（LLMs）的基础\"] --> NN[\"3️⃣ 神经网络\"]\n    Foundations[\"📚 大型语言模型（LLMs）的基础\"] --> NLP[\"4️⃣ 自然语言处理（NLP）\"]\n    \n    ML[\"1️⃣ 机器学习中的数学\"] --> LA[\"📐 线性代数\"]\n    ML[\"1️⃣ 机器学习中的数学\"] --> Calculus[\"📏 微积分\"]\n    ML[\"1️⃣ 机器学习中的数学\"] --> Probability[\"📊 概率与统计\"]\n    \n    Python[\"2️⃣ 用于机器学习的 Python\"] --> PB[\"🐍 Python 基础\"]\n    Python[\"2️⃣ 用于机器学习的 Python\"] --> DS[\"📊 数据科学库\"]\n    Python[\"2️⃣ 用于机器学习的 Python\"] --> DP[\"🔄 数据预处理\"]\n    Python[\"2️⃣ 用于机器学习的 Python\"] --> MLL[\"🤖 机器学习库\"]\n    \n    NN[\"3️⃣ 神经网络\"] --> Fundamentals[\"🔧 基础知识\"]\n    NN[\"3️⃣ 神经网络\"] --> TO[\"⚙️ 训练与优化\"]\n    NN[\"3️⃣ 神经网络\"] --> Overfitting[\"📉 过拟合\"]\n    NN[\"3️⃣ 神经网络\"] --> MLP[\"🧠 MLP 的实现\"]\n    \n    NLP[\"4️⃣ 自然语言处理（NLP）\"] --> TP[\"📝 文本预处理\"]\n    NLP[\"4️⃣ 自然语言处理（NLP）\"] --> FET[\"🔍 特征提取技术\"]\n    NLP[\"4️⃣ 自然语言处理（NLP）\"] --> WE[\"🌐 词嵌入\"]\n    NLP[\"4️⃣ 自然语言处理（NLP）\"] --> RNN[\"🔄 循环神经网络\"]\n\n```\n\n### 1. 
机器学习中的数学\n\n在掌握机器学习之前，理解支撑这些算法的基础数学概念至关重要。\n\n| 概念                    | 描述 |\n|----------------------------|-------------|\n| **线性代数**         | 对于理解许多算法，尤其是深度学习中的算法，至关重要。关键概念包括向量、矩阵、行列式、特征值、特征向量、向量空间和线性变换。 |\n| **微积分**               | 在许多机器学习算法中，优化连续函数时非常重要。重要主题包括导数、积分、极限、级数、多元微积分和梯度。 |\n| **概率与统计** | 理解模型如何从数据中学习并进行预测的关键。关键概念涵盖概率论、随机变量、概率分布、期望、方差、协方差、相关性、假设检验、置信区间、最大似然估计和贝叶斯推断。 |\n\n#### 拓展阅读\n\n| 参考资料 | 描述 | 链接 |\n| --- | --- | :---: |\n| 3Blue1Brown - 线性代数的本质 | 提供一系列视频，以几何直观的方式讲解线性代数的基本概念。 | [🔗](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=fNk_zzaMoSs&list=PLZHQObOWTQDPD3MizzM2xVFitgF8hE_ab) |\n| StatQuest with Josh Starmer - 统计学基础 | 通过视频教程，清晰简洁地解释各种统计学概念。 | [🔗](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=qBigTkBLU6g&list=PLblh5JKOoLUK0FLuzwntyYI10UQFUhsY9) |\n| AP Statistics Intuition by Ms Aerin | 整理了一系列Medium文章，提供对不同概率分布的直观见解。 | [🔗](https:\u002F\u002Fautomata88.medium.com\u002Flist\u002Fcacc224d5e7d) |\n| Immersive Linear Algebra | 以另一种视觉化方式呈现线性代数概念，帮助理解。 | [🔗](https:\u002F\u002Fimmersivemath.com\u002Fila\u002Flearnmore.html) |\n| 可汗学院 - 线性代数 | 专为初学者设计，提供线性代数基本主题的直观解释。 | [🔗](https:\u002F\u002Fwww.khanacademy.org\u002Fmath\u002Flinear-algebra) |\n| 可汗学院 - 微积分 | 提供全面覆盖微积分要点的互动课程。 | [🔗](https:\u002F\u002Fwww.khanacademy.org\u002Fmath\u002Fcalculus-1) |\n| 可汗学院 - 概率与统计 | 提供易于理解的材料，用于学习概率与统计概念。 | [🔗](https:\u002F\u002Fwww.khanacademy.org\u002Fmath\u002Fstatistics-probability) |\n\n### 2. 
机器学习中的Python\n\n| 概念 | 描述 |\n| --- | --- |\n| **Python基础** | 掌握Python编程需要理解其基本语法、数据类型、错误处理以及面向对象编程原则。 |\n| **数据科学库** | 熟悉NumPy（用于数值计算）、Pandas（用于数据操作）以及Matplotlib和Seaborn（用于数据可视化）等核心库，对于高效的数据分析至关重要。 |\n| **数据预处理** | 这一阶段涉及特征缩放、缺失值处理、异常值检测、类别型数据编码，以及将数据划分为训练集、验证集和测试集等关键任务，以确保数据质量和模型性能。 |\n| **机器学习库** | 熟练使用Scikit-learn这一全面的机器学习库是必不可少的。理解和实现线性回归、逻辑回归、决策树、随机森林、K近邻（K-NN）和K均值聚类等算法，对于构建预测模型至关重要。此外，熟悉PCA和t-SNE等降维技术，有助于有效可视化复杂的数据结构。 |\n\n#### 拓展阅读\n\n| 参考资料 | 描述 | 链接 |\n| --- | --- | :---: |\n| Real Python | 一个全面的资源，提供针对初学者和高级用户的Python概念文章和教程。 | [🔗](https:\u002F\u002Frealpython.com\u002F) |\n| freeCodeCamp - 学习Python | 一段较长的视频，全面介绍所有核心Python概念。 | [🔗](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=rfscVS0vtbw) |\n| Python数据科学手册 | 一本免费的电子书，是学习Pandas、NumPy、Matplotlib和Seaborn的绝佳资源。 | [🔗](https:\u002F\u002Fjakevdp.github.io\u002FPythonDataScienceHandbook\u002F) |\n| freeCodeCamp - 人人都能学的机器学习 | 为初学者提供的关于各种机器学习算法的实用入门课程。 | [🔗](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=i_LwzRVP7bg) |\n| Udacity - 机器学习导论 | 面向初学者的机器学习入门课程，涵盖基础算法。 | [🔗](https:\u002F\u002Fwww.udacity.com\u002Fcourse\u002Fintro-to-machine-learning--ud120) |\n\n\n### 3. 
神经网络\n\n| 概念                    | 描述 |\n|----------------------------|-------------|\n| 基础知识 | 理解神经网络的基本结构，包括层、权重、偏置以及sigmoid、tanh和ReLU等激活函数。 |\n| 训练与优化 | 学习反向传播以及均方误差（MSE）和交叉熵等各种损失函数。熟悉梯度下降、随机梯度下降、RMSprop和Adam等优化算法。 |\n| 过拟合 | 掌握过拟合的概念——即模型在训练数据上表现良好，但在未见过的数据上表现较差——并探索如dropout、L1\u002FL2正则化、早停和数据增强等正则化技术，以缓解过拟合问题。 |\n| 实现多层感知机（MLP） | 使用PyTorch构建一个多层感知机（MLP），也称为全连接网络。 |\n\n#### 拓展阅读\n\n| 参考资料 | 描述 | 链接 |\n| --- | --- | :---: |\n| 3Blue1Brown - 但什么是神经网络？ | 本视频以直观的方式解释神经网络及其内部工作原理。 | [🔗](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=aircAruvnKk) |\n| freeCodeCamp - 深度学习速成课 | 该视频高效地介绍了深度学习中最重要的一些概念。 | [🔗](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=CS4cs9xVecg) |\n| Fast.ai - 实用深度学习 | 一门专为有编程经验、希望学习深度学习的人设计的免费课程。 | [🔗](https:\u002F\u002Fcourse.fast.ai\u002F) |\n| Patrick Loeber - PyTorch教程 | 一系列面向完全初学者的视频，帮助学习PyTorch。 | [🔗](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PLqnslRFeH2UrcDBWF5mfPGpqQDSta6VK4) |\n\n### 4. 自然语言处理（NLP）\n\n| 概念 | 描述 |\n|----------------------------|-------------|\n| 文本预处理 | 学习各种文本预处理步骤，例如分词（将文本拆分为单词或句子）、词干提取（将单词还原为词根形式）、词形还原（与词干提取类似，但考虑上下文）以及停用词移除。 |\n| 特征提取技术 | 熟悉将文本数据转换为机器学习算法可理解格式的技术。主要方法包括词袋模型（BoW）、词频-逆文档频率（TF-IDF）和n-gram。 |\n| 词嵌入 | 理解词嵌入，这是一种词表示方法，能够使语义相似的词语具有相似的表示。主要方法包括Word2Vec、GloVe和FastText。 |\n| 循环神经网络（RNNs） | 学习RNN，一种专为处理序列数据设计的神经网络，并探索LSTM和GRU这两种能够学习长期依赖关系的RNN变体。 |\n\n\n#### 深入探索\n\n| 参考资料 | 描述 | 链接 |\n| --- | --- | :---: |\n| RealPython - 使用spaCy进行Python中的NLP | 一篇关于在Python中使用spaCy库进行NLP任务的详尽指南。 | [🔗](https:\u002F\u002Frealpython.com\u002Fnatural-language-processing-spacy-python\u002F) |\n| Kaggle - NLP指南 | 一系列笔记本和资源，提供Python中NLP的实践性讲解。 | [🔗](https:\u002F\u002Fwww.kaggle.com\u002Flearn\u002Fnatural-language-processing) |\n| Jay Alammar - 图解Word2Vec | 一份详细参考资料，用于理解Word2Vec架构。 | [🔗](https:\u002F\u002Fjalammar.github.io\u002Fillustrated-word2vec\u002F) |\n| Jake Tae - 从零开始的PyTorch RNN | 在PyTorch中对RNN、LSTM和GRU模型的实用且直接的实现。 | [🔗](https:\u002F\u002Fwww.youtube.com\u002Fplaylist?list=PLhhyoLH6IjfwU8t8_eElRxa2GvGv8vV_h) |\n| 
colah的博客 - 理解LSTM网络 | 一篇解释LSTM网络的理论文章。 | [🔗](https:\u002F\u002Fcolah.github.io\u002Fposts\u002F2015-08-Understanding-LSTMs\u002F) |\n\n\u003C\u002Fdetails>\n\u003Cbr>\n\n\n## 解锁LLM科学的艺术\n\n在本课程模块中，参与者将深入掌握通过前沿方法创建顶级LLM的技术。\n\n\u003Cdetails>\n\u003Csummary>⬇️ 准备好迎接LLM科学了吗？ ⬇️ \u003C\u002Fsummary>\n\n```mermaid\ngraph LR\n    Scientist[\"LLM科学的艺术 👩‍🔬\"] --> Architecture[\"LLM架构 🏗️\"]\n    Scientist[\"LLM科学的艺术 👩‍🔬\"] --> Instruction[\"构建指令数据集 📚\"]\n    Scientist[\"LLM科学的艺术 👩‍🔬\"] --> Pretraining[\"预训练模型 🛠️\"]\n    Scientist[\"LLM科学的艺术 👩‍🔬\"] --> FineTuning[\"监督微调 🎯\"]\n    Scientist[\"LLM科学的艺术 👩‍🔬\"] --> RLHF[\"RLHF 🔍\"]\n    Scientist[\"LLM科学的艺术 👩‍🔬\"] --> Evaluation[\"评估 📊\"]\n    Scientist[\"LLM科学的艺术 👩‍🔬\"] --> Quantization[\"量化 ⚖️\"]\n    Scientist[\"LLM科学的艺术 👩‍🔬\"] --> Trends[\"新趋势 📈\"]\n    Architecture[\"LLM架构 🏗️\"] --> HLV[\"高层次视图 🔍\"]\n    Architecture[\"LLM架构 🏗️\"] --> Tokenization[\"分词 🔠\"]\n    Architecture[\"LLM架构 🏗️\"] --> Attention[\"注意力机制 🧠\"]\n    Architecture[\"LLM架构 🏗️\"] --> Generation[\"文本生成 ✍️\"]\n    Instruction[\"构建指令数据集 📚\"] --> Alpaca[\"类似Alpaca的数据集 🦙\"]\n    Instruction[\"构建指令数据集 📚\"] --> Advanced[\"高级技术 📈\"]\n    Instruction[\"构建指令数据集 📚\"] --> Filtering[\"数据过滤 🔍\"]\n    Instruction[\"构建指令数据集 📚\"] --> Prompt[\"提示模板 📝\"]\n    Pretraining[\"预训练模型 🛠️\"] --> Pipeline[\"数据流水线 🚀\"]\n    Pretraining[\"预训练模型 🛠️\"] --> CLM[\"因果语言建模 📝\"]\n    Pretraining[\"预训练模型 🛠️\"] --> Scaling[\"规模法则 📏\"]\n    Pretraining[\"预训练模型 🛠️\"] --> HPC[\"高性能计算 💻\"]\n    FineTuning[\"监督微调 🎯\"] --> Full[\"完全微调 🛠️\"]\n    FineTuning[\"监督微调 🎯\"] --> Lora[\"LoRA和QLoRA 🌀\"]\n    FineTuning[\"监督微调 🎯\"] --> Axoloti[\"Axolotl 🦠\"]\n    FineTuning[\"监督微调 🎯\"] --> DeepSpeed[\"DeepSpeed ⚡\"]\n    RLHF[\"RLHF 🔍\"] --> Preference[\"偏好数据集 📝\"]\n    RLHF[\"RLHF 🔍\"] --> Optimization[\"近端策略优化 🎯\"]\n    RLHF[\"RLHF 🔍\"] --> DPO[\"直接偏好优化 📈\"]\n    Evaluation[\"评估 📊\"] --> Traditional[\"传统指标 📏\"]\n    Evaluation[\"评估 📊\"] --> General[\"通用基准 📈\"]\n    Evaluation[\"评估 📊\"] --> Task[\"特定任务基准 
📋\"]\n    Evaluation[\"评估 📊\"] --> HF[\"人工评估 👩‍🔬\"]\n    Quantization[\"量化 ⚖️\"] --> Base[\"基础技术 🛠️\"]\n    Quantization[\"量化 ⚖️\"] --> GGUF[\"GGUF和llama.cpp 🐐\"]\n    Quantization[\"量化 ⚖️\"] --> GPTQ[\"GPTQ和EXL2 🤖\"]\n    Quantization[\"量化 ⚖️\"] --> AWQ[\"AWQ 🚀\"]\n    Trends[\"新趋势 📈\"] --> Positional[\"位置嵌入 🎯\"]\n    Trends[\"新趋势 📈\"] --> Merging[\"模型合并 🔄\"]\n    Trends[\"新趋势 📈\"] --> MOE[\"专家混合模型 🎭\"]\n    Trends[\"新趋势 📈\"] --> Multimodal[\"多模态模型 📷\"]\n```\n\n### 1. 大语言模型架构 🏗️\n\n对 Transformer 架构的概述，重点介绍输入（标记）和输出（logits），以及理解原始注意力机制及其改进版本的重要性。\n\n| 概念 | 描述 |\n| --- | --- |\n| Transformer 架构（高层次） | 回顾编码器-解码器型 Transformer，特别是现代大语言模型中使用的仅解码器 GPT 架构。 |\n| 分词 | 理解如何将原始文本转换为模型可处理的标记（单词或子词）。 |\n| 注意力机制 | 掌握注意力背后的理论，包括自注意力和缩放点积注意力，这些机制使模型在生成输出时能够关注输入的相关部分。|\n| 文本生成 | 学习模型用于生成输出序列的不同方法。常见策略包括贪心解码、束搜索、top-k 采样和核采样。|\n\n#### 深入探索\n\n| 参考资料 | 描述 | 链接 |\n| --- | --- | :---: |\n| Jay Alammar 的《图解 Transformer》 | 对 Transformer 模型的可视化且直观的解释 | [🔗](https:\u002F\u002Fjalammar.github.io\u002Fillustrated-transformer\u002F) |\n| Jay Alammar 的《图解 GPT-2》 | 专注于 GPT 架构，与 Llama 的架构类似。 | [🔗](https:\u002F\u002Fjalammar.github.io\u002Fillustrated-gpt2\u002F) |\n| 3Blue1Brown 的 Transformer 视觉入门 | 简单的 Transformer 视觉介绍 | [🔗](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=wjZofJX0v4M&t=187s) |\n| Brendan Bycroft 的 LLM 可视化 | LLM 内部结构的 3D 可视化 | [🔗](https:\u002F\u002Fbbycroft.net\u002Fllm) |\n| Andrej Karpathy 的 nanoGPT | 从零开始重新实现 GPT（面向程序员） | [🔗](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=kCc8FmEb1nY) |\n| 大语言模型中的解码策略 | 提供解码策略的代码和可视化 | [🔗](https:\u002F\u002Fmlabonne.github.io\u002Fblog\u002Fposts\u002F2023-06-07-Decoding_strategies.html) |\n\n\n### 2. 
构建指令数据集 📚\n\n虽然从维基百科和其他网站上很容易找到原始数据，但在实际环境中收集指令与答案的配对却很困难。与传统机器学习一样，数据集的质量会直接影响模型的质量，因此它可能是微调过程中最重要的组成部分。\n\n| 概念 | 描述 |\n| --- | --- |\n| 类 Alpaca 数据集 | 这种数据集生成方法利用 OpenAI API（GPT）从头合成数据，允许指定种子和系统提示，以促进数据集的多样性。 |\n| 高级技术 | 深入研究使用 [Evol-Instruct](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.12244) 增强现有数据集的方法，并探索生成顶级合成数据的途径，类似于 [Orca](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.02707) 和 [phi-1](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.11644) 研究论文中所描述的方法。|\n| 数据过滤 | 采用传统的正则表达式、近似重复项去除以及优先选择具有大量标记的答案等技术来优化数据集。|\n| 提示模板 | 认识到目前尚无明确的标准来规范指令和响应的结构，强调熟悉各种聊天模板的重要性，例如 [ChatML](https:\u002F\u002Flearn.microsoft.com\u002Fen-us\u002Fazure\u002Fai-services\u002Fopenai\u002Fhow-to\u002Fchatgpt?tabs=python&pivots=programming-language-chat-ml) 和 [Alpaca](https:\u002F\u002Fcrfm.stanford.edu\u002F2023\u002F03\u002F13\u002Falpaca.html)。|\n\n#### 深入探索\n\n| 参考资料 | 描述 | 链接 |\n| --- | --- | :---:|\n| Thomas Capelle 的《为指令微调准备数据集》 | 探讨 Alpaca 和 Alpaca-GPT4 数据集，并讨论格式化方法。 | [🔗](https:\u002F\u002Fwandb.ai\u002Fcapecape\u002Falpaca_ft\u002Freports\u002FHow-to-Fine-Tune-an-LLM-Part-1-Preparing-a-Dataset-for-Instruction-Tuning--Vmlldzo1NTcxNzE2) |\n| Solano Todeschini 的《生成临床指令数据集》 | 提供使用 GPT-4 创建合成指令数据集的教程。 | [🔗](https:\u002F\u002Fmedium.com\u002Fmlearning-ai\u002Fgenerating-a-clinical-instruction-dataset-in-portuguese-with-langchain-and-gpt-4-6ee9abfa41ae) |\n| Kshitiz Sahay 的《使用 GPT 3.5 进行新闻分类》 | 展示如何使用 GPT 3.5 创建用于在新闻分类任务中微调 Llama 2 的指令数据集。 | [🔗](https:\u002F\u002Fmedium.com\u002F@kshitiz.sahay26\u002Fhow-i-created-an-instruction-dataset-using-gpt-3-5-to-fine-tune-llama-2-for-news-classification-ed02fe41c81f) |\n| 用于微调 LLM 的数据集创建 | 包含过滤数据集并上传结果的技术的笔记本。 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FDataset_creation_for_fine_tuning_LLM.ipynb) |\n| Matthew Carrigan 的聊天模板 | Hugging Face 关于提示模板的页面 | 
[🔗](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Fchat-templates) |\n\n### 3. 预训练模型 🛠️\n\n预训练过程既耗时又昂贵，因此并非本课程的重点。虽然掌握预训练的基础知识很有帮助，但在这一领域获得实践经验并不是必需的。\n\n| 概念 | 描述 |\n| --- | --- |\n| 数据流水线 | 预训练涉及处理海量数据集，例如 [Llama 2](https:\u002F\u002Farxiv.org\u002Fabs\u002F2307.09288) 中使用的 2 万亿个 token，这需要执行诸如过滤、分词和词汇表准备等任务。 |\n| 因果语言建模 | 理解因果语言建模与掩码语言建模之间的区别，包括相应的损失函数。通过 [Megatron-LM](https:\u002F\u002Fgithub.com\u002FNVIDIA\u002FMegatron-LM) 或 [gpt-neox](https:\u002F\u002Fgithub.com\u002FEleutherAI\u002Fgpt-neox) 等资源探索高效的预训练技术。 |\n| 扩展定律 | 深入了解 [扩展定律](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2001.08361)，它根据模型规模、数据集大小以及训练过程中所用的计算资源等因素，阐明了预期的模型性能。 |\n| 高性能计算 | 虽然超出了本次讨论的范围，但对于那些考虑从头构建自己的 LLM 的人来说，深入理解 HPC 将变得至关重要，包括硬件选择和分布式工作负载管理等方面。 |\n\n#### 更多探索\n\n| 参考资料 | 描述 | 链接 |\n| --- | --- | :---:|\n| Junhao Zhao 的 LLMDataHub | 提供精心策划的数据集集合，专为预训练、微调和 RLHF 设计。 | [🔗](https:\u002F\u002Fgithub.com\u002FZjh-819\u002FLLMDataHub) |\n| Hugging Face 的从零开始训练因果语言模型 | 引导用户使用 transformers 库从头开始对 GPT-2 模型进行预训练。 | [🔗](https:\u002F\u002Fhuggingface.co\u002Flearn\u002Fnlp-course\u002Fchapter7\u002F6?fw=pt) |\n| Zhang 等人的 TinyLlama | 提供了从零开始训练 Llama 模型的过程见解，有助于全面理解。 | [🔗](https:\u002F\u002Fgithub.com\u002Fjzhang38\u002FTinyLlama) |\n| Hugging Face 的因果语言建模 | 探讨因果语言建模与掩码语言建模的区别，并提供高效微调 DistilGPT-2 模型的教程。 | [🔗](https:\u002F\u002Fhuggingface.co\u002Fdocs\u002Ftransformers\u002Ftasks\u002Flanguage_modeling) |\n| nostalgebraist 的《Chinchilla 的深远影响》 | 深入探讨扩展定律及其对 LLM 的影响，提供了关于其更广泛意义的宝贵见解。 | [🔗](https:\u002F\u002Fwww.lesswrong.com\u002Fposts\u002F6Fpvch8RR29qLEWNH\u002Fchinchilla-s-wild-implications) |\n| BigScience 的 BLOOM | 对 BLOOM 模型的构建进行了全面概述，提供了有关其工程实现及所面临挑战的重要见解。 | [🔗](https:\u002F\u002Fbigscience.notion.site\u002FBLOOM-BigScience-176B-Model-ad073ca07cdf479398d5f95d88e218c4) |\n| Meta 的 OPT-175 日志 | 提供了研究日志，详细记录了在预训练一个拥有 1750 亿参数的大语言模型过程中遇到的成功与失败。 | [🔗](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fmetaseq\u002Fblob\u002Fmain\u002Fprojects\u002FOPT\u002Fchronicles\u002FOPT175B_Logbook.pdf) |\n| 
LLM 360 | 展示了一个面向开源 LLM 的综合框架，涵盖训练和数据准备代码、数据集、评估指标以及模型。 | [🔗](https:\u002F\u002Fwww.llm360.ai\u002F) |\n\n### 4. 监督微调 🎯\n预训练模型被训练用来预测下一个词，因此它们并不适合作为助手。但通过 SFT，你可以调整它们以更好地遵循指令。此外，你还可以在不同的数据上对其进行微调，甚至可以使用 GPT-4 从未见过的私有数据，并且无需依赖 OpenAI 等公司的付费 API 即可使用这些模型。\n\n| 概念 | 描述 |\n| --- | --- |\n| 完全微调 | 完全微调是指训练模型中的所有参数，尽管这种方法效率不高，但可以获得略微更好的效果。 |\n| [LoRA](https:\u002F\u002Farxiv.org\u002Fabs\u002F2106.09685) | LoRA 是一种基于低秩适配器的参数高效技术 (PEFT)，它只专注于训练这些适配器，而不是整个模型的所有参数。 |\n| [QLoRA](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.14314) | QLoRA 是另一种源自 LoRA 的 PEFT 技术，它还将模型权重量化为 4 位，并引入分页优化器来有效管理内存峰值。 |\n| [Axolotl](https:\u002F\u002Fgithub.com\u002FOpenAccess-AI-Collective\u002Faxolotl) | Axolotl 是一款用户友好且功能强大的微调工具，已被广泛应用于众多最先进的开源模型中。 |\n| [DeepSpeed](https:\u002F\u002Fwww.deepspeed.ai\u002F) | DeepSpeed 可以在多 GPU 和多节点环境中高效地进行大型语言模型的预训练和微调，通常与 Axolotl 集成以提升性能。 |\n\n#### 更多探索\n| 参考资料 | 描述 | 链接 |\n| --- | --- | :---: |\n| Alpin 的新手 LLM 训练指南 | 提供了微调 LLM 所需的基本概念和参数概述。 | [🔗](https:\u002F\u002Frentry.org\u002Fllm-training)|\n| Sebastian Raschka 的 LoRA 洞察 | 提供了关于 LoRA 的实用见解，并指导如何选择最佳参数。 | [🔗](https:\u002F\u002Flightning.ai\u002Fpages\u002Fcommunity\u002Flora-insights\u002F)|\n| 微调你自己的 Llama 2 模型 | 提供了一份使用 Hugging Face 库微调 Llama 2 模型的实践教程。 | [🔗](https:\u002F\u002Fmlabonne.github.io\u002Fblog\u002Fposts\u002FFine_Tune_Your_Own_Llama_2_Model_in_a_Colab_Notebook.html)|\n| Benjamin Marie 的《为大型语言模型填充数据》 | 阐述了在因果语言模型中填充训练样本的最佳实践。 | [🔗](https:\u002F\u002Ftowardsdatascience.com\u002Fpadding-large-language-models-examples-with-llama-2-199fb10df8ff)|\n\n### 5. RLHF 🔍\n在监督微调之后，RLHF 是使大型语言模型的响应与人类期望相一致的关键步骤。这涉及从人类或人工智能反馈中获取偏好，从而减少偏差、实施模型审查或促进更具功利性的行为。RLHF 显然比 SFT 更为复杂，通常被视为可选步骤。\n\n| 概念 | 描述 |\n| --- | --- |\n| 偏好数据集 | 通常包含若干答案及某种形式的排序，这类数据集比指令数据集更难构建。 |\n| [近端策略优化](https:\u002F\u002Farxiv.org\u002Fabs\u002F1707.06347) | 该算法利用奖励模型预测给定文本是否受到人类的高度评价。随后，它基于 KL 散度惩罚来优化 SFT 模型。 |\n| [直接偏好优化](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.18290) | DPO 将这一过程简化为分类问题。它使用参考模型而非奖励模型（无需训练），且仅需一个超参数，因此更加稳定和高效。 |\n\n#### 
深入探索\n\n| 参考文献 | 描述 | 链接 |\n| --- | --- | :---: |\n| Ayush Thakur 的《使用 RLHF 训练 LLM 简介》 | 解释了为何 RLHF 有助于减少 LLM 中的偏差并提升性能。| [🔗](https:\u002F\u002Fwandb.ai\u002Fayush-thakur\u002FIntro-RLAIF\u002Freports\u002FAn-Introduction-to-Training-LLMs-Using-Reinforcement-Learning-From-Human-Feedback-RLHF---VmlldzozMzYyNjcy)|\n| Hugging Face 的 RLHF 示例 | 介绍 RLHF，包括奖励模型的训练以及通过强化学习进行微调。 | [🔗](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Frlhf)|\n| Hugging Face 的 StackLLaMA | 使用 transformers 库通过 RLHF 高效对齐 LLaMA 模型的教程 | [🔗](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Fstackllama)|\n| Sebastian Raschka 的《LLM 训练中的 RLHF 及其替代方案》 | 概述 RLHF 流程及 RLAIF 等替代方案。 | [🔗](https:\u002F\u002Fmagazine.sebastianraschka.com\u002Fp\u002Fllm-training-rlhf-and-its-alternatives)|\n| 使用 DPO 微调 Llama2 | 使用 DPO 微调 Llama2 模型的教程 | [🔗](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Fdpo-trl)|\n\n### 6. 评估 📊\n评估大型语言模型往往是整个流程中被忽视的一个环节，其特点是耗时且可靠性一般。您的评估标准应根据下游任务量身定制，同时需牢记古德哈特定律：“当某项指标成为目标时，它就不再是好的指标了。”\n\n| 概念 | 描述 |\n| --- | --- |\n| 传统指标 | 困惑度和 BLEU 分数等指标虽然因其上下文局限性而如今不太受青睐，但它们对于理解模型及其适用场景仍然至关重要。 |\n| 通用基准 | 对于 ChatGPT 等通用大型语言模型而言，主要基准是 [Open LLM Leaderboard](https:\u002F\u002Fhuggingface.co\u002Fspaces\u002FHuggingFaceH4\u002Fopen_llm_leaderboard)，该榜单基于 [Language Model Evaluation Harness](https:\u002F\u002Fgithub.com\u002FEleutherAI\u002Flm-evaluation-harness)。其他著名的基准还包括 BigBench 和 MT-Bench。 |\n| 任务特定基准 | 总结、翻译和问答等任务拥有专门的基准、指标，甚至子领域（如医学、金融），例如用于生物医学问答的 [PubMedQA](https:\u002F\u002Fpubmedqa.github.io\u002F)。 |\n| 人工评估 | 最可靠的评估方法是用户接受率或人机对比指标。此外，借助 [LangSmith](https:\u002F\u002Fdocs.smith.langchain.com\u002Fold\u002Fevaluation) 等工具记录用户反馈及聊天轨迹，有助于精确定位潜在的改进方向。 |\n\n#### 进一步评估\n\n| 参考文献 | 描述 | 链接 |\n| --- | --- | :---: |\n| Hugging Face 的固定长度模型困惑度 | 提供困惑度的概述，并附带使用 transformers 库实现困惑度的代码。 | [🔗](https:\u002F\u002Fhuggingface.co\u002Fdocs\u002Ftransformers\u002Fperplexity)|\n| Rachael Tatman 的“自担风险使用 BLEU” | 深入探讨 BLEU 分数，通过实例指出其存在的各种问题。 | 
[🔗](https:\u002F\u002Ftowardsdatascience.com\u002Fevaluating-text-output-in-nlp-bleu-at-your-own-risk-e8609665a213?gi=2f135d0032ac)|\n| Chang 等人的《LLM 评估综述》 | 全面阐述了应评估什么、在哪里评估以及如何评估语言模型。 | [🔗](https:\u002F\u002Farxiv.org\u002Fabs\u002F2307.03109)|\n| lmsys 的聊天机器人竞技场排行榜 | 展示了一个基于人类比较的通用语言模型 Elo 评分系统。 | [🔗](https:\u002F\u002Fhuggingface.co\u002Fspaces\u002Flmsys\u002Fchatbot-arena-leaderboard)|\n\n### 7. 量化 ⚖️\n\n量化是指将模型的权重（以及激活值）转换为更低精度的数据表示。例如，原本用16位存储的权重可以被转换为4位表示。这一技术在降低大型语言模型相关的计算和内存开销方面变得愈发重要。\n\n| 概念 | 描述 |\n| --- | --- |\n| 基础技术 | 探讨不同精度级别（FP32、FP16、INT8等），并学习如何使用如absmax和零点法等方法进行朴素量化。 |\n| GGUF 和 llama.cpp | [llama.cpp](https:\u002F\u002Fgithub.com\u002Fggerganov\u002Fllama.cpp) 及其 GGUF 格式最初是为了在 CPU 上运行而设计的，如今已成为在消费级硬件上运行 LLM 的热门工具。 |\n| GPTQ 和 EXL2 | [GPTQ](https:\u002F\u002Farxiv.org\u002Fabs\u002F2210.17323) 及其变体 [EXL2](https:\u002F\u002Fgithub.com\u002Fturboderp\u002Fexllamav2) 格式提供了惊人的速度，但仅限于 GPU 上运行。然而，使用这些格式对模型进行量化可能非常耗时。 |\n| AWQ | 这种较新的格式相比 GPTQ 具有更高的准确性，表现为更低的困惑度，但需要显著更多的显存，并且不一定能带来更快的性能。 |\n\n#### 更多探索\n\n| 参考资料 | 描述 | 链接 |\n| --- | --- | :---: |\n| 量化简介 | 提供量化的概述，包括 absmax 和零点量化，并演示了 LLM.int8() 方法及配套代码。 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FModel%20Compression\u002FQuantization) |\n| 使用 llama.cpp 对 Llama 模型进行量化 | 提供使用 llama.cpp 和 GGUF 格式对 Llama 2 模型进行量化的方法教程。 | [🔗](https:\u002F\u002Fmlabonne.github.io\u002Fblog\u002Fposts\u002FQuantize_Llama_2_models_using_ggml.html) |\n| 使用 GPTQ 对 LLM 进行 4 位量化 | 提供使用 AutoGPTQ 和 GPTQ 算法对 LLM 进行量化的方法教程。 | [🔗](https:\u002F\u002Fmlabonne.github.io\u002Fblog\u002Fposts\u002F4_bit_Quantization_with_GPTQ.html) |\n| ExLlamaV2 | 展示如何使用 EXL2 格式对 Mistral 模型进行量化，并借助 ExLlamaV2 库运行该模型——后者被誉为运行 LLM 最快的库。 | [🔗](https:\u002F\u002Fmlabonne.github.io\u002Fblog\u002Fposts\u002FExLlamaV2_The_Fastest_Library_to_Run%C2%A0LLMs.html) |\n| FriendliAI 关于激活感知权重量化技术的解读 | 提供 AWQ 技术及其相关优势的概述。 | 
[🔗](https:\u002F\u002Fmedium.com\u002Ffriendliai\u002Funderstanding-activation-aware-weight-quantization-awq-boosting-inference-serving-efficiency-in-10bb0faf63a8) |\n\n\n### 8. 新趋势 📈\n\n| 概念 | 描述 |\n| --- | --- |\n| 位置嵌入 | 探讨 LLM 如何编码位置信息，重点关注相对位置编码方案，如 [RoPE](https:\u002F\u002Farxiv.org\u002Fabs\u002F2104.09864)。通过诸如 [YaRN](https:\u002F\u002Farxiv.org\u002Fabs\u002F2309.00071)（将注意力矩阵乘以温度因子）或 [ALiBi](https:\u002F\u002Farxiv.org\u002Fabs\u002F2108.12409)（根据标记距离施加注意力惩罚）等技术扩展上下文长度。 |\n| 模型合并 | 模型合并作为一种无需额外微调即可创建高性能模型的方法，近年来日益流行。广泛使用的 [mergekit](https:\u002F\u002Fgithub.com\u002Farcee-ai\u002Fmergekit) 库集成了多种合并方法，包括 SLERP、[DARE](https:\u002F\u002Farxiv.org\u002Fabs\u002F2311.03099) 和 [TIES](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.01708)。 |\n| 专家混合模型 | 以 [Mixtral](https:\u002F\u002Farxiv.org\u002Fabs\u002F2401.04088) 为代表的 MoE 架构的复兴，催生了诸如 frankenMoE 等替代方案，例如社区开发的 [Phixtral](https:\u002F\u002Fhuggingface.co\u002Fmlabonne\u002Fphixtral-2x2_8) 等模型，它们提供了经济高效且性能优越的替代选择。 |\n| 多模态模型 | 这些模型，如 [CLIP](https:\u002F\u002Fopenai.com\u002Findex\u002Fclip\u002F)、[Stable Diffusion](https:\u002F\u002Fstability.ai\u002Fstable-image) 或 [LLaVA](https:\u002F\u002Fllava-vl.github.io\u002F) 等，能够在统一的嵌入空间中处理多种输入（文本、图像、音频等），从而实现文本到图像生成等多种应用。 |\n| [glaive-function-calling-v2](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fglaiveai\u002Fglaive-function-calling-v2) | 高质量数据集，包含不同语言的指令与答案配对。\u003Cbr>可参阅 [Locutusque\u002Ffunction-calling-chatml](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FLocutusque\u002Ffunction-calling-chatml)，其中不含对话标签。 | 代理与函数调用 |\n| [Agent-FLAN](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Finternlm\u002FAgent-FLAN) | AgentInstruct、ToolBench 和 ShareGPT 数据集的混合。 | 代理与函数调用 |\n\n#### 更多探索\n\n| 参考资料 | 描述 | 链接 |\n| --- | --- | :---: |\n| EleutherAI 关于扩展 RoPE 的讨论 | 总结各种位置编码技术的文章。 | [🔗](https:\u002F\u002Fblog.eleuther.ai\u002Fyarn\u002F) |\n| Rajat Chawla 关于 YaRN 的理解 | YaRN 的入门介绍。 | 
[🔗](https:\u002F\u002Fmedium.com\u002F@rcrajatchawla\u002Funderstanding-yarn-extending-context-window-of-llms-3f21e3522465) |\n| 使用 mergekit 合并 LLM | 关于使用 mergekit 进行模型合并的教程。 | [🔗](https:\u002F\u002Fmlabonne.github.io\u002Fblog\u002Fposts\u002F2024-01-08_Merge_LLMs_with_mergekit.html) |\n| Hugging Face 解释专家混合模型 | 关于 MoE 及其工作原理的全面指南。 | [🔗](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Fmoe) |\n| Chip Huyen 关于大型多模态模型的探讨 | 多模态系统及其领域内最新进展的概述。 | [🔗](https:\u002F\u002Fhuyenchip.com\u002F2023\u002F10\u002F10\u002Fmultimodal.html) |\n\u003C\u002Fdetails>\n\u003Cbr>\n\n## 构建生产就绪的LLM应用\n\n学习如何创建和部署强大的LLM驱动的应用程序，重点在于模型增强以及适用于生产环境的实用部署策略。\n\n\u003Cdetails>\n\u003Csummary>⬇️ 准备好构建生产就绪的LLM应用了吗？⬇️ \u003C\u002Fsummary>\n\n```mermaid\ngraph LR\n    Scientist[\"生产就绪的LLM应用 👩‍🔬\"] --> Architecture[\"运行LLM 🏗️\"]\n    Scientist --> Storage[\"构建向量存储 📦\"]\n    Scientist --> Retrieval[\"检索增强生成 🔍\"]\n    Scientist --> AdvancedRAG[\"高级RAG ⚙️\"]\n    Scientist --> Optimization[\"推理优化 ⚡\"]\n    Scientist --> Deployment[\"部署LLM 🚀\"]\n    Scientist --> Secure[\"保护LLM 🔒\"]\n\n    Architecture --> APIs[\"LLM API 🌐\"]\n    Architecture --> OpenSource[\"开源LLM 🌍\"]\n    Architecture --> PromptEng[\"提示工程 💬\"]\n    Architecture --> StructOutputs[\"结构化输出 🗂️\"]\n\n    Storage --> Ingest[\"文档摄取 📥\"]\n    Storage --> Split[\"文档分割 ✂️\"]\n    Storage --> Embed[\"嵌入模型 🧩\"]\n    Storage --> VectorDB[\"向量数据库 📊\"]\n\n    Retrieval --> Orchestrators[\"编排器 🎼\"]\n    Retrieval --> Retrievers[\"检索器 🤖\"]\n    Retrieval --> Memory[\"记忆 🧠\"]\n    Retrieval --> Evaluation[\"评估 📈\"]\n\n    AdvancedRAG --> Query[\"查询构造 🔧\"]\n    AdvancedRAG --> Agents[\"智能体与工具 🛠️\"]\n    AdvancedRAG --> PostProcess[\"后处理 🔄\"]\n    AdvancedRAG --> Program[\"编程LLM 💻\"]\n\n    Optimization --> FlashAttention[\"Flash Attention ⚡\"]\n    Optimization --> KeyValue[\"键值缓存 🔑\"]\n    Optimization --> SpecDecoding[\"推测解码 🚀\"]\n\n    Deployment --> LocalDeploy[\"本地部署 🖥️\"]\n    Deployment --> DemoDeploy[\"演示部署 🎤\"]\n    
Deployment --> ServerDeploy[\"服务器部署 🖧\"]\n    Deployment --> EdgeDeploy[\"边缘部署 🌐\"]\n\n    Secure --> PromptEngSecure[\"提示工程 🔐\"]\n    Secure --> Backdoors[\"后门 🚪\"]\n    Secure --> Defensive[\"防御措施 🛡️\"]\n```\n\n### 1. 运行LLM\n运行LLM可能非常耗费资源，因为对硬件要求很高。根据你的使用场景，你可以选择通过API调用模型（如GPT-4），或者在本地运行它。无论哪种方式，采用额外的提示和引导技术都可以改善并约束输出，以满足你的应用需求。\n\n| **类别**            | **详情**                                                                                                                                                                                                                                                                                                        |\n|-------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n| **LLM API**            | API提供了一种便捷的方式来部署LLM。这一领域分为私有LLM（如[OpenAI](https:\u002F\u002Fplatform.openai.com\u002F)、[Google](https:\u002F\u002Fcloud.google.com\u002Fvertex-ai\u002Fdocs\u002Fgenerative-ai\u002Flearn\u002Foverview)、[Anthropic](https:\u002F\u002Fdocs.anthropic.com\u002Fclaude\u002Freference\u002Fgetting-started-with-the-api)、[Cohere](https:\u002F\u002Fdocs.cohere.com\u002Fdocs)等）和开源LLM（如[OpenRouter](https:\u002F\u002Fopenrouter.ai\u002F)、[Hugging Face](https:\u002F\u002Fhuggingface.co\u002Finference-api)、[Together AI](https:\u002F\u002Fwww.together.ai\u002F)等）。 |\n| **开源LLM**            | [Hugging Face Hub](https:\u002F\u002Fhuggingface.co\u002Fmodels)是寻找LLM的绝佳资源。有些可以直接在[Hugging Face Spaces](https:\u002F\u002Fhuggingface.co\u002Fspaces)中运行，也可以下载后使用[LM Studio](https:\u002F\u002Flmstudio.ai\u002F)等应用程序或通过命令行界面借助[llama.cpp](https:\u002F\u002Fgithub.com\u002Fggerganov\u002Fllama.cpp)或[Ollama](https:\u002F\u002Follama.ai\u002F)在本地运行。                     |\n| **提示工程**          
| 零样本提示、少样本提示、思维链以及ReAct等技术常用于提示工程。这些方法在大型模型上效果更好，但也可以适配到小型模型上。                                                                                       |\n| **结构化输出**        | 许多任务要求输出必须符合特定格式，例如严格的模板或JSON。像[LMQL](https:\u002F\u002Flmql.ai\u002F)、[Outlines](https:\u002F\u002Fgithub.com\u002Foutlines-dev\u002Foutlines)和[Guidance](https:\u002F\u002Fgithub.com\u002Fguidance-ai\u002Fguidance)这样的库可以帮助引导生成过程，以满足这些结构化要求。                       |\n\n#### 深入探索\n\n| **参考**                                                                                                          | **描述**                                                                                                           | **链接**   |\n|------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------|------------|\n| 使用Nisha Arya的LM Studio在本地运行LLM                                                                        | 关于如何使用LM Studio在本地运行LLM的简要指南。                                                            | [🔗](https:\u002F\u002Fwww.kdnuggets.com\u002Frun-an-llm-locally-with-lm-studio) |\n| DAIR.AI的提示工程指南                                                                                    | 包含示例的丰富提示技巧列表。                                                                     | [🔗](https:\u002F\u002Fwww.promptingguide.ai\u002F)                              |\n| Outlines - 快速入门                                                                                                  | 详细介绍Outlines库支持的引导式生成技术的快速入门指南。                            | [🔗](https:\u002F\u002Foutlines-dev.github.io\u002Foutlines\u002Fquickstart\u002F)        |\n| LMQL - 概述                                                                                                        | 对LMQL语言的介绍，解释其功能和用法。                                                  | 
[🔗](https:\u002F\u002Flmql.ai\u002Fdocs\u002Flanguage\u002Foverview.html)                |\n\n### 2. 构建向量存储\n\n构建向量存储是搭建检索增强生成（RAG）流水线的第一步。这一步骤包括加载和拆分文档，然后利用相关文本块生成向量表示（嵌入），并将这些嵌入存储起来，以供后续推理时使用。\n\n| **类别**           | **详情**                                                                                                                                                                                                                       |\n|------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n| **文档摄取**| 文档加载器是便捷的封装工具，可处理多种格式，如 PDF、JSON、HTML、Markdown 等。它们还能直接从一些数据库和 API 中获取数据（例如 GitHub、Reddit、Google Drive）。                 |\n| **文档拆分**| 文本拆分器会将文档分解为更小、语义上更有意义的块。与按固定字符数拆分文本不同，通常建议根据标题或递归方式拆分，并附加一些元数据。 |\n| **嵌入模型**   | 嵌入模型可以将文本转换为向量表示，从而提供对语言更深入、更细腻的理解，这对于执行语义搜索至关重要。                                                       |\n| **向量数据库**   | 向量数据库（如 [Chroma](https:\u002F\u002Fwww.trychroma.com\u002F)、[Pinecone](https:\u002F\u002Fwww.pinecone.io\u002F)、[Milvus](https:\u002F\u002Fmilvus.io\u002F)、[FAISS](https:\u002F\u002Ffaiss.ai\u002F)、[Annoy](https:\u002F\u002Fgithub.com\u002Fspotify\u002Fannoy) 等）用于存储嵌入向量，并能基于向量相似性高效地检索数据。 |\n\n#### 深入探索\n\n| **参考**                                                                                                           | **描述**                                                                                                           | **链接**   |\n|------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------|------------|\n| LangChain - 文本拆分器                                                                                             | LangChain 
中实现的各种文本拆分器列表。                                                              | [🔗](https:\u002F\u002Fpython.langchain.com\u002Fdocs\u002Fmodules\u002Fdata_connection\u002Fdocument_transformers\u002F) |\n| Sentence Transformers 库                                                                                          | 一个流行的嵌入模型库。                                                                                   | [🔗](https:\u002F\u002Fwww.sbert.net\u002F)                              |\n| MTEB 排行榜                                                                                                       | 用于评估嵌入模型的排行榜。                                                                              | [🔗](https:\u002F\u002Fhuggingface.co\u002Fspaces\u002Fmteb\u002Fleaderboard)        |\n| Moez Ali 的“前五名向量数据库”                                                                                 | 对最佳和最受欢迎的向量数据库的比较。                                                               | [🔗](https:\u002F\u002Fwww.datacamp.com\u002Fblog\u002Fthe-top-5-vector-databases)                |\n\n### 3. 
检索增强生成\n\n通过 RAG，大语言模型可以从数据库中访问相关文档，从而提高其回答的准确性。这种方法被广泛用于在无需微调的情况下扩展模型的知识库。\n\n| 类别      | 详情                                                                                                                                                                                                                                  |\n|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n| **编排器** | 编排器（如 [LangChain](https:\u002F\u002Fpython.langchain.com\u002Fdocs\u002Fget_started\u002Fintroduction)、[LlamaIndex](https:\u002F\u002Fdocs.llamaindex.ai\u002Fen\u002Fstable\u002F)、[FastRAG](https:\u002F\u002Fgithub.com\u002FIntelLabs\u002FfastRAG)等）是流行的框架，用于将您的大语言模型与工具、数据库、记忆模块等连接起来，以增强其能力。 |\n| **检索器**    | 用户指令通常并未针对检索进行优化。可以采用不同的技术（例如多查询检索器、[HyDE](https:\u002F\u002Farxiv.org\u002Fabs\u002F2212.10496)等）来改写或扩展这些指令，从而提升性能。                                                      |\n| **记忆模块**        | 为了记住之前的指令和答案，大语言模型及 ChatGPT 等聊天机器人会将这些历史记录添加到上下文窗口中。这一缓冲区可以通过摘要化（例如使用较小的 LLM）、向量存储 + RAG 等方式加以改进。                                           |\n| **评估**    | 我们需要同时评估文档检索阶段（上下文的精确性和召回率）以及生成阶段（忠实度和答案的相关性）。可以借助 [Ragas](https:\u002F\u002Fgithub.com\u002Fexplodinggradients\u002Fragas\u002Ftree\u002Fmain) 和 [DeepEval](https:\u002F\u002Fgithub.com\u002Fconfident-ai\u002Fdeepeval) 等工具来简化评估过程。                 |\n\n\n#### 更深入的探索\n\n| 参考资料                                       | 描述                                                      | 链接     |\n|-------------------------------------------------|------------------------------------------------------------------|----------|\n| Llamaindex - 高层次概念                | 构建 RAG 流程时需要了解的主要概念。               | [🔗](https:\u002F\u002Fdocs.llamaindex.ai\u002Fen\u002Fstable\u002Fgetting_started\u002Fconcepts.html) |\n| Pinecone - 检索增强               | 检索增强流程的概述。             
     | [🔗](https:\u002F\u002Fwww.pinecone.io\u002Flearn\u002Fseries\u002Flangchain\u002Flangchain-retrieval-augmentation\u002F) |\n| LangChain - 基于 RAG 的问答                        | 构建典型 RAG 流程的分步教程。           | [🔗](https:\u002F\u002Fpython.langchain.com\u002Fdocs\u002Fuse_cases\u002Fquestion_answering\u002Fquickstart) |\n| LangChain - 记忆模块类型                        | 不同类型记忆模块及其适用场景的列表。         | [🔗](https:\u002F\u002Fpython.langchain.com\u002Fdocs\u002Fmodules\u002Fmemory\u002Ftypes\u002F) |\n| RAG 流程 - 指标                          | 用于评估 RAG 流程的主要指标概述。     | [🔗](https:\u002F\u002Fdocs.ragas.io\u002Fen\u002Fstable\u002Fconcepts\u002Fmetrics\u002Findex.html) |\n\n### 4. 高级 RAG\n\n现实世界的应用往往需要复杂的流程，这些流程会利用 SQL 或图数据库，并动态选择合适的工具和 API。这些高级方法能够改进基础解决方案，并提供额外的功能。\n\n| 类别            | 详情                                                                                                                                                                                                                                        |\n|---------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n| **查询构建** | 存储在传统数据库中的结构化数据需要特定的查询语言，如 SQL、Cypher、元数据等。我们可以通过查询构建直接将用户指令转换为查询语句，从而访问数据。                        |\n| **智能体与工具**    | 智能体会通过自动选择最相关的工具来辅助大语言模型给出答案。这些工具既可以是简单的 Google 或 Wikipedia，也可以是更复杂的 Python 解释器或 Jira。                                         |\n| **后处理**     | 最后一步对输入到大语言模型的内容进行处理。它通过重新排序、[RAG-fusion](https:\u002F\u002Fgithub.com\u002FRaudaschl\u002Frag-fusion) 以及分类等方式，提升检索文档的相关性和多样性。                           |\n| **程序化优化大语言模型**        | 像 [DSPy](https:\u002F\u002Fgithub.com\u002Fstanfordnlp\u002Fdspy) 这样的框架允许您以编程化的方式，基于自动化评估结果优化提示词和权重。                                                                                      |\n\n#### 更深入的探索\n\n| 参考资料                                       
      | 描述                                                            | 链接     |\n|-------------------------------------------------------|------------------------------------------------------------------------|----------|\n| LangChain - 查询构建                        | 关于不同类型查询构建的博客文章。                 | [🔗](https:\u002F\u002Fblog.langchain.dev\u002Fquery-construction\u002F) |\n| LangChain - SQL                                       | 使用大语言模型与 SQL 数据库交互的教程，包括文本转 SQL 以及可选的 SQL 智能体。 | [🔗](https:\u002F\u002Fpython.langchain.com\u002Fdocs\u002Fuse_cases\u002Fqa_structured\u002Fsql) |\n| Pinecone - 大语言模型智能体                                 | 不同类型智能体和工具的介绍。                 | [🔗](https:\u002F\u002Fwww.pinecone.io\u002Flearn\u002Fseries\u002Flangchain\u002Flangchain-agents\u002F) |\n| Lilian Weng 关于大语言模型驱动的自主智能体          | 更具理论性的关于大语言模型智能体的文章。                             | [🔗](https:\u002F\u002Flilianweng.github.io\u002Fposts\u002F2023-06-23-agent\u002F) |\n| LangChain - OpenAI 的 RAG                              | OpenAI 采用的 RAG 策略概述，包括后处理环节。 | [🔗](https:\u002F\u002Fblog.langchain.dev\u002Fapplying-openai-rag\u002F) |\n| DSPy 八步指南                                       | 介绍 DSPy 模块、签名和优化器的通用指南。 | [🔗](https:\u002F\u002Fdspy-docs.vercel.app\u002Fdocs\u002Fbuilding-blocks\u002Fsolving_your_task) |\n\n### 5. 
推理优化\n\n文本生成是一个计算开销较大的过程，需要强大的硬件支持。除了量化之外，还有多种技术被提出用于提升吞吐量并降低推理成本。\n\n| 类别            | 详情                                                                                                                                            |\n|---------------------|----------------------------------------------------------------------------------------------------------------------------------------------------|\n| **Flash Attention** | 对注意力机制进行优化，将其复杂度从二次方降为线性，从而加速训练和推理过程。             |\n| **键值缓存**       | 理解键值缓存，以及在 [多查询注意力](https:\u002F\u002Farxiv.org\u002Fabs\u002F1911.02150) (MQA) 和 [分组查询注意力](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.13245) (GQA) 中引入的改进。 |\n| **推测解码**       | 使用小型模型生成草稿，再由大型模型进行校验，以加快文本生成速度。                                      |\n\n#### 深入探索\n\n| 参考资料                                             | 描述                                                                                                 | 链接     |\n|-------------------------------------------------------|-------------------------------------------------------------------------------------------------------------|----------|\n| Hugging Face 的 GPU 推理                         | 解释如何在 GPU 上优化推理过程。                                                                  | [🔗](https:\u002F\u002Fhuggingface.co\u002Fdocs\u002Ftransformers\u002Fmain\u002Fen\u002Fperf_infer_gpu_one) |\n| Databricks 的 LLM 推理                           | 生产环境中优化 LLM 推理的最佳实践。                                             | [🔗](https:\u002F\u002Fwww.databricks.com\u002Fblog\u002Fllm-inference-performance-engineering-best-practices) |\n| Hugging Face 的 LLM 速度与内存优化                | 介绍三种主要的优化技术和方法：量化、Flash Attention 以及架构创新。                             | [🔗](https:\u002F\u002Fhuggingface.co\u002Fdocs\u002Ftransformers\u002Fmain\u002Fen\u002Fllm_tutorial_optimization) |\n| Hugging Face 的辅助生成                           | HF 版本的推测解码，一篇有趣的博客文章，详细介绍了其工作原理及实现代码。                          | 
[🔗](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Fassisted-generation) |\n\n### 6. 部署 LLM\n\n大规模部署 LLM 是一项复杂的工程任务，可能需要多个 GPU 集群。然而，对于演示和本地应用来说，所需的复杂度则要低得多。\n\n| 类别            | 详情                                                                                                                                                                                                                       |\n|---------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n| **本地部署**      | 开源 LLM 相较于闭源 LLM 的一个重要优势在于隐私保护。本地 LLM 服务器（如 [LM Studio](https:\u002F\u002Flmstudio.ai\u002F)、[Ollama](https:\u002F\u002Follama.ai\u002F)、[oobabooga](https:\u002F\u002Fgithub.com\u002Foobabooga\u002Ftext-generation-webui)、[kobold.cpp](https:\u002F\u002Fgithub.com\u002FLostRuins\u002Fkoboldcpp) 等）正是利用这一优势来支持本地应用程序。 |\n| **演示部署**      | 像 [Gradio](https:\u002F\u002Fwww.gradio.app\u002F) 和 [Streamlit](https:\u002F\u002Fdocs.streamlit.io\u002F) 这样的框架，非常适合快速原型化应用并分享演示。你也可以轻松地将这些应用部署到线上，例如使用 [Hugging Face Spaces](https:\u002F\u002Fhuggingface.co\u002Fspaces)。 |\n| **服务器部署**    | 大规模部署 LLM 需要云基础设施（参见 [SkyPilot](https:\u002F\u002Fskypilot.readthedocs.io\u002Fen\u002Flatest\u002F)）或本地基础设施，并且通常会借助优化的文本生成框架，如 [TGI](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Ftext-generation-inference)、[vLLM](https:\u002F\u002Fgithub.com\u002Fvllm-project\u002Fvllm\u002Ftree\u002Fmain) 等。 |\n| **边缘部署**      | 在资源受限的环境中，高性能框架如 [MLC LLM](https:\u002F\u002Fgithub.com\u002Fmlc-ai\u002Fmlc-llm) 和 [mnn-llm](https:\u002F\u002Fgithub.com\u002Fwangzhaode\u002Fmnn-llm\u002Fblob\u002Fmaster\u002FREADME_en.md) 能够将 LLM 部署到 Web 浏览器、Android 和 iOS 设备上。 |\n\n\n#### 深入探索\n| 参考资料                                             | 描述                                                                                                        | 链接     
|\n|-------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------|----------|\n| Streamlit - 构建基础 LLM 应用                     | 使用 Streamlit 制作一个类似 ChatGPT 的基础聊天应用的教程。                                                         | [🔗](https:\u002F\u002Fdocs.streamlit.io\u002Fknowledge-base\u002Ftutorials\u002Fbuild-conversational-apps) |\n| HF LLM 推理容器                                    | 使用 Hugging Face 的推理容器在 Amazon SageMaker 上部署 LLM。                                          | [🔗](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Fsagemaker-huggingface-llm) |\n| Philipp Schmid 的博客                               | 收录了大量关于使用 Amazon SageMaker 部署 LLM 的高质量文章。                                   | [🔗](https:\u002F\u002Fwww.philschmid.de\u002F) |\n| Hamel Husain 的延迟优化                            | 对 TGI、vLLM、CTranslate2 和 mlc 在吞吐量和延迟方面的比较。                                  | [🔗](https:\u002F\u002Fhamel.dev\u002Fnotes\u002Fllm\u002Finference\u002F03_inference.html) |\n\n### 7. 
保障大语言模型的安全\n\n除了软件常见的安全问题外，大语言模型还面临着由其训练和提示方法所引发的独特漏洞。\n\n| 类别            | 详情                                                                                                                                                                                                                                    |\n|---------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n| **提示攻击** | 与提示工程相关的技术，包括提示注入（添加指令以改变模型的响应）、数据\u002F提示泄露（访问原始数据或提示）以及越狱（设计提示以绕过安全机制）。 |\n| **后门**       | 针对训练数据本身的攻击向量，例如用虚假信息污染训练数据，或创建后门（在推理过程中触发以改变模型行为的隐藏信号）。                                     |\n| **防御措施** | 保护大语言模型应用需要对其漏洞进行测试（如使用红队演练和 [garak](https:\u002F\u002Fgithub.com\u002Fleondz\u002Fgarak\u002F) 等工具），并在生产环境中对其进行监控（使用 [langfuse](https:\u002F\u002Fgithub.com\u002Flangfuse\u002Flangfuse) 等框架）。 |\n\n#### 深入探索\n| 参考资料                                             | 描述                                                                                                      | 链接     |\n|-------------------------------------------------------|------------------------------------------------------------------------------------------------------------------|----------|\n| OWASP LLM Top 10 by HEGO Wiki                        | 大语言模型应用中发现的十大关键漏洞列表。                                           | [🔗](https:\u002F\u002Fowasp.org\u002Fwww-project-top-10-for-large-language-model-applications\u002F) |\n| Prompt Injection Primer by Joseph Thacker            | 专为工程师准备的关于提示注入技术的简短指南。                                               | [🔗](https:\u002F\u002Fgithub.com\u002Fjthack\u002FPIPE) |\n| LLM Security by @llm_sec                              | 与大语言模型安全相关的丰富资源列表。                                                              | [🔗](https:\u002F\u002Fllmsecurity.net\u002F) |\n| Red teaming LLMs by Microsoft                  
       | 关于如何对大语言模型进行红队评估的指南。                                                          | [🔗](https:\u002F\u002Flearn.microsoft.com\u002Fen-us\u002Fazure\u002Fai-services\u002Fopenai\u002Fconcepts\u002Fred-teaming) |\n\n\n\u003C\u002Fdetails>\n\n\u003Cbr>\n\n# 深度文章 \n## 自然语言处理\n\n| 文章 | 资源 |\n| -------- | :---------: |\n| 大语言模型概述 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FNLP\u002FLLMs%20Overview)|\n| NLP 嵌入 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FNLP\u002FNLP%20Embeddings)|\n| 数据预处理 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FNLP\u002FPreprocessing)|\n| 抽样 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002FArticles\u002FNLP\u002FSampling)| \n| 分词 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FNLP\u002FTokenization)|\n| Transformer 架构 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FNLP\u002FTransformer\u002FAttention%20Is%20All%20You%20Need)|\n| 面试准备 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FInterview%20Preparation)|\n\n## 模型 \n\n| 文章 | 资源 |\n| -------- | :---------: |\n| 
GPT（生成式预训练 Transformer） | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FModels\u002FGenerative%20Pre-trained%20Transformer%20(GPT))|\n\n## 训练\n\n| 文章 | 资源 |\n| -------- | :---------: |\n| 激活函数 |  [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FTraining\u002FActivation%20Function)|\n| 微调模型 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FTraining\u002FFine%20Tuning%20Models)|\n| 提升模型压缩：推理与训练优化策略 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FTraining\u002FModel%20Compression)|\n| 模型概览 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FTraining\u002FModel%20Summary)|\n| 数据集拆分 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FTraining\u002FSplitting%20Datasets)|\n| 训练损失 > 验证损失 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FTraining\u002FTrain%20Loss%20%3E%20Val%20Loss)|\n| 参数高效微调 | 
[🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FTraining\u002FParameter%20Efficient%20Fine-Tuning) |\n| 梯度下降与反向传播 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FTraining\u002FGradient%20Descent%20and%20Backprop) |\n| 过拟合与欠拟合 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FTraining\u002FOverfitting%20And%20Underfitting)| \n| 梯度累积与检查点 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FTraining\u002FGradient%20Accumulation%20and%20Checkpointing)| \n| Flash Attention| [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FTraining\u002FFlash%20Attention)| \n\n## 提升模型压缩：推理与训练优化策略\n\n| 文章 | 资源 |\n| -------- | :---------: |\n| 量化 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FModel%20Compression\u002FQuantization)|\n| 量化简介 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FIntroduction_to_Weight_Quantization.ipynb)|\n| 知识蒸馏 | 
[🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FModel%20Compression\u002FKnowledge%20Distillation)|\n| 剪枝 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FModel%20Compression\u002FPruning)|\n| DeepSpeed | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FModel%20Compression\u002FDeepSpeed)|\n| 分片 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FModel%20Compression\u002FSharding)|\n| 混合精度训练 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FModel%20Compression\u002FMixed%20Precision%20Training)|\n| 推理优化 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FModel%20Compression\u002FInference%20Optimization)|\n\n## 评估指标\n| 文章 | 资源 |\n| -------- | :---------: |\n| 分类 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FEvaluation%20Metrics\u002FClassification)|\n| 回归 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FEvaluation%20Metrics\u002FRegression)| \n| 生成式文本模型 | 
[🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FEvaluation%20Metrics\u002FGenerative%20Text%20Models)|\n\n## 开放的LLM\n| 文章 | 资源 |\n| -------- | :---------: |\n| 用于商业用途的开源LLM领域 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FOpen%20LLMs\u002FCommercial%20Use)|\n| 用于研究用途的开源LLM领域 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FOpen%20LLMs\u002FResearch%20Use)|\n| LLM训练框架 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FOpen%20LLMs\u002FLLM%20Training%20Frameworks)|\n| 语言模型的有效部署策略 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FOpen%20LLMs\u002FDeployment)|\n| 关于LLM的教程 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FOpen%20LLMs\u002FTutorials)|\n| 关于LLM的课程 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FOpen%20LLMs\u002FCourses)|\n| 部署 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FOpen%20LLMs\u002FDeployment)|\n\n## 成本分析与网络可视化资源\n| 文章 | 资源 |\n| 
-------- | :---------: |\n| Lambda Labs 与 AWS 成本分析 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FResources)|\n| 神经网络可视化 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002FArticles\u002FResources\u002FNeural%20Network%20Visualization)|\n\n# 代码库精通：以完美打造 \n| 标题 | 仓库 | \n| ------- | :--------:|\n| 基于指令的数据准备（使用 OpenAI） | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002Fdata_generate_prepare)|\n| 使用 Trainer API 进行最优微调：从训练到模型推理 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002Ftrain_inference)|\n| 利用 PEFT 和 LoRA 高效微调及推理大语言模型 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002Ftrain_inference_peft_lora)|\n| 加速大语言模型的高效微调与推理 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002Ftrain_inference_accelerate)|\n| 使用 T5 进行高效微调 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002Ft5)|\n| 使用 LoRA 和 Hugging Face 训练大型语言模型 | 
[🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FEfficiently_train_Large_Language_Models_with_LoRA_and_Hugging_Face.ipynb)|\n| 在 Colab 笔记本中微调您自己的 Llama 2 模型 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FFine_Tune_Your_Own_Llama_2_Model_in_a_Colab_Notebook.ipynb)|\n| 基于 LLaMA-7B 模型的 Guanaco 聊天机器人演示 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FGuanaco%20Chatbot%20Demo%20with%20LLaMA-7B%20Model.ipynb)|\n| 使用 PEFT 微调 Bloom-560m 标注器 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FPEFT%20Finetune-Bloom-560m-tagger.ipynb)|\n| 使用 BNB 和 PEFT 微调 Meta OPT-6-1b 模型 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FFinetune_Meta_OPT-6-1b_Model_bnb_peft.ipynb)|\n| 使用 BNB 自监督训练微调 Falcon-7b | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FFinetune%20Falcon-7b%20with%20BNB%20Self%20Supervised%20Training.ipynb)|\n| 使用 QLoRa 微调 LLaMa2 | 
[🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FFineTune_LLAMA2_with_QLORA.ipynb)|\n| Stable Vicuna13B 8-bit 版本在 Colab 中运行 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FStable_Vicuna13B_8bit_in_Colab.ipynb)|\n| GPT-Neo-X-20B 使用 BNB 进行 4 位训练 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FGPT-neo-x-20B-bnb_4bit_training.ipynb)|\n| MPT-Instruct-30B 模型训练 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FMPT_Instruct_30B.ipynb)|\n| 针对任意模型的自定义数据集进行 RLHF 训练 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FRLHF_Training_for_CustomDataset_for_AnyModel.ipynb)|\n| 在自定义数据集上微调 Microsoft Phi 1.5b（DialogStudio） | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FFine_tuning_Microsoft_Phi_1_5b_on_custom_dataset(dialogstudio).ipynb)|\n| 微调 OpenAI GPT3.5 Turbo | 
[🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FFine_tuning_OpenAI_GPT_3_5_turbo.ipynb)|\n| 使用 Autotrain-advanced 微调 Mistral-7b 模型 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FFinetuning_Mistral_7b_Using_AutoTrain.ipynb)|\n| RAG LangChain 教程 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FRAG_LangChain.ipynb)|\n| Mistral DPO 训练器 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002Fmistral_trainer_dpo)|\n| LLM 分片 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FLLM_Sharding.ipynb)|\n| 将非结构化数据和图谱知识与 Neo4j 和 LangChain 集成，以增强问答能力 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FNeo4j_and_LangChain_for_Enhanced_Question_Answering.ipynb)|\n| vLLM 基准测试 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002Fvllm_benchmark.py)|\n| 
Milvus 向量数据库 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002Fvector_database)|\n| 解码策略 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002Fdecoding_strategies)|\n| PEFT QLoRa 在 SageMaker 上的训练 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002Fpeft_qlora_sm_training)|\n| 优化单个模型的 SageMaker 终端节点 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002Foptimize_single_model_sm_endpoint)|\n| 多适配器推理 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002Fmulti_adapter_inference)|\n| Inf2 LLM 的 SageMaker 部署 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002FInf2%20LLM%20SM%20Deployment)|\n| 文本分块可视化 `进行中` | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Ftree\u002Fmain\u002Fexample_codebase\u002Ftext_chunk_visaulization)|\n| 使用 ORPO 微调 Llama 3 | 
[🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FFine_tune_Llama_3_with_ORPO.ipynb)|\n| 使用 GPTQ 进行 4 位 LLM 量化 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002F4_bit_LLM_Quantization_with_GPTQ.ipynb)|\n| 模型家族树 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002FModel_Family_Tree.ipynb)|\n| 使用 MergeKit 创建 MoE 模型 | [🔗](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002Fexample_codebase\u002FEfficiently%20Fine%20Tune%20LLM\u002Fmerge_model_mergekit.ipynb)|\n| 在 AWS、GCP 或 Azure 上微调 Llama 3.1 | [🔗](https:\u002F\u002Fwww.zenml.io\u002Fblog\u002Fhow-to-finetune-llama-3-1-with-zenml)|\n| 在 AWS、GCP 或 Azure 上微调 Phi 3.5 | [🔗](https:\u002F\u002Fwww.zenml.io\u002Fblog\u002Fhow-to-finetune-phi-3-5-with-zenml)|\n\n# LLM 玩乐实验室\n| LLM 项目  |仓库|\n| ------------ | :------------: |\n|CSVQConnect   | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FCSVQConnect) |\n| AI_VIRTUAL_ASSISTANT  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FAI_VIRTUAL_ASSISTANT)|\n|  DocuBot多PDF对话助手 | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FDocuBotMultiPDFConversationalAssistant)  |\n|  autogpt |  [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002Fautogpt)|\n|  meta_llama_2微调文本生成摘要 | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002F-meta_llama_2finetuned_text_generation_summarization)  |\n| 
使用Llama进行文本生成|[🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002Ftext_generation_using_Llama-2\u002Ftree\u002Fmain) |\n| 使用Petal网络的LLM|[🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002Fllm_using_petals) |\n| 使用Petal网络的LLM|[🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002Fllm_using_petals) |\n| Salesforce-xgen|[🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FSalesforce-xgen)|\n| 使用Open Llama 7B进行文本摘要|[🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002Ftext_summarization_using_open_llama_7b)|\n| 使用GPT-J进行文本摘要|[🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FText_summarization_using_GPT-J)|\n| codllama  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002Fcodllama) |\n| 使用LLaVA将图像转为文本  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FImage_to_text_using_LLaVA\u002Ftree\u002Fmain) |\n| 使用Llamaindex处理表格数据  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FTabular_data_using_llamaindex) |\n| 下一个词句子预测  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002Fnextword_sentence_prediction) |\n| 使用DeciLM-7B指令版进行文本生成  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FText-Generation-using-DeciLM-7B-instruct) |\n| Gemini博客创作  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FGemini-blog-creation\u002Ftree\u002Fmain) |\n| 使用Gemini和Sheets制作节日贺卡  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FPrepare_holiday_cards_with_Gemini_and_Sheets\u002Ftree\u002Fmain) |\n| 使用phi2 LLM进行代码生成  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FCode-Generattion_using_phi2_llm) |\n| 使用Gemini的RAG  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FRAG-USING-GEMINI) |\n| 使用Gemini的多模态RAG餐厅推荐  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FResturant-Recommendation-Multi-Modal-RAG-using-Gemini) |\n| 简洁情感工具  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002Fslim-sentiment-tool) |\n| 使用LLM生成合成数据  | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FCorporate-Presentations-Synthetic-Data-Generation-Using-LLM) |\n| 构建聊天助手的架构 | 
[🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FDesign-an-Architecture-for-building-a-Chat-Assistant-for-an-ecommerce-platform) |\n| 基于查询的动态上下文LLM聊天助手 | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FLLM-CHAT-ASSISTANT-WITH-DYNAMIC-CONTEXT-BASED-ON-QUERY) |\n| 使用LLM的文本分类器 | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FAI-Powered-Text-Classifier-Harnessing-Large-Language-Models-for-Precise-Data-Categorization) |\n| 多类别情感分析 | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002Fmulti-class-sentiment-analysis-model-using-LLM) |\n| 使用GROQ进行文本生成 | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FText-Generation-Using-GROQ) |\n| 数据代理 | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FDataAgents) |\n| PandasQuery表格数据 | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FPandasQuery_tabular_data) |\n| 使用LLM进行探索性数据分析 | [🔗](https:\u002F\u002Fgithub.com\u002FSakil786\u002FExploratory_Data_Analysis_using_LLM\u002Ftree\u002Fmain) |\n\n# LLM 数据集\n\n| 数据集                                                                                                       | 数量     | 作者                      | 日期     | 备注                                                                             | 类别          |\n|------------------------------------------------------------------------------------------------------------- | -------- | ---------------------------- | -------- | --------------------------------------------------------------------------------- | ------------- |\n| [Buzz](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FH-D-T\u002FBuzz)                                                          | 3120万   | Alignment Lab AI             | 2024年5月 | 包含435个数据集的庞大集合，采用数据增强、去重等技术。                           | 通用          |\n| [WebInstructSub](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fchargoddard\u002FWebInstructSub-prometheus)                   | 239万   | Yue等                       | 2024年5月 | 通过从Common 
Crawl中检索文档、提取问答对并进行优化生成的指令数据集。参见[MAmmoTH2论文](https:\u002F\u002Farxiv.org\u002Fabs\u002F2405.03548)（此为子集）。 | 通用          |\n| [Bagel](https:\u002F\u002Fgithub.com\u002Fjondurbin\u002Fbagel)                                                                  | >200万?  | Jon Durbin                   | 2024年1月 | 使用余弦相似度去污的数据集集合。                                               | 通用          |\n| [Hercules v4.5](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FLocutusque\u002Fhercules-v4.5)                                    | 172万   | Sebastian Gabarain           | 2024年4月 | 包含数学、代码、角色扮演等内容的大规模通用数据集。数据集列表参见[v4](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FLocutusque\u002Fhercules-v4.0)。 | 通用          |\n| [Dolphin-2.9](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fcognitivecomputations\u002FDolphin-2.9)                              | 139万   | Cognitive Computations      | 2023年4月 | Dolphin模型使用的大型通用数据集。                                              | 通用          |\n| [WildChat-1M](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fallenai\u002FWildChat-1M)                                            | 104万   | Zhao等                      | 2023年5月 | GPT-3.5\u002F4与人类用户之间的真实对话，包含元数据。参见[WildChat论文](https:\u002F\u002Farxiv.org\u002Fabs\u002F2405.01470)。 | 通用          |\n| [OpenHermes-2.5](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fteknium\u002FOpenHermes-2.5)                                      | 100万   | Teknium                      | 2023年11月 | OpenHermes模型使用的另一份大规模数据集。                                       | 通用          |\n| [SlimOrca](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FOpen-Orca\u002FSlimOrca)                                                | 51.8万  | Lian等                      | 2023年9月 | 使用GPT-4作为评判者，剔除错误答案后精选出的[OpenOrca](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FOpen-Orca\u002FOpenOrca)子集。 | 通用          |\n| [Tulu V2 Mix](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fallenai\u002Ftulu-v2-sft-mixture)                  
                  | 32.6万  | Ivison等                    | 2023年11月 | 高质量数据集的混合。参见[Tulu 2论文](https:\u002F\u002Farxiv.org\u002Fabs\u002F2311.10702)。         | 通用          |\n| [UltraInteract SFT](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fopenbmb\u002FUltraInteract_sft)                                | 28.9万  | Yuan等                      | 2024年4月 | 专注于数学、编程和逻辑任务，提供分步解答。参见[Eurus论文](https:\u002F\u002Farxiv.org\u002Fabs\u002F2404.02078)。 | 通用          |\n| [NeurIPS-LLM-data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fupaya07\u002FNeurIPS-LLM-data)                                  | 20.4万  | Jindal等                    | 2023年11月 | 获得[NeurIPS LLM效率挑战赛](https:\u002F\u002Fllm-efficiency-challenge.github.io\u002F)冠军，采用有趣的预处理策略。 | 通用          |\n| [UltraChat 200k](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FHuggingFaceH4\u002Fultrachat_200k)                                | 20万    | Tunstall等、Ding等          | 2023年10月 | 对[UItraChat](https:\u002F\u002Fgithub.com\u002Fthunlp\u002FUltraChat)数据集进行深度过滤后的版本，由ChatGPT生成的140万条对话组成。 | 通用          |\n| [WizardLM_evol_instruct_V2](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fmlabonne\u002FWizardLM_evol_instruct_v2_196K-ShareGPT) | 14.3万  | Xu等                        | 2023年6月 | 将Evol-Instruct最新版本应用于Alpaca和ShareGPT数据。参见[WizardLM论文](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.12244)。 | 通用          |\n| [sft_datablend_v1](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fnvidia\u002Fsft_datablend_v1)                                   | 12.8万  | NVIDIA                       | 2024年1月 | 混合了公开可用的数据集：OASST、CodeContests、FLAN、T0、Open_Platypus以及GSM8K等共45个数据集。 | 通用          |\n| [Synthia-v1.3](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fmigtissera\u002FSynthia-v1.3)                                       | 11.9万  | Migel Tissera                | 2023年11月 | 使用GPT-4生成的高质量合成数据。                                                | 通用          |\n| 
[FuseChat-Mixture](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FFuseAI\u002FFuseChat-Mixture)                                   | 9.5万   | Wan等                       | 2024年2月 | 精选自高质量数据集的样本。参见[FuseChat论文](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.16107)。   | 通用          |\n| [oasst1](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FOpenAssistant\u002Foasst1)                                                | 8.44万  | Köpf等                      | 2023年3月 | 以35种不同语言编写的、由人类生成的助手风格对话语料库。参见[OASST1论文](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.07327)和[oasst2](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FOpenAssistant\u002Foasst2)。 | 通用          |\n| [WizardLM_evol_instruct_70k](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fmlabonne\u002FWizardLM_evol_instruct_70k-ShareGPT) | 7万     | Xu等                        | 2023年4月 | 将Evol-Instruct应用于Alpaca和ShareGPT数据。参见[WizardLM论文](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.12244)。 | 通用          |\n| [airoboros-3.2](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fjondurbin\u002Fairoboros-3.2)                                      | 5.87万  | Jon Durbin                   | 2023年12月 | 高质量无审查数据集。                                                           | 通用          |\n| [ShareGPT_Vicuna_unfiltered](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fanon8231489123\u002FShareGPT_Vicuna_unfiltered)       | 5.3万   | anon823 1489123              | 2023年3月 | ShareGPT数据集的过滤版，包含用户与ChatGPT的真实对话。                         | 通用          |\n| [lmsys-chat-1m-smortmodelsonly](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FNebulous\u002Flmsys-chat-1m-smortmodelsonly)       | 4.58万  | Nebulous、Zheng等            | 2023年9月 | 对[lmsys-chat-1m](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Flmsys\u002Flmsys-chat-1m)进行过滤，仅保留GPT-4、GPT-3.5-turbo、Claude-2、Claude-1和Claude-instant-1的回答。 | 通用          |\n| [Open-Platypus](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fgarage-bAInd\u002FOpen-Platypus)           
                        | 2.49万  | Lee等                       | 2023年9月 | 使用Sentence Transformers去重的数据集集合，其中包含NC数据集。参见[Platypus论文](https:\u002F\u002Farxiv.org\u002Fabs\u002F2308.07317)。 | 通用          |\n| [databricks-dolly-15k](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fdatabricks\u002Fdatabricks-dolly-15k)                       | 1.5万   | Conover等                   | 2023年5月 | 由Databricks员工生成的指令类数据，涵盖InstructGPT论文中提到的七类指令。         | 通用          |\n| [OpenMathInstruct-1](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fnvidia\u002FOpenMathInstruct-1)                   | 575万   | Toshniwal等                 | 2024年2月 | 来自GSM8K和MATH的数据集中的问题，由Mixtral-8x7B生成解答。                      | 数学          |\n| [MetaMathQA](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fmeta-math\u002FMetaMathQA)                                | 39.5万  | Yu等                       | 2023年12月 | 通过从多个角度改写问题来生成数学问题。参见[MetaMath论文](https:\u002F\u002Farxiv.org\u002Fabs\u002F2309.12284)。 | 数学          |\n| [MathInstruct](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FTIGER-Lab\u002FMathInstruct)                            | 26.2万  | Yue等                      | 2023年9月 | 汇总了13个数学推理数据集，其中6个是新整理的，重点在于思维链和程序性思维。      | 数学          |\n| [Orca-Math](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fmicrosoft\u002Forca-math-word-problems-200k)               | 20万    | Mitra等                    | 2024年2月 | 使用GPT4-Turbo生成的小学数学应用题。参见[Orca-Math论文](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.14830.pdf)。 | 数学          |\n| [CodeFeedback-Filtered-Instruction](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fm-a-p\u002FCodeFeedback-Filtered-Instruction)     | 15.7万  | Zheng等                    | 2024年2月   | Magicoder-OSS-Instruct、ShareGPT（Python）、Magicoder-Evol-Instruct以及Evol-Instruct-Code的过滤版。                                                                                                                                                                                                         
   | 编程          |\n| [Tested-143k-Python-Alpaca](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FVezora\u002FTested-143k-Python-Alpaca)                    | 14.3万  | Vezora          | 2024年3月   | 通过自动测试的高质量Python代码集合。                                                                                                                                                                                                                                     | 编程          |\n| [glaive-code-assistant](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fglaiveai\u002Fglaive-code-assistant)                          | 13.6万  | Glaive.ai       | 2023年9月   | 合成数据，包含问题和解答，约60%为Python样本。另请参阅[v2](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fglaiveai\u002Fglaive-code-assistant-v2)版本。                                                                                                                                                           | 编程          |\n| [Magicoder-Evol-Instruct-110K](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fise-uiuc\u002FMagicoder-Evol-Instruct-110K)            | 11万    | Wei等                      | 2023年11月   | 是[evol-codealpaca-v1](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Ftheblackcat102\u002Fevol-codealpaca-v1)的去污版本。去污方式与StarCoder相同（参考[bigcode去污流程](https:\u002F\u002Fgithub.com\u002Fbigcode-project\u002Fbigcode-dataset\u002Ftree\u002Fmain\u002Fdecontamination)）。参见[Magicoder论文](https:\u002F\u002Farxiv.org\u002Fabs\u002F2312.02120)。 | 编程          |\n| [dolphin-coder](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fcognitivecomputations\u002Fdolphin-coder)                             | 10.9万  | Eric Hartford   | 2023年11月   | 该数据集由[leetcode-rosetta](https:\u002F\u002Fwww.kaggle.com\u002Fdatasets\u002Ferichartford\u002Fleetcode-rosetta)转换而来。                                                                                                                                                                                                             
  | 编程          |\n| [synthetic_tex_to_sql](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fgretelai\u002Fsynthetic_text_to_sql)                           | 10万    | Gretel.ai       | 2024年4月   | 合成文本转SQL样本（约2300万token），覆盖多个领域。                                                                                                                                                                                                                                                     | 编程          |\n| [sql-create-context](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fb-mc2\u002Fsql-create-context)                                   | 7.86万  | b-mc2           | 2023年4月   | 清洗并扩充后的[WikiSQL](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fwikisql)和[Spider](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fspider)数据集。                                                                                                                                                                     | 编程          |\n| [Magicoder-OSS-Instruct-75K](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fise-uiuc\u002FMagicoder-OSS-Instruct-75K)                | 7.5万   | Wei等                      | 2023年11月   | 由`gpt-3.5-turbo-1106`生成的OSS-Instruct数据集。参见[Magicoder论文](https:\u002F\u002Farxiv.org\u002Fabs\u002F2312.02120)。                                                                                                                                                                                                           | 编程          |\n| [Code-Feedback](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fm-a-p\u002FCode-Feedback)                                             | 6.64万  | Zheng等                    | 2024年2月   | 多轮对话和文本与代码交替回复的多样化Code Interpreter类似数据集。参见[OpenCodeInterpreter论文](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.14658)。                                                                                                                                                 | 编程          
|\n| [Open-Critic-GPT](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FVezora\u002FOpen-Critic-GPT)                                             | 5.51万  | Vezora    | 2024年7月   | 使用本地模型在多种编程语言中创建、引入并识别代码中的缺陷。                                                                                                                                                | 编程          |\n| [self-oss-instruct-sc2-exec-filter-50k](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fbigcode\u002Fself-oss-instruct-sc2-exec-filter-50k) | 5.07万  | Lozhkov等                  | 2024年4月   | 经过三步生成：从TheStack v1中获取种子函数，使用StarCoder2进行自我指令，最后进行自我验证。参见[博客文章](https:\u002F\u002Fhuggingface.co\u002Fblog\u002Fsc2-instruct)。                                                                                                                                           | 编程          |\n| [Bluemoon](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FSquish42\u002Fbluemoon-fandom-1-1-rp-cleaned)               | 29万    | Squish42                | 2023年6月 | 由第三方清理和爬取的Blue Moon角色扮演论坛帖子。                              | 对话与角色扮演 |\n| [PIPPA](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fkingbri\u002FPIPPA-shareGPT)                                   | 1.68万  | Gosling等、kingbri | 2023年8月 | Pygmalion的[PIPPA](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FPygmalionAI\u002FPIPPA)在ShareGPT格式下的去重版本。 | 对话与角色扮演 |\n| [Capybara](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FLDJnr\u002FCapybara)                                        | 1.6万   | LDJnr                   | 2023年12月 | 强调跨领域信息多样性，支持多轮对话。                                           | 对话与角色扮演 |\n| [RPGPT_PublicDomain-alpaca](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fpractical-dreamer\u002FRPGPT_PublicDomain-alpaca) | 4260    | practical dreamer       | 2023年5月 | 使用[build-a-dataset](https:\u002F\u002Fgithub.com\u002Fpractical-dreamer\u002Fbuild-a-dataset)制作的角色扮演公共领域人物对话合成数据集。 | 对话与角色扮演 |\n| 
[Pure-Dove](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FLDJnr\u002FPure-Dove)                                      | 3860    | LDJnr                   | 2023年9月 | GPT-4与真人之间高度过滤后的多轮对话。                                         | 对话与角色扮演 |\n| [Opus Samantha](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fmacadeliccc\u002Fopus_samantha)                        | 1850    | macadelicc              | 2024年4月 | 与Claude 3 Opus的多轮对话。                                                                  | 对话与角色扮演 |\n| [LimaRP-augmented](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fgrimulkan\u002FLimaRP-augmented)                    | 804     | lemonilia、grimulkan    | 2024年1月 | LimaRP的增强和清洗版本，包含人类角色扮演对话。                      | 对话与角色扮演 |\n| [glaive-function-calling-v2](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fglaiveai\u002Fglaive-function-calling-v2) | 11.3万  | Sahil Chaudhary | 2023年9月 | 高质量的指令与答案配对数据集，支持多种语言。 \u003Cbr>参见[Locutusque\u002Ffunction-calling-chatml](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FLocutusque\u002Ffunction-calling-chatml)，该版本不含对话标签。 | 代理与函数调用 |\n| [xlam-function-calling-60k](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FSalesforce\u002Fxlam-function-calling-60k) | 6万     | Salesforce      | 2024年6月 | 通过专为函数调用应用设计的数据生成流水线创建的样本。                         | 代理与函数调用 |\n| [Agent-FLAN](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Finternlm\u002FAgent-FLAN)                                 | 3.44万  | internlm        | 2024年3月 | AgentInstruct、ToolBench和ShareGPT数据集的混合。                                                                   | 代理与函数调用 |\n\n# 大语言模型对齐\n\n对齐是一个新兴的研究领域，旨在确保人工智能系统按照预期的方式运行。具体到大语言模型（LLM），对齐是指通过训练使模型生成的输出与人类价值观和目标保持一致的过程。\n\n目前有哪些大语言模型对齐的方法呢？\n\n研究文献中提出了许多对齐方法，但为了便于讨论，我们仅介绍三种主要方法：\n\n### 📌 RLHF：\n- 步骤1 & 2：训练一个大语言模型（包括基础模型的预训练以及对话模型的监督\u002F指令微调）。\n- 步骤3：RLHF使用一个辅助语言模型（通常比主模型小得多）来学习人类偏好。这可以通过偏好数据集实现——该数据集包含提示词及由专业标注者评分的回答或一组回答，这一过程称为“奖励模型”的构建。\n- 
步骤4：采用强化学习算法（如PPO，即近端策略优化），其中大语言模型作为智能体，而奖励模型则根据其响应与“人类偏好评价”之间的匹配程度给予正面或负面奖励。\n理论上，流程就是如此简单。然而实际操作却并不容易，需要大量专家参与和计算资源支持。为了解决RLHF成本高昂的问题，研究人员开发了DPO。\n\n- RLHF参考：[RLHF：基于人类反馈的强化学习](https:\u002F\u002Fhuyenchip.com\u002F2023\u002F05\u002F02\u002Frlhf.html)\n\n### 📌 DPO：\n- 步骤1和步骤2保持不变。\n- 步骤4：DPO省去了奖励模型的训练环节（即步骤3）。它是如何做到的呢？DPO定义了一个额外的偏好损失函数，直接利用语言模型本身作为奖励模型。其核心思想是：既然已经训练了一个如此强大的大语言模型，为何不让它自己学会区分好与坏的回答，而不是再引入另一个模型呢？\n- 研究表明，DPO在计算效率上更高（因为无需持续监控奖励模型的行为），并且在多种场景下表现优于RLHF。\n- DPO相关博客：[通过直接偏好优化（DPO）对齐大语言模型——背景、概述、直觉及论文总结](https:\u002F\u002Fmedium.com\u002F@ManishChablani\u002Faligning-llms-with-direct-preference-optimization-dpo-background-overview-intuition-and-paper-0a72b9dc539c)\n\n### 📌 ORPO：\n- 这是三种方法中最新的，ORPO将步骤2、3和4合并为一步，因此所需的数据集是微调数据集与偏好数据集的结合。\n- 监督微调和对齐\u002F偏好优化在一个步骤中完成。这是因为微调虽然能使模型针对特定任务和领域进行专业化，但也可能增加模型产生不良响应的概率。\n- ORPO通过引入优势比（OR）项，将这些步骤整合到单一目标函数中，从而奖励偏好评价并惩罚被拒绝的评价。\n- ORPO相关博客：[ORPO性能超越SFT+DPO | 使用ORPO训练Phi-2](https:\u002F\u002Fmedium.com\u002F@zaiinn440\u002Forpo-outperforms-sft-dpo-train-phi-2-with-orpo-3ee6bf18dbf2)\n\n# 数据生成\n\n## 数据过滤\n\n| 数据集 | 描述 | 链接 |\n| -------- | ------ | :-----: |\n| 基于规则的过滤 | 根据黑名单词汇列表移除样本，例如拒绝回答或“作为一名AI助手”等内容 | [🔗](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fcognitivecomputations\u002FWizardLM_alpaca_evol_instruct_70k_unfiltered\u002Fblob\u002Fmain\u002Fwizardlm_clean.py) |\n| SemHash | 基于蒸馏模型快速生成嵌入向量的模糊去重工具。 | [🔗](https:\u002F\u002Fgithub.com\u002FMinishLab\u002Fsemhash) |\n\n## SFT数据集\n\n| 数据集 | 描述 | 链接 |\n| -------- | ------ | :-----: |\n| Distilabel | 通用框架，可利用UltraFeedback和DEITA等技术生成和增强数据（SFT、DPO）。 | [🔗](https:\u002F\u002Fgithub.com\u002Fargilla-io\u002Fdistilabel) |\n| Auto Data | 轻量级库，可通过API模型自动生成微调数据集。 | [🔗](https:\u002F\u002Fgithub.com\u002FItachi-Uchiha581\u002FAuto-Data) |\n| Bonito | 用于生成合成指令微调数据集的工具，无需GPT即可操作（也可参见[AutoBonito](https:\u002F\u002Fcolab.research.google.com\u002Fdrive\u002F1l9zh_VX0X4ylbzpGckCjH5yEflFsLW04?usp=sharing)）。 | [🔗](https:\u002F\u002Fgithub.com\u002FBatsResearch\u002Fbonito) |\n| 
Augmentoolkit | 利用开源和闭源模型将原始文本转换为数据集的框架。 | [🔗](https:\u002F\u002Fgithub.com\u002Fe-p-armstrong\u002Faugmentoolkit) |\n| Magpie | 通过提示对齐的大语言模型，高效生成高质量的合成数据流水线。 | [🔗](https:\u002F\u002Fgithub.com\u002Fmagpie-align\u002Fmagpie) |\n| Genstruct | 指令生成模型，旨在从原始数据中生成有效指令。 | [🔗](https:\u002F\u002Fhuggingface.co\u002FNousResearch\u002FGenstruct-7B) |\n| DataDreamer | 用于提示和合成数据生成的Python库。 | [🔗](https:\u002F\u002Fdatadreamer.dev\u002Fdocs\u002Flatest\u002F) |\n\n## 预训练数据集\n\n| 数据集 | 描述 | 链接 |\n| -------- | ------ | :-----: |\n| llm-swarm | 使用本地大语言模型或Hugging Face Hub上的推理端点生成用于预训练或微调的合成数据集。 | [🔗](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fllm-swarm) |\n| Cosmopedia | Hugging Face用于生成Cosmopedia数据集的代码。 | [🔗](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fcosmopedia) |\n| textbook_quality | 一个模仿微软Phi模型方法、用于生成教科书级别质量数据的仓库。 | [🔗](https:\u002F\u002Fgithub.com\u002FVikParuchuri\u002Ftextbook_quality) |\n\n## 数据探索\n\n| 数据集 | 描述 | 链接 |\n| -------- | ------ | :-----: |\n| sentence-transformers | 用于处理流行语言嵌入模型的Python模块。 | [🔗](https:\u002F\u002Fsbert.net\u002F) |\n| Lilac | 用于为大语言模型筛选优质数据的工具，已被NousResearch、Databricks、Cohere和Alignment Lab AI等机构使用，同时具备过滤功能。 | [🔗](https:\u002F\u002Fgithub.com\u002Flilacai\u002Flilac) |\n| Nomic Atlas | 可以交互式地探索指令数据，获取洞察并存储嵌入向量。 | [🔗](https:\u002F\u002Fgithub.com\u002Fnomic-ai\u002Fnomic) |\n| text-clustering | 轻松嵌入、聚类并对文本数据进行语义标注。 | [🔗](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Ftext-clustering) |\n\n## 数据抓取\n\n| 数据集 | 描述 | 链接 |\n| -------- | ------ | :-----: |\n| Trafilatura | 一款Python和命令行工具，用于在网络上收集文本和元数据。曾用于创建RefinedWeb数据集（参见[arXiv:2306.01116](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.01116)）。 | [🔗](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.01116) |\n| marker | 能够快速且高精度地将PDF转换为Markdown和JSON格式。 | [🔗](https:\u002F\u002Fgithub.com\u002FVikParuchuri\u002Fmarker) |\n\n## 了解大语言模型\n\n| 资源 | 链接 |\n| -------- | :-----: |\n| Brown, Tom B. 
“语言模型是少样本学习者。” arXiv预印本arXiv:2005.14165（2020年）。 | [🔗](https:\u002F\u002Frosanneliu.com\u002Fdlctfs\u002Fdlct_200724.pdf) |\n| Kambhampati, Subbarao, 等人。“大语言模型无法进行规划，但在LLM-modulo框架中可以帮助规划。” arXiv预印本arXiv:2402.01817（2024年）。 | [🔗](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.01817) |\n\n# 我正在学习的内容\n\n在过去将近一个月的时间里，我沉浸于最近的生成式AI文本语言模型热潮中，针对我在特定任务上的表现，得出了一些观察。\n\n请注意，这些观察是主观的，仅基于我个人的经验，您的结论可能会有所不同。\n\n- 为了获得最佳的自然语言理解性能，我们至少需要70亿参数级别的模型（≥7B）。参数量更少的模型会导致性能显著下降。然而，使用超过70亿参数的模型则需要配备24GB以上显存的GPU（>24GB）。\n- 基准测试可能比较棘手，因为不同的LLM在不同任务上的表现各有优劣。找到最适合您具体用例的模型至关重要。根据我的经验，MPT-7B仍然比Falcon-7B更胜一筹。\n- 每次模型迭代时，提示词都会发生变化。因此，需要多次调整以适应这些变化。虽然有一些潜在的解决方案，但其有效性仍在评估中。\n- 进行微调时，至少需要一块24GB以上的GPU（>24GB）。建议使用32GB或40GB显存的GPU。\n- 仅对最后几层进行微调以加快LLM的训练或微调速度，可能无法达到满意的效果。我曾尝试过这种方法，但效果并不理想。\n- 加载8位或4位模型可以节省显存。对于一个70亿参数的模型，原本需要16GB显存，现在分别只需约10GB或不到6GB。然而，这种显存占用的减少是以推理速度显著降低为代价的。同时，也可能导致文本理解任务的性能下降。\n- 那些正在为公司探索LLM应用的人士，应当注意许可方面的考量。以其他模型作为参考并要求原始权重来训练模型，在商业环境中并不推荐。\n- LLM主要有三种类型：基础型（如GPT-2\u002F3）、具备聊天功能的以及具备指令执行能力的。大多数情况下，基础模型本身并不具备直接可用性，需要进行微调。聊天版本通常是最佳选择，但它们往往不是开源的。\n- 并非所有问题都需要用LLM来解决。不要强行围绕LLM寻找解决方案。就像过去深度强化学习的情况一样，找到最合适的方法才是关键。\n- 我尝试过但并未使用langchains和向量数据库。对我来说，它们并不是必需的。简单的Python脚本、嵌入技术和高效的点积运算已经足够满足需求。\n- LLM并不需要拥有完整的世界知识。人类也并非无所不知，但我们能够灵活应变。LLM只需要知道如何利用现有的知识即可。或许可以通过分离知识组件来构建更小规模的模型。\n- 下一波创新可能会是在回答之前先模拟“思考”，而不是简单地逐字预测下一个词。这种方法有望带来重大突破。\n- LLM的过度参数化带来了一个重要挑战：它们往往会记住大量的训练数据。这在RAG场景中尤为突出，当上下文与这些“隐含”知识相冲突时。而当上下文本身包含相互矛盾的信息时，情况会更加复杂。一篇最新的综述论文全面分析了LLM中的“知识冲突”，将其分为三类：\n    - 上下文-记忆冲突：当外部上下文与LLM的内部知识相矛盾时产生。\n        - 解决方案：\n            - 在反事实上下文中进行微调，以优先考虑外部信息。\n            - 使用专门的提示词来强化对上下文的遵循。\n            - 应用解码技术，放大上下文的概率。\n            - 在跨文档的多样化上下文中进行预训练。\n    \n    - 上下文间冲突：多个外部来源之间的矛盾。\n        - 解决方案：\n            - 使用专门的模型来检测矛盾。\n            - 结合外部工具的事实核查框架。\n            - 微调判别器以识别可靠来源。\n            - 从增强查询中聚合高置信度的答案。\n    \n    - 内部记忆冲突：由于内部知识相互矛盾，LLM对相似输入给出不一致的输出。\n        - 解决方案：\n            - 使用一致性损失函数进行微调。\n            - 实施插件方法，重新训练词汇定义。\n            - 将一个模型的输出与另一个模型的一致性评分相结合。\n         
   - 应用对比解码，专注于真实的层或头。\n- PPO与DPO的区别在于：在DPO中，不再需要训练奖励模型，只需提供好的和坏的数据即可！\n- ORPO：“一种简单而创新的无参考模型单体优势比偏好优化算法ORPO，消除了对额外偏好对齐阶段的需求。” [Hong, Lee, Thorne (2024)](https:\u002F\u002Farxiv.org\u002Fabs\u002F2403.07691)\n- KTO：“KTO不需要偏好——只需要一个二元信号，表明对于给定输入，某个输出是可取的还是不可取的。这使得它在现实世界中更容易使用，因为在现实中偏好数据既稀缺又昂贵。” [Ethayarajh et al (2024)](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.01306)\n\n# 贡献\n欢迎贡献！如果您想为这个项目做出贡献，请随时提出问题或提交拉取请求。\n\n# 许可证\n本项目采用[MIT许可证](https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\u002Fblob\u002Fmain\u002FLICENSE)。\n\n# 关于作者\n[Sunil Ghimire](https:\u002F\u002Fsunilghimire.com.np\u002F)是一位热衷于文学的NLP工程师。他认为，文字和数据是改变世界的两种最强大的工具。\n\n---\n\u003Cp align=\"center\">\n  \u003Ca href=\"https:\u002F\u002Fstar-history.com\u002F#ghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing&Date\">\n    \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fghimiresunil_LLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing_readme_58147ad582c7.png\" alt=\"Star History Chart\">\n  \u003C\u002Fa>\n\u003C\u002Fp>\n\n由[Sunil Ghimire](https:\u002F\u002Fsunilghimire.com.np\u002F)用心制作","# LLM PowerHouse 快速上手指南\n\nLLM PowerHouse 是一个精选的大型语言模型（LLM）资源库，旨在通过教程、最佳实践和即用代码，帮助开发者掌握 LLM 的自定义训练与推理。本指南将协助你快速搭建环境并开始探索。\n\n## 环境准备\n\n在开始之前，请确保你的开发环境满足以下要求：\n\n*   **操作系统**: Linux (推荐 Ubuntu 20.04+), macOS, 或 Windows (需 WSL2)。\n*   **Python 版本**: Python 3.8 或更高版本 (推荐 3.10+)。\n*   **硬件要求**:\n    *   **基础学习**: 任意 CPU 即可运行基础数学和 NLP 概念演示。\n    *   **模型训练\u002F推理**: 建议配备 NVIDIA GPU (显存 8GB 以上)，并安装对应的 CUDA 驱动。\n*   **前置依赖**:\n    *   `git`: 用于克隆仓库。\n    *   `pip` 或 `conda`: 用于管理 Python 包。\n    *   (可选) **国内加速**: 推荐使用清华源或阿里源加速 Python 包下载。\n\n## 安装步骤\n\n### 1. 
克隆仓库\n首先，将项目代码克隆到本地：\n\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Fghimiresunil\u002FLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing.git\ncd LLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing\n```\n\n### 2. 创建虚拟环境\n建议使用 `conda` 或 `venv` 创建独立的虚拟环境以避免依赖冲突。\n\n**使用 Conda:**\n```bash\nconda create -n llm-powerhouse python=3.10\nconda activate llm-powerhouse\n```\n\n**使用 venv:**\n```bash\npython -m venv llm-powerhouse\nsource llm-powerhouse\u002Fbin\u002Factivate  # Windows: llm-powerhouse\\Scripts\\activate\n```\n\n### 3. 安装核心依赖\n该项目主要作为资源索引和代码示例集合。根据你想运行的具体示例（位于 `example_codebase` 目录），可能需要安装不同的库。以下是通用的基础数据科学与深度学习栈安装命令：\n\n**使用国内镜像源加速安装 (推荐):**\n```bash\npip install -r requirements.txt -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n```\n\n*注意：如果根目录下没有 `requirements.txt`，请进入具体的示例文件夹安装。例如，对于基于 PyTorch 的神经网络示例：*\n\n```bash\ncd example_codebase\u002Fneural_networks\npip install torch torchvision torchaudio numpy pandas matplotlib scikit-learn -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n```\n\n## 基本使用\n\n本项目核心在于“按目标学习”。你可以直接运行示例代码或阅读文章来开始。\n\n### 示例：运行一个简单的多层感知机 (MLP)\n根据仓库中的 \"Foundations of LLMs\" -> \"Neural Networks\" 部分，我们可以运行一个基础的 MLP 实现。\n\n1.  **定位代码**:\n    进入示例代码目录（假设路径如下，具体请以仓库实际结构为准）：\n    ```bash\n    cd example_codebase\u002Fneural_networks\n    ```\n\n2.  **执行脚本**:\n    运行提供的 Python 脚本来体验神经网络的训练过程：\n    ```bash\n    python mlp_implementation.py\n    ```\n\n3.  
**预期输出**:\n    脚本将初始化网络结构，执行前向传播和反向传播，并输出训练损失的变化情况，帮助你直观理解 `Backpropagation` 和 `Optimization`。\n\n### 探索更多资源\n*   **学习基础理论**: 查看 `Articles\u002FFoundations` 目录下的数学与 NLP 笔记。\n*   **数据集探索**: 访问 `dataset` 目录获取推荐的 LLM 训练数据集列表。\n*   **生产级应用**: 参考 `Articles\u002FBuilding Production-Ready LLM Applications` 学习 RAG 架构与安全部署。\n\n现在，你已经准备好深入探索 LLM PowerHouse 的丰富内容了！","某金融科技公司的算法团队急需构建一个能精准解读复杂信贷条款并生成合规报告的垂直领域大模型，但团队缺乏从数据准备到模型微调的全链路实战经验。\n\n### 没有 LLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing 时\n- **基础理论缺失**：团队成员对 Transformer 架构背后的数学原理理解模糊，导致在调整超参数时只能盲目试错，浪费大量算力资源。\n- **数据工程混乱**：缺乏高质量的金融领域指令数据集构建指南，清洗后的数据格式不统一，严重影响了模型微调的收敛效果。\n- **部署落地困难**：在将模型推向生产环境时，面对量化压缩、推理加速和安全对齐等难题束手无策，项目迟迟无法上线。\n- **代码复用率低**：网上教程碎片化严重，找不到经过验证的端到端训练代码，开发人员需重复造轮子，开发周期被大幅拉长。\n\n### 使用 LLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing 后\n- **夯实理论根基**：通过\"LLM 基础”模块中系统的数学与神经网络教程，团队快速掌握了核心原理，能够科学地制定训练策略。\n- **数据质量飞跃**：参考\"LLM 数据集”和“数据生成”章节的最佳实践，构建了标准化的金融指令数据集，显著提升了模型的专业度。\n- **生产级交付**：利用“构建生产就绪应用”指南中的 RAG 架构、模型压缩及安全对齐方案，顺利解决了部署瓶颈，实现了低延迟上线。\n- **效率大幅提升**：直接复用\"Codebase Mastery\"中经过验证的完整代码库，将原本需要数周的开发工作缩短至几天，专注于业务逻辑优化。\n\nLLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing 通过提供从理论基础到生产落地的全栈式指南与代码，帮助团队跨越了技术鸿沟，实现了垂直大模型的高效定制与商业化交付。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fghimiresunil_LLM-PowerHouse-A-Curated-Guide-for-Large-Language-Models-with-Custom-Training-and-Inferencing_f692e15d.png","ghimiresunil","Sunil Ghimire","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fghimiresunil_74c12fd4.jpg","Train Machine Learning models for Humans","FuseMachines","Kathmandu, Nepal","info@sunilghimire.com.np",null,"sunilghimire.com.np","https:\u002F\u002Fgithub.com\u002Fghimiresunil",[83,87,91,95],{"name":84,"color":85,"percentage":86},"Jupyter 
Notebook","#DA5B0B",99,{"name":88,"color":89,"percentage":90},"Python","#3572A5",1,{"name":92,"color":93,"percentage":94},"Shell","#89e051",0,{"name":96,"color":97,"percentage":94},"Dockerfile","#384d54",727,121,"2026-04-10T00:37:19","MIT","","未说明",{"notes":105,"python":103,"dependencies":106},"该 README 内容主要是一个关于大语言模型（LLM）的学习指南、教程目录和资源索引，涵盖了数学基础、Python 编程、神经网络原理及 NLP 基础知识。文中并未提供具体的可运行代码库的安装说明、环境配置需求或依赖列表。虽然提到了使用 PyTorch 实现 MLP 作为学习示例，但整个项目本身是一个文档集合而非单一的可安装软件工具，因此无法从中提取具体的操作系统、GPU、内存或 Python 版本等运行环境需求。",[],[15,35,14],[109,110,111,112,113,114,115,116,117],"bert","huggingface","large-language-models","llm-inference","llm-training","open-source","open-source-llm","transformers","llm-tutorials","2026-03-27T02:49:30.150509","2026-04-11T10:04:15.132467",[],[]]