[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-Tebmer--Awesome-Knowledge-Distillation-of-LLMs":3,"tool-Tebmer--Awesome-Knowledge-Distillation-of-LLMs":64},[4,17,27,35,44,52],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":16},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,3,"2026-04-05T11:01:52",[13,14,15],"开发框架","图像","Agent","ready",{"id":18,"name":19,"github_repo":20,"description_zh":21,"stars":22,"difficulty_score":23,"last_commit_at":24,"category_tags":25,"status":16},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",140436,2,"2026-04-05T23:32:43",[13,15,26],"语言模型",{"id":28,"name":29,"github_repo":30,"description_zh":31,"stars":32,"difficulty_score":23,"last_commit_at":33,"category_tags":34,"status":16},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 
图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",107662,"2026-04-03T11:11:01",[13,14,15],{"id":36,"name":37,"github_repo":38,"description_zh":39,"stars":40,"difficulty_score":10,"last_commit_at":41,"category_tags":42,"status":16},4292,"Deep-Live-Cam","hacksider\u002FDeep-Live-Cam","Deep-Live-Cam 是一款专注于实时换脸与视频生成的开源工具，用户仅需一张静态照片，即可通过“一键操作”实现摄像头画面的即时变脸或制作深度伪造视频。它有效解决了传统换脸技术流程繁琐、对硬件配置要求极高以及难以实时预览的痛点，让高质量的数字内容创作变得触手可及。\n\n这款工具不仅适合开发者和技术研究人员探索算法边界，更因其极简的操作逻辑（仅需三步：选脸、选摄像头、启动），广泛适用于普通用户、内容创作者、设计师及直播主播。无论是为了动画角色定制、服装展示模特替换，还是制作趣味短视频和直播互动，Deep-Live-Cam 都能提供流畅的支持。\n\n其核心技术亮点在于强大的实时处理能力，支持口型遮罩（Mouth Mask）以保留使用者原始的嘴部动作，确保表情自然精准；同时具备“人脸映射”功能，可同时对画面中的多个主体应用不同面孔。此外，项目内置了严格的内容安全过滤机制，自动拦截涉及裸露、暴力等不当素材，并倡导用户在获得授权及明确标注的前提下合规使用，体现了技术发展与伦理责任的平衡。",88924,"2026-04-06T03:28:53",[13,14,15,43],"视频",{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":23,"last_commit_at":50,"category_tags":51,"status":16},3704,"NextChat","ChatGPTNextWeb\u002FNextChat","NextChat 是一款轻量且极速的 AI 助手，旨在为用户提供流畅、跨平台的大模型交互体验。它完美解决了用户在多设备间切换时难以保持对话连续性，以及面对众多 AI 模型不知如何统一管理的痛点。无论是日常办公、学习辅助还是创意激发，NextChat 都能让用户随时随地通过网页、iOS、Android、Windows、MacOS 或 Linux 端无缝接入智能服务。\n\n这款工具非常适合普通用户、学生、职场人士以及需要私有化部署的企业团队使用。对于开发者而言，它也提供了便捷的自托管方案，支持一键部署到 Vercel 或 Zeabur 等平台。\n\nNextChat 的核心亮点在于其广泛的模型兼容性，原生支持 Claude、DeepSeek、GPT-4 及 Gemini Pro 等主流大模型，让用户在一个界面即可自由切换不同 AI 能力。此外，它还率先支持 MCP（Model Context Protocol）协议，增强了上下文处理能力。针对企业用户，NextChat 
提供专业版解决方案，具备品牌定制、细粒度权限控制、内部知识库整合及安全审计等功能，满足公司对数据隐私和个性化管理的高标准要求。",87618,"2026-04-05T07:20:52",[13,26],{"id":53,"name":54,"github_repo":55,"description_zh":56,"stars":57,"difficulty_score":23,"last_commit_at":58,"category_tags":59,"status":16},2268,"ML-For-Beginners","microsoft\u002FML-For-Beginners","ML-For-Beginners 是由微软推出的一套系统化机器学习入门课程，旨在帮助零基础用户轻松掌握经典机器学习知识。这套课程将学习路径规划为 12 周，包含 26 节精炼课程和 52 道配套测验，内容涵盖从基础概念到实际应用的完整流程，有效解决了初学者面对庞大知识体系时无从下手、缺乏结构化指导的痛点。\n\n无论是希望转型的开发者、需要补充算法背景的研究人员，还是对人工智能充满好奇的普通爱好者，都能从中受益。课程不仅提供了清晰的理论讲解，还强调动手实践，让用户在循序渐进中建立扎实的技能基础。其独特的亮点在于强大的多语言支持，通过自动化机制提供了包括简体中文在内的 50 多种语言版本，极大地降低了全球不同背景用户的学习门槛。此外，项目采用开源协作模式，社区活跃且内容持续更新，确保学习者能获取前沿且准确的技术资讯。如果你正寻找一条清晰、友好且专业的机器学习入门之路，ML-For-Beginners 将是理想的起点。",84991,"2026-04-05T10:45:23",[14,60,43,61,15,62,26,13,63],"数据工具","插件","其他","音频",{"id":65,"github_repo":66,"name":67,"description_en":68,"description_zh":69,"ai_summary_zh":69,"readme_en":70,"readme_zh":71,"quickstart_zh":72,"use_case_zh":73,"hero_image_url":74,"owner_login":75,"owner_name":76,"owner_avatar_url":77,"owner_bio":78,"owner_company":79,"owner_location":79,"owner_email":79,"owner_twitter":80,"owner_website":79,"owner_url":81,"languages":79,"stars":82,"forks":83,"last_commit_at":84,"license":79,"difficulty_score":85,"env_os":86,"env_gpu":87,"env_ram":87,"env_deps":88,"category_tags":91,"github_topics":92,"view_count":23,"oss_zip_url":79,"oss_zip_packed_at":79,"status":16,"created_at":108,"updated_at":109,"faqs":110,"releases":111},4225,"Tebmer\u002FAwesome-Knowledge-Distillation-of-LLMs","Awesome-Knowledge-Distillation-of-LLMs","This repository collects papers for \"A Survey on Knowledge Distillation of Large Language Models\". 
We break down KD into Knowledge Elicitation and Distillation Algorithms, and explore the Skill & Vertical Distillation of LLMs.","Awesome-Knowledge-Distillation-of-LLMs 是一个专注于大语言模型（LLM）知识蒸馏技术的开源论文合集。它旨在解决如何将 GPT-4 等闭源强大模型的能力高效迁移至 LLaMA、Mistral 等开源小模型，以及如何利用开源模型自我压缩与提升的难题。通过系统梳理“知识提取”与“蒸馏算法”，该资源帮助开发者在降低计算成本的同时，让小型模型获得接近大型模型的语境理解、伦理对齐及深度语义洞察能力。\n\n该项目特别适合 AI 研究人员、算法工程师及大模型开发者使用。无论是希望训练轻量级垂直领域模型，还是探索数据增强与知识蒸馏结合的前沿学者，都能从中获益。其核心亮点在于构建了清晰的三维分类体系：从底层算法机制、特定技能迁移到垂直行业应用，全方位覆盖了当前最新的研究成果。此外，项目团队每周持续更新论文列表，并配套发布了详细的综述文章，为社区提供了极具价值的技术导航。如果你正致力于优化模型效率或挖掘大模型潜力，这份动态更新的知识库将是不可或缺的参考指南。","# Awesome Knowledge Distillation of LLM Papers\n[![Awesome](https:\u002F\u002Fawesome.re\u002Fbadge.svg)]() \n![](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FPRs-Welcome-red) \n[![PDF](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FPDF-2402.13116-green)](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.13116)\n\n\n\n\u003C!-- Big font size -->\n\u003Ch2 align=\"center\">\nA Survey on Knowledge Distillation of Large Language Models\n\u003C\u002Fh2> \n\n\n\u003Cp align=\"center\">\n  Xiaohan Xu\u003Csup>1\u003C\u002Fsup>&nbsp&nbsp\n    Ming Li\u003Csup>2\u003C\u002Fsup>&nbsp&nbsp\n    Chongyang Tao\u003Csup>3\u003C\u002Fsup>&nbsp&nbsp\n    Tao Shen\u003Csup>4\u003C\u002Fsup>&nbsp&nbsp\n    Reynold Cheng\u003Csup>1\u003C\u002Fsup>&nbsp&nbsp\n    Jinyang Li\u003Csup>1\u003C\u002Fsup>&nbsp&nbsp\n    Can Xu\u003Csup>5\u003C\u002Fsup>&nbsp&nbsp\n    Dacheng Tao\u003Csup>6\u003C\u002Fsup>&nbsp&nbsp\n    Tianyi Zhou\u003Csup>2\u003C\u002Fsup>&nbsp&nbsp\n\u003C\u002Fp>  \n\n\n\u003Cp align=\"center\">\n\u003Csup>1\u003C\u002Fsup> The University of Hong Kong &nbsp&nbsp\n\u003Csup>2\u003C\u002Fsup> University of Maryland &nbsp&nbsp\n\u003Csup>3\u003C\u002Fsup> Microsoft &nbsp&nbsp\n\u003Csup>4\u003C\u002Fsup> University of Technology Sydney &nbsp&nbsp\n\u003Csup>5\u003C\u002Fsup> Peking University &nbsp&nbsp\n\u003Csup>6\u003C\u002Fsup> The University of Sydney\n\u003C\u002Fp>\n\u003Cdiv 
align=\"center\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FTebmer_Awesome-Knowledge-Distillation-of-LLMs_readme_1ea1cfe79e30.png\" width=\"700\">\u003Cbr>\n\u003C\u002Fdiv>\n\u003Cbr>\n\n*A collection of papers related to knowledge distillation of large language models (LLMs). \nIf you want to use LLMs for benefitting your own smaller models training, or use self-generated knowledge to achieve the self-improvement, just take a look at this collection.*\n\n***We will update this collection every week. Welcome to star ⭐️ this repo to keep track of the updates.***\n\n> ❗️Legal Consideration: It's crucial to note the legal implications of utilizing LLM outputs, such as those from ChatGPT ([Restrictions](https:\u002F\u002Fopenai.com\u002Fpolicies\u002Fbusiness-terms)), Llama ([License](https:\u002F\u002Fllama.meta.com\u002Fllama-downloads\u002F)), etc. We strongly advise users to adhere to the terms of use specified by the model providers, such as the restrictions on developing competitive products, and so on.\n\n\n## 💡 News\n- **2024-2-20**: 📃 We released a survey paper \"**[A Survey on Knowledge Distillation of Large Language Models](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.13116)**\". Welcome to read and cite it. We are looking forward to your feedback and suggestions.\n\n- Update Log\n  - **2024-3-19**: Add 14 papers.\n\n\n## Contributing to This Collection\n\nFeel free to **open an issue\u002FPR** or e-mail [shawnxxh@gmail.com](mailto:shawnxxh@gmail.com), [minglii@umd.edu](mailto:minglii@umd.edu), [hishentao@gmail.com](mailto:hishentao@gmail.com) and [chongyangtao@gmail.com](mailto:chongyangtao@gmail.com) if you find any missing taxonomies or papers. 
We will keep updating this collection and survey.\n\n## 📝 Introduction\n**KD of LLMs**: This survey delves into knowledge distillation (KD) techniques in Large Language Models (LLMs), highlighting KD's crucial role in transferring advanced capabilities from proprietary LLMs like GPT-4 to open-source counterparts such as LLaMA and Mistral. We also explore how KD enables the compression and self-improvement of open-source LLMs by using them as teachers. \n\n**KD and Data Augmentation**: Crucially, the survey navigates the intricate interplay between data augmentation (DA) and KD, illustrating how DA emerges as a powerful paradigm within the KD framework to bolster LLMs' performance. By leveraging DA to generate context-rich, skill-specific training data, KD transcends traditional boundaries, enabling open-source models to approximate the contextual adeptness, ethical alignment, and deep semantic insights characteristic of their proprietary counterparts. \n\n**Taxonomy**: Our analysis is meticulously structured around three foundational pillars: **[algorithm](#kd-algorithms)**, **[skill](#skill-distillation)**, and **[verticalization](#verticalization-distillation)** -- providing a comprehensive examination of KD mechanisms, the enhancement of specific cognitive abilities, and their practical implications across diverse fields. \n\n\n**[KD Algorithms](#kd-algorithms)**:  For KD algorithms, we categorize them into two principal steps: \"Knowledge Elicitation\" focusing on eliciting knowledge from teacher LLMs, and \"Distillation Algorithms\" centered on injecting this knowledge into student models. 
\n\n\u003Cdiv align=\"center\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FTebmer_Awesome-Knowledge-Distillation-of-LLMs_readme_7c427a1aa84d.png\" width=\"600\">\u003Cbr>\n  \u003Cem>Figure: An illustration of different knowledge elicitation methods from teacher LLMs.\u003C\u002Fem>\n\u003C\u002Fdiv>\n\u003Cbr>\n\n\n**[Skill Distillation](#skill-distillation)**:  We delve into the enhancement of specific cognitive abilities, such as context following, alignment, agent, NLP task specialization, and multi-modality.\n\n**[Verticalization Distillation](#verticalization-distillation)**: We explore the practical implications of KD across diverse fields, including law, medical & healthcare, finance, science, and miscellaneous domains.\n\n\n> Note that both [Skill Distillation](#skill-distillation) and [Verticalization Distillation](#verticalization-distillation) employ Knowledge Elicitation and Distillation Algorithms in [KD Algorithms](#kd-algorithms) to achieve their KD. Thus, there are overlaps between them. However, this could also provide different perspectives for the papers.\n\n\n## Why KD of LLMs?\nIn the era of LLMs, KD of LLMs plays the following crucial roles:\n\n\u003Cdiv align=\"center\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FTebmer_Awesome-Knowledge-Distillation-of-LLMs_readme_a494f9aaacda.png\" width=\"400\">\u003Cbr>\n\u003C\u002Fdiv>\n\u003Cbr>\n\n|  Role  |   Description  |   Trend   |\n|:--------|:--------:|:--------:|\n| **① Advancing SLMs** | Transferring advanced capabilities from proprietary LLMs to smaller SLMs, such as open source LLMs or other smaller models. | Most common |\n| **② Compression** | Compressing LLMs to make them more efficient and practical.  |  More popular with the prosperity of open-source LLMs |\n| **③ Self-Improvement** | Refining open-source LLMs' performance by leveraging their own knowledge, i.e. self-knowledge. 
| New trend to make open-source LLMs more competitive |\n\n\n\n\n## 📒 Table of Contents\n- [KD Algorithms](#kd-algorithms)\n    - [Knowledge Elicitation](#knowledge-elicitation)\n        - [Labeling](#labeling)\n        - [Expansion](#expansion)\n        - [Curation](#curation)\n        - [Feature](#feature)\n        - [Feedback](#feedback)\n        - [Self-Knowledge](#self-knowledge)\n    - [Distillation Algorithms](#distillation-algorithms)\n        - [Supervised Fine-Tuning](#supervised-fine-tuning)\n        - [Divergence and Similarity](#divergence-and-similarity)\n        - [Reinforcement Learning](#reinforcement-learning)\n        - [Rank Optimization](#rank-optimization)\n- [Skill Distillation](#skill-distillation)\n    - [Context Following](#context-following)\n        - [Instruction Following](#instruction-following)\n        - [Multi-turn Dialogue](#multi-turn-dialogue)\n        - [RAG Capability](#rag-capability)\n    - [Alignment](#alignment)\n        - [Thinking Pattern](#thinking-pattern)\n        - [Preference](#preference)\n        - [Value](#value)\n    - [Agent](#agent)\n        - [Tool Using](#tool-using)\n        - [Planning](#planning)\n    - [NLP Task Specialization](#nlp-task-specialization)\n        - [NLU](#nlu)\n        - [NLG](#nlg)\n        - [Information Retrieval](#information-retrieval)\n        - [Recommendation](#recommendation)\n        - [Text Generation Evaluation](#text-generation-evaluation)\n        - [Code](#code)\n    - [Multi-Modality](#multi-modality)\n    - [Summary Table](#summary-table)\n- [Verticalization Distillation](#verticalization-distillation)\n    - [Law](#law)\n    - [Medical & Healthcare](#medical--healthcare)\n    - [Finance](#finance)\n    - [Science](#science)\n    - [Misc.](#misc)\n\n- [Encoder-based KD](#encoder-based-kd)\n\n- [Citation](#citation)\n\n\n\n## KD Algorithms\n### Knowledge Elicitation\n#### Labeling\n|  Title  |   Venue  |   Date   | Code | 
Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Evidence-Focused Fact Summarization for Knowledge-Augmented Zero-Shot Question Answering**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2403.02966.pdf) | arXiv | 2024-03 |\n| [**Aligning Large and Small Language Models via Chain-of-Thought Reasoning**](https:\u002F\u002Faclanthology.org\u002F2024.eacl-long.109.pdf) | EACL | 2024-03 | [Github](https:\u002F\u002Fgithub.com\u002Flranaldii\u002FAligning_LLMs) |\n| [**Divide-or-Conquer? Which Part Should You Distill Your LLM?**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.15000.pdf) | arXiv | 2024-02 |\n| [**Miko: Multimodal Intention Knowledge Distillation from Large Language Models for Social-Media Commonsense Discovery**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.18169.pdf) | arXiv | 2024-02 |\n| [**KnowTuning: Knowledge-aware Fine-tuning for Large Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.11176.pdf) | arXiv | 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002Fyouganglyu\u002FKnowTuning) |\n| [**TinyLLM: Learning a Small Student from Multiple Large Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.04616.pdf) | arXiv | 2024-02 | \n| [**Mixed Distillation Helps Smaller Language Model Better Reasoning**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2312.10730.pdf) | arXiv | 2023-12 |\n| [**Tailoring Self-Rationalizers with Multi-Reward Distillation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.02805.pdf) | arXiv | 2023-11 | [Github](https:\u002F\u002Finklab.usc.edu\u002FMaRio\u002F)| [Data](https:\u002F\u002Finklab.usc.edu\u002FMaRio\u002F)|\n| [**Orca 2: Teaching Small Language Models How to Reason**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.11045.pdf) | arXiv | 2023-11 |\n| [**Mammoth: Building Math Generalist Models through Hybrid Instruction Tuning**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2309.05653) | arXiv | 2023-09 | [Github](https:\u002F\u002Ftiger-ai-lab.github.io\u002FMAmmoTH\u002F)| 
[Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FTIGER-Lab\u002FMathInstruct)| \n| [**PandaLM: An Automatic Evaluation Benchmark for LLM Instruction Tuning Optimization**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.05087) | arXiv | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002FWeOpenML\u002FPandaLM)| [Data](https:\u002F\u002Fgithub.com\u002FWeOpenML\u002FPandaLM)|\n| [**Symbolic Chain-of-Thought Distillation: Small Models Can Also \"Think\" Step-by-Step**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.14050) | ACL | 2023-06 | \n| [**Orca: Progressive Learning from Complex Explanation Traces of GPT-4**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.02707) | arXiv | 2023-06 | \n| [**Distilling Step-by-Step! Outperforming Larger Language Models with Less Training Data and Smaller Model Sizes**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.02301) | ACL | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002Fgoogle-research\u002Fdistilling-step-by-step)| [Data](https:\u002F\u002Fgithub.com\u002Fgoogle-research\u002Fdistilling-step-by-step)|\n| [**Impossible Distillation: from Low-Quality Model to High-Quality Dataset & Model for Summarization and Paraphrasing**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.16635) | arXiv | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002Fjaehunjung1\u002Fimpossible-distillation)|\n| [**Baize: An Open-Source Chat Model with Parameter-Efficient Tuning on Self-Chat Data**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.01196) | EMNLP | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fproject-baize\u002Fbaize-chatbot)| [Data](https:\u002F\u002Fgithub.com\u002Fproject-baize\u002Fbaize-chatbot\u002Ftree\u002Fmain\u002Fdata)|\n| [**ChatGPT outperforms crowd workers for text-annotation tasks**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2303.15056.pdf) | arXiv | 2023-03 | \n| [**Annollm: Making large language models to be better crowdsourced annotators**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2303.16854) | 
arXiv | 2023-03 |\n| [**GPT-4All: Training an Assistant-Style Chatbot with Large Scale Data Distillation from GPT-3.5-Turbo**](https:\u002F\u002Fs3.amazonaws.com\u002Fstatic.nomic.ai\u002Fgpt4all\u002F2023_GPT4All_Technical_Report.pdf) | - | 2023-03 | [Github](https:\u002F\u002Fgithub.com\u002Fnomic-ai\u002Fgpt4all)|\n| [**Specializing Smaller Language Models towards Multi-Step Reasoning**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2301.12726) | arXiv | 2023-01 |\n|  [**Is GPT-3 a Good Data Annotator?**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2212.10450.pdf) | ACL | 2022-12 | [Github](https:\u002F\u002Fgithub.com\u002FDAMO-NLP-SG\u002FLLM-Data-Annotator)|\n| [**Large Language Models Are Reasoning Teachers**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2212.10071) | ACL | 2022-12 | [Github](https:\u002F\u002Fgithub.com\u002Fitsnamgyu\u002Freasoning-teacher)| [Data](https:\u002F\u002Fgithub.com\u002Fitsnamgyu\u002Freasoning-teacher)|\n| [**Teaching Small Language Models to Reason**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2212.08410) | ACL | 2022-12 |\n| [**Explanations from Large Language Models Make Small Reasoners Better**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2210.06726) | arXiv | 2022-10 | \n| [**Want To Reduce Labeling Cost? 
GPT-3 Can Help**](https:\u002F\u002Faclanthology.org\u002F2021.findings-emnlp.354) | Findings of EMNLP | 2021-08 |\n\n\n\n#### Expansion\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Instruction Fusion: Advancing Prompt Evolution through Hybridization**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2312.15692.pdf) | arXiv | 2023-12 | \n| [**An Empirical Study of Instruction-tuning Large Language Models in Chinese**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.07328.pdf) | EMNLP | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002FPhoebusSi\u002FAlpaca-CoT)| [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FQingyiSi\u002FAlpaca-CoT)|\n| [**PromptMix: A Class Boundary Augmentation Method for Large Language Model Distillation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.14192.pdf) | EMNLP | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002FServiceNow\u002FPromptMix-EMNLP-2023) |\n| [**Wizardmath: Empowering mathematical reasoning for large language models via reinforced evol-instruct**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2308.09583) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002Fnlpxucan\u002FWizardLM)|\n| [**Code Llama: Open Foundation Models for Code**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.12950.pdf) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fcodellama)| \n| [**WizardCoder: Empowering Code Large Language Models with Evol-Instruct**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.08568) | ICLR | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002Fnlpxucan\u002FWizardLM) |\n| [**Principle-Driven Self-Alignment of Language Models from Scratch with Minimal Human Supervision**](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2023\u002Fhash\u002F0764db1151b936aca59249e2c1386101-Abstract-Conference.html) | NeurIPS | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002FIBM\u002FDromedary) | 
[Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fzhiqings\u002Fdromedary-65b-verbose-clone-v0)|\n| [**Targeted Data Generation: Finding and Fixing Model Weaknesses**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.17804.pdf) | ACL | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002FZexueHe\u002FTDG)| \n| [**Wizardlm: Empowering large language models to follow complex instructions**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.12244) | ICLR | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fnlpxucan\u002FWizardLM)| [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FWizardLM\u002FWizardLM_evol_instruct_70k) \u003Cbr> [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FWizardLM\u002FWizardLM_evol_instruct_V2_196k)|\n| [**LaMini-LM: A Diverse Herd of Distilled Models from Large-Scale Instructions**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.14402) | arXiv | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fmbzuai-nlp\u002FLaMini-LM?tab=readme-ov-file) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FMBZUAI\u002FLaMini-instruction)|\n| [**Alpaca: Aligning Language Model with Human Preferences**](https:\u002F\u002Fcrfm.stanford.edu\u002F2023\u002F03\u002F13\u002Falpaca.html) | - | 2023-03 | [Github](https:\u002F\u002Fgithub.com\u002Ftatsu-lab\u002Fstanford_alpaca)| [Data](https:\u002F\u002Fgithub.com\u002Ftatsu-lab\u002Fstanford_alpaca\u002Fblob\u002Fmain\u002Falpaca_data.json)|\n| Code Alpaca: An Instruction-following LLaMA model for code generation | - | 2023-03 | [Github](https:\u002F\u002Fgithub.com\u002Fsahil280114\u002Fcodealpaca)| [Data](https:\u002F\u002Fgithub.com\u002Fsahil280114\u002Fcodealpaca?tab=readme-ov-file#data-release)|\n| [**Exploring the Impact of Instruction Data Scaling on Large Language Models: An Empirical Study on Real-World Use Cases**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2303.14742) | arXiv | 2023-03 | [Github](https:\u002F\u002Fgithub.com\u002FLianjiaTech\u002FBELLE) | 
[Data](https:\u002F\u002Fhuggingface.co\u002FBelleGroup)| \n| [**AugGPT: Leveraging ChatGPT for Text Data Augmentation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2302.13007.pdf) | arXiv | 2023-02 | [Github](https:\u002F\u002Fgithub.com\u002Fyhydhx\u002FAugGPT)| \n| [**Self-instruct: Aligning language model with self generated instructions**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2212.10560) | ACL | 2022-12 | [Github](https:\u002F\u002Fgithub.com\u002Fyizhongw\u002Fself-instruct)| [Data](https:\u002F\u002Fgithub.com\u002Fyizhongw\u002Fself-instruct) |\n| [**Symbolic Knowledge Distillation: from General Language Models to Commonsense Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2110.07178.pdf) | NAACL | 2021-10 | [Github](https:\u002F\u002Fgithub.com\u002Fpeterwestai2\u002Fsymbolic-knowledge-distillation) | [Data](https:\u002F\u002Fgithub.com\u002Fpeterwestai2\u002Fsymbolic-knowledge-distillation)|\n\n\n#### Curation\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Synthetic Data (Almost) from Scratch: Generalized Instruction Tuning for Language Models**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.13064) | arXiv | 2024-02 |\n| [**Phi-2: The surprising power of small language models**](https:\u002F\u002Fwww.microsoft.com\u002Fen-us\u002Fresearch\u002Fblog\u002Fphi-2-the-surprising-power-of-small-language-models\u002F) | - | 2023-12 |\n| [**WaveCoder: Widespread And Versatile Enhanced Instruction Tuning with Refined Data Generation**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2312.14187) | arXiv | 2023-12 |\n| [**Magicoder: Source Code Is All You Need**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2312.02120.pdf) | arXiv | 2023-12 | [Github](https:\u002F\u002Fgithub.com\u002Fise-uiuc\u002Fmagicoder) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fise-uiuc\u002FMagicoder-OSS-Instruct-75K) \u003Cbr> 
[Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fise-uiuc\u002FMagicoder-Evol-Instruct-110K)|\n| [**MFTCoder: Boosting Code LLMs with Multitask Fine-Tuning**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.02303.pdf) | arXiv | 2023-11 | [Github](https:\u002F\u002Fgithub.com\u002Fcodefuse-ai\u002FMFTCOder)| [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fcodefuse-ai\u002FEvol-instruction-66k) \u003Cbr> [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fcodefuse-ai\u002FCodeExercise-Python-27k)|\n| [**Textbooks Are All You Need II: Phi-1.5 Technical Report**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2309.05463.pdf) | arXiv | 2023-09 |\n| [**Neural Machine Translation Data Generation and Augmentation using ChatGPT**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2307.05779.pdf) | arXiv | 2023-07 |\n| [**Textbooks Are All You Need: A Large-Scale Instructional Text Data Set for Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.11644.pdf) | arXiv | 2023-06 | \n| [**Enhancing Chat Language Models by Scaling High-quality Instructional Conversations**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.14233) | arXiv | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002Fthunlp\u002FUltraChat) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fstingning\u002Fultrachat)|\n| [**AugTriever: Unsupervised Dense Retrieval by Scalable Data Augmentation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2212.08841.pdf) | arXiv | 2022-12 | [Github](https:\u002F\u002Fgithub.com\u002Fsalesforce\u002FAugTriever)\n| [**SunGen: Self-Guided Noise-Free Data Generation for Efficient Zero-Shot Learning**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2205.12679.pdf) | ICLR | 2022-05 | [Github](https:\u002F\u002Fgithub.com\u002FSumilerGAO\u002FSunGen) \n| [**ZeroGen: Efficient Zero-shot Learning via Dataset Generation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2202.07922.pdf) | EMNLP | 2022-02 | 
[Github](https:\u002F\u002Fgithub.com\u002Fjiacheng-ye\u002FZeroGen)|\n| [**InPars: Data Augmentation for Information Retrieval using Large Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2202.05144.pdf) | arXiv | 2022-02 | [Github](https:\u002F\u002Fgithub.com\u002Fzetaalphavector\u002Finpars)| [Data](https:\u002F\u002Fgithub.com\u002Fzetaalphavector\u002Finpars)|\n| [**Towards Zero-Label Language Learning**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2109.09193.pdf) | arXiv | 2021-09 | \n\n\n#### Feature\n\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**PromptKD: Distilling Student-Friendly Knowledge for Generative Language Models via Prompt Tuning**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.12842) | EMNLP Findings| 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002Fgmkim-ai\u002FPromptKD) |  [Data](https:\u002F\u002Fgithub.com\u002Fgmkim-ai\u002FPromptKD\u002Ftree\u002Fmain\u002Fdata_utils)\n| [**Rethinking Kullback-Leibler Divergence in Knowledge Distillation for Large Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2404.02657.pdf) | arXiv | 2024-04 | \n| [**Direct Alignment of Draft Model for Speculative Decoding with Chat-Fine-Tuned LLMs**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2403.00858.pdf) | arXiv | 2024-03 |\n| [**DB-LLM: Accurate Dual-Binarization for Efficient LLMs**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.11960.pdf) | arXiv | 2024-02 | \n| [**BitDistiller: Unleashing the Potential of Sub-4-Bit LLMs via Self-Distillation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.10631.pdf) | arXiv | 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002FDD-DuDa\u002FBitDistiller) |\n| [**DISTILLM: Towards Streamlined Distillation for Large Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.03898.pdf) | arXiv | 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002Fjongwooko\u002Fdistillm) |\n| [**Towards Cross-Tokenizer Distillation: the 
Universal Logit Distillation Loss for LLMs**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.12030.pdf) | arXiv | 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002FNicolas-BZRD\u002Fllm-recipes) | [Data](https:\u002F\u002Fhuggingface.co\u002FNicolas-BZRD)|\n| [**Revisiting Knowledge Distillation for Autoregressive Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.11890.pdf) | arXiv | 2024-02 |\n| [**Knowledge Fusion of Large Language Models**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2401.10491) | ICLR | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002Ffanqiwan\u002FFuseLLM ) \n| [**Improving In-context Learning via Bidirectional Alignment**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2312.17055.pdf) | arXiv | 2023-12 \n| [**Towards the Fundamental Limits of Knowledge Transfer over Finite Domains**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.07838) | NeurIPS | 2023-10 |\n| [**Baby Llama: Knowledge Distillation from an Ensemble of Teachers Trained on a Small Dataset with No Performance Penalty**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.02019.pdf) | CoNLL | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002Ftiminar\u002FBabyLlama) | [Data](https:\u002F\u002Fgithub.com\u002Ftiminar\u002FBabyLlama )|\n| [**f-Divergence Minimization for Sequence-Level Knowledge Distillation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2307.15190.pdf) | ACL | 2023-07 | [Github](https:\u002F\u002Fgithub.com\u002FMANGA-UOFA\u002Ffdistill) | [Data](https:\u002F\u002Fdrive.google.com\u002Ffile\u002Fd\u002F1V7bPndyoTQxcJ6m1BoXAw7-ub-jv8Wh1\u002Fview?usp=sharing)|\n| [**MiniLLM: Knowledge Distillation of Large Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.08543.pdf) | ICLR | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FLMOps\u002Ftree\u002Fmain\u002Fminillm) | [Data](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FLMOps\u002Ftree\u002Fmain\u002Fminillm) |\n| [**On-Policy Distillation of Language Models: Learning 
from Self-Generated Mistakes**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.13649.pdf) | ICLR | 2023-06 |\n| [**LLM-QAT: Data-Free Quantization Aware Training for Large Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.17888.pdf) | arXiv | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002FLLM-QAT)| [Data](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002FLLM-QAT)|\n| [**Less is more: Task-aware layer-wise distillation for language model compression**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2210.01351.pdf) | PMLR | 2022-10 | [Github](https:\u002F\u002Fgithub.com\u002Fcliang1453\u002Ftask-aware-distillation)\n\n\n#### Feedback\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**PromptKD: Distilling Student-Friendly Knowledge for Generative Language Models via Prompt Tuning**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.12842) | EMNLP Findings| 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002Fgmkim-ai\u002FPromptKD) |  [Data](https:\u002F\u002Fgithub.com\u002Fgmkim-ai\u002FPromptKD\u002Ftree\u002Fmain\u002Fdata_utils)\n| [**Evidence-Focused Fact Summarization for Knowledge-Augmented Zero-Shot Question Answering**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2403.02966.pdf) | arXiv | 2024-03 |\n| [**Evolving Knowledge Distillation with Large Language Models and Active Learning**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2403.06414v1.pdf) | arXiv | 2024-03 |\n| [**Direct Language Model Alignment from Online AI Feedback**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.04792.pdf) | arXiv | 2024-02 | \n| [**DISTILLM: Towards Streamlined Distillation for Large Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.03898.pdf) | arXiv | 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002Fjongwooko\u002Fdistillm) |\n| [**Improving Large Language Models via Fine-grained Reinforcement Learning with Minimum Editing 
Constraint**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.06081.pdf) | arXiv | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002FRUCAIBox\u002FRLMEC)\n| [**Beyond Imitation: Leveraging Fine-grained Quality Signals for Alignment**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.04072.pdf) | arXiv | 2023-11 | \n| [**Can Language Models Teach Weaker Agents? Teacher Explanations Improve Students via Personalization**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.02421) | ICLR | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002FswarnaHub\u002FExplanationIntervention) |\n| [**Motif: Intrinsic Motivation from Artificial Intelligence Feedback**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.00166) | ICLR | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fmotif) |\n| [**Ultrafeedback: Boosting language models with high-quality feedback**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.01377.pdf) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Fthunlp\u002FUltraFeedback) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fopenbmb\u002FUltraFeedback)|\n| [**Personalised Distillation: Empowering Open-Sourced LLMs with Adaptive Learning for Code Generation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.18628.pdf) | EMNLP | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002FSalesforceAIResearch\u002FPersDistill)|\n| [**CycleAlign: Iterative Distillation from Black-box LLM to White-box Models for Better Human Alignment**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.16271) | arXiv | 2023-10 \n| [**Rlaif: Scaling Reinforcement Learning from Human Feedback with AI Feedback**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2309.00267.pdf) | arXiv | 2023-09 |\n| [**Wizardmath: Empowering mathematical reasoning for large language models via reinforced evol-instruct**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2308.09583) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002Fnlpxucan\u002FWizardLM)|\n| [**On-Policy 
Distillation of Language Models: Learning from Self-Generated Mistakes**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.13649.pdf) | ICLR | 2023-06 |\n| [**MiniLLM: Knowledge Distillation of Large Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.08543.pdf) | ICLR | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FLMOps\u002Ftree\u002Fmain\u002Fminillm) | [Data](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FLMOps\u002Ftree\u002Fmain\u002Fminillm) |\n| [**Language to Rewards for Robotic Skill Synthesis**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.08647.pdf) | arXiv | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002Fgoogle-deepmind\u002Flanguage_to_reward_2023)|\n| [**Lion: Adversarial Distillation of Closed-Source Large Language Model**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.12870.pdf) | EMNLP | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002FYJiangcm\u002FLion)|\n| [**SelFee: Iterative Self-Revising LLM Empowered by Self-Feedback Generation**](https:\u002F\u002Fkaistai.github.io\u002FSelFee\u002F) | arXiv | 2023-05 \n| [**LaMini-LM: A Diverse Herd of Distilled Models from Large-Scale Instructions**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.14402) | arXiv | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fmbzuai-nlp\u002FLaMini-LM?tab=readme-ov-file) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FMBZUAI\u002FLaMini-instruction)|\n| [**Reward Design with Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2303.00001.pdf) | ICLR | 2023-03 | [Github](https:\u002F\u002Fgithub.com\u002Fminaek\u002Freward_design_with_llms)|\n| [**Constitutional AI: Harmlessness from AI Feedback**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2212.08073.pdf) | arXiv | 2022-12 |\n\n\n\n\n#### Self-Knowledge\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**V-STaR: Training Verifiers for Self-Taught 
Reasoners**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.06457.pdf) | arXiv | 2024-02 \n| [**Self-Rewarding Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.10020.pdf) | arXiv | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002Flucidrains\u002Fself-rewarding-lm-pytorch?tab=readme-ov-file    )|\n| [**Self-Play Fine-Tuning Converts Weak Language Models to Strong Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.01335.pdf) | arXiv | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002Fuclaml\u002FSPIN) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FUCLA-AGI\u002FSPIN_iter0)|\n| [**Kun: Answer Polishment for Chinese Self-Alignment with Instruction Back-Translation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.06477.pdf) | arXiv | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002FZheng0428\u002FCOIG-Kun) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fm-a-p\u002FCOIG-Kun)|\n| [**APT: Adaptive Pruning and Tuning Pretrained Language Models for Efficient Training and Inference**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.12200.pdf) | arXiv | 2024-01 |\n| [**GRATH: Gradual Self-Truthifying for Large Language Models**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2401.12292) | arXiv | 2024-01 | \n| [**Beyond human data: Scaling self-training for problem-solving with language models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2312.06585.pdf) | arXiv | 2023-12 \n| [**Self-Knowledge Guided Retrieval Augmentation for Large Language Models**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.05002) | EMNLP Findings | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002FTHUNLP-MT\u002FSKR) |\n| [**RAIN: Your Language Models Can Align Themselves without Finetuning**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2309.07124.pdf) | arXiv | 2023-09 | [Github](https:\u002F\u002Fgithub.com\u002FSafeAILab\u002FRAIN) \n| [**Reinforced Self-Training (ReST) for Language 
Modeling**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.08998.pdf) | arXiv | 2023-08 \n| [**Humback: Self-Alignment with Instruction Backtranslation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.06259.pdf) | ICLR | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002FSpico197\u002FHumback)\n| [**Self-Alignment of Large Language Models via Reinforcement Learning from Contrast Distillation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2307.12950.pdf) | ICLR | 2023-07 | [Github](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Frlcd)|\n| [**Self-Improvement of Large Language Models via Reinforcement Learning from Human Feedback**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.14050.pdf) | EMNLP | 2023-06 | \n| [**Principle-Driven Self-Alignment of Language Models from Scratch with Minimal Human Supervision**](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2023\u002Fhash\u002F0764db1151b936aca59249e2c1386101-Abstract-Conference.html) | NeurIPS | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002FIBM\u002FDromedary) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fzhiqings\u002Fdromedary-65b-verbose-clone-v0)|\n| [**Impossible Distillation: from Low-Quality Model to High-Quality Dataset & Model for Summarization and Paraphrasing**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.16635) | arXiv | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002Fjaehunjung1\u002Fimpossible-distillation)|\n| [**Language Model Self-improvement by Reinforcement Learning Contemplation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.14483.pdf) | arXiv | 2023-05 \n| [**Baize: An Open-Source Chat Model with Parameter-Efficient Tuning on Self-Chat Data**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.01196) | EMNLP | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fproject-baize\u002Fbaize-chatbot)| [Data](https:\u002F\u002Fgithub.com\u002Fproject-baize\u002Fbaize-chatbot\u002Ftree\u002Fmain\u002Fdata)|\n| [**Self-instruct: Aligning language model 
with self generated instructions**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2212.10560) | ACL | 2022-12 | [Github](https:\u002F\u002Fgithub.com\u002Fyizhongw\u002Fself-instruct)| [Data](https:\u002F\u002Fgithub.com\u002Fyizhongw\u002Fself-instruct) |\n| [**Large Language Models Can Self-Improve**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2210.11610.pdf) | EMNLP | 2022-10\n| [**STaR: Bootstrapping Reasoning With Reasoning**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2203.14465.pdf) | NeurIPS | 2022-03 | [Github](https:\u002F\u002Fgithub.com\u002Fezelikman\u002FSTaR)|\n\n\n\n\n\n### Distillation Algorithms\n#### Supervised Fine-Tuning\n\n> Due to the large number of works applying supervised fine-tuning, we only list the most representative ones here. \n\n\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Evidence-Focused Fact Summarization for Knowledge-Augmented Zero-Shot Question Answering**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2403.02966.pdf) | arXiv | 2024-03 |\n| [**Aligning Large and Small Language Models via Chain-of-Thought Reasoning**](https:\u002F\u002Faclanthology.org\u002F2024.eacl-long.109.pdf) | EACL | 2024-03 | [Github](https:\u002F\u002Fgithub.com\u002Flranaldii\u002FAligning_LLMs) |\n| [**Divide-or-Conquer? 
Which Part Should You Distill Your LLM?**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.15000.pdf) | arXiv | 2024-02 |\n| [**Synthetic Data (Almost) from Scratch: Generalized Instruction Tuning for Language Models**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.13064) | arXiv | 2024-02 |\n| [**Orca 2: Teaching Small Language Models How to Reason**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.11045.pdf) | arXiv | 2023-11 |\n| [**TinyLLM: Learning a Small Student from Multiple Large Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.04616.pdf) | arXiv | 2024-02 | \n| [**Wizardmath: Empowering mathematical reasoning for large language models via reinforced evol-instruct**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2308.09583) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002Fnlpxucan\u002FWizardLM)|\n| [**Orca: Progressive Learning from Complex Explanation Traces of GPT-4**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.02707) | arXiv | 2023-06 | \n| [**LaMini-LM: A Diverse Herd of Distilled Models from Large-Scale Instructions**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.14402) | arXiv | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fmbzuai-nlp\u002FLaMini-LM?tab=readme-ov-file) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FMBZUAI\u002FLaMini-instruction)|\n| [**Wizardlm: Empowering large language models to follow complex instructions**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.12244) | ICLR | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fnlpxucan\u002FWizardLM)| [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FWizardLM\u002FWizardLM_evol_instruct_70k) \u003Cbr> [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FWizardLM\u002FWizardLM_evol_instruct_V2_196k)|\n| [**Baize: An Open-Source Chat Model with Parameter-Efficient Tuning on Self-Chat Data**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.01196) | EMNLP | 2023-04 | 
[Github](https:\u002F\u002Fgithub.com\u002Fproject-baize\u002Fbaize-chatbot)| [Data](https:\u002F\u002Fgithub.com\u002Fproject-baize\u002Fbaize-chatbot\u002Ftree\u002Fmain\u002Fdata)|\n| [**Alpaca: A Strong, Replicable Instruction-Following Model**](https:\u002F\u002Fcrfm.stanford.edu\u002F2023\u002F03\u002F13\u002Falpaca.html) | - | 2023-03 | [Github](https:\u002F\u002Fgithub.com\u002Ftatsu-lab\u002Fstanford_alpaca)| [Data](https:\u002F\u002Fgithub.com\u002Ftatsu-lab\u002Fstanford_alpaca\u002Fblob\u002Fmain\u002Falpaca_data.json)|\n| [**Vicuna: An Open-Source Chatbot Impressing GPT-4 with 90\\%* ChatGPT Quality**](https:\u002F\u002Flmsys.org\u002Fblog\u002F2023-03-30-vicuna\u002F) | - | 2023-03 | [Github]( https:\u002F\u002Fgithub.com\u002Flm-sys\u002FFastChat) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fanon8231489123\u002FShareGPT_Vicuna_unfiltered)|\n| [**Self-instruct: Aligning language model with self generated instructions**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2212.10560) | ACL | 2022-12 | [Github](https:\u002F\u002Fgithub.com\u002Fyizhongw\u002Fself-instruct)| [Data](https:\u002F\u002Fgithub.com\u002Fyizhongw\u002Fself-instruct) |\n| [**Large Language Models Can Self-Improve**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2210.11610.pdf) | EMNLP | 2022-10\n| [**STaR: Bootstrapping Reasoning With Reasoning**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2203.14465.pdf) | NeurIPS | 2022-03 | [Github](https:\u002F\u002Fgithub.com\u002Fezelikman\u002FSTaR)|\n\n\n#### Divergence and Similarity\n\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**PromptKD: Distilling Student-Friendly Knowledge for Generative Language Models via Prompt Tuning**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.12842) | EMNLP Findings| 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002Fgmkim-ai\u002FPromptKD) |  
[Data](https:\u002F\u002Fgithub.com\u002Fgmkim-ai\u002FPromptKD\u002Ftree\u002Fmain\u002Fdata_utils)\n| [**Rethinking Kullback-Leibler Divergence in Knowledge Distillation for Large Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2404.02657.pdf) | arXiv | 2024-04 | \n| [**Weight-Inherited Distillation for Task-Agnostic BERT Compression**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.09098.pdf) | NAACL | 2024-03 | [Github](https:\u002F\u002Fgithub.com\u002Fwutaiqiang\u002FWID-NAACL2024) |\n| [**BitDistiller: Unleashing the Potential of Sub-4-Bit LLMs via Self-Distillation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.10631.pdf) | arXiv | 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002FDD-DuDa\u002FBitDistiller) |\n| [**DISTILLM: Towards Streamlined Distillation for Large Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.03898.pdf) | arXiv | 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002Fjongwooko\u002Fdistillm) |\n| [**Towards Cross-Tokenizer Distillation: the Universal Logit Distillation Loss for LLMs**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.12030.pdf) | arXiv | 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002FNicolas-BZRD\u002Fllm-recipes) | [Data](https:\u002F\u002Fhuggingface.co\u002FNicolas-BZRD)|\n| [**Revisiting Knowledge Distillation for Autoregressive Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.11890.pdf) | arXiv | 2024-02 |\n| [**Knowledge Distillation for Closed-Source Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.07013.pdf) | arXiv | 2024-01 | \n| [**Knowledge Fusion of Large Language Models**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2401.10491) | ICLR | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002Ffanqiwan\u002FFuseLLM ) \n| [**Improving In-context Learning via Bidirectional Alignment**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2312.17055.pdf) | arXiv | 2023-12 \n| [**Towards the Fundamental Limits of Knowledge Transfer over Finite 
Domains**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.07838) | NeurIPS | 2023-10 |\n| [**Baby Llama: Knowledge Distillation from an Ensemble of Teachers Trained on a Small Dataset with No Performance Penalty**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.02019.pdf) | CoNLL | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002Ftiminar\u002FBabyLlama) | [Data](https:\u002F\u002Fgithub.com\u002Ftiminar\u002FBabyLlama )|\n| [**f-Divergence Minimization for Sequence-Level Knowledge Distillation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2307.15190.pdf) | ACL | 2023-07 | [Github](https:\u002F\u002Fgithub.com\u002FMANGA-UOFA\u002Ffdistill) | [Data](https:\u002F\u002Fdrive.google.com\u002Ffile\u002Fd\u002F1V7bPndyoTQxcJ6m1BoXAw7-ub-jv8Wh1\u002Fview?usp=sharing)|\n| [**f-Divergence Minimization for Sequence-Level Knowledge Distillation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2307.15190.pdf) | ACL | 2023-07 | [Github](https:\u002F\u002Fgithub.com\u002FMANGA-UOFA\u002Ffdistill) | [Data](https:\u002F\u002Fdrive.google.com\u002Ffile\u002Fd\u002F1V7bPndyoTQxcJ6m1BoXAw7-ub-jv8Wh1\u002Fview?usp=sharing)|\n| [**MiniLLM: Knowledge Distillation of Large Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.08543.pdf) | ICLR | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FLMOps\u002Ftree\u002Fmain\u002Fminillm) | [Data](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FLMOps\u002Ftree\u002Fmain\u002Fminillm) |\n| [**On-Policy Distillation of Language Models: Learning from Self-Generated Mistakes**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.13649.pdf) | ICLR | 2023-06 |\n| [**LLM-QAT: Data-Free Quantization Aware Training for Large Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.17888.pdf) | arXiv | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002FLLM-QAT)| [Data](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002FLLM-QAT)|\n| [**Less is more: Task-aware layer-wise distillation for 
language model compression**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2210.01351.pdf) | PMLR | 2022-10 | [Github](https:\u002F\u002Fgithub.com\u002Fcliang1453\u002Ftask-aware-distillation)\n| [**DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1910.01108.pdf) | NeurIPS | 2019-10\n\n\n#### Reinforcement Learning\n\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Direct Language Model Alignment from Online AI Feedback**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.04792.pdf) | arXiv | 2024-02 | \n| [**Improving Large Language Models via Fine-grained Reinforcement Learning with Minimum Editing Constraint**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.06081.pdf) | arXiv | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002FRUCAIBox\u002FRLMEC)\n| [**Accelerating Reinforcement Learning of Robotic Manipulations via Feedback from Large Language Models**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2311.02379) | CoRL | 2023-11 |\n| [**Motif: Intrinsic Motivation from Artificial Intelligence Feedback**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.00166) | ICLR | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fmotif) |\n| [**Ultrafeedback: Boosting language models with high-quality feedback**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.01377.pdf) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Fthunlp\u002FUltraFeedback) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fopenbmb\u002FUltraFeedback)|\n| [**Eureka: Human-Level Reward Design via Coding Large Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.12931.pdf) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Feureka-research\u002FEureka)\n| [**Rlaif: Scaling Reinforcement Learning from Human Feedback with AI Feedback**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2309.00267.pdf) | arXiv | 2023-09 
|\n| [**Wizardmath: Empowering mathematical reasoning for large language models via reinforced evol-instruct**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2308.09583) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002Fnlpxucan\u002FWizardLM)|\n| [**On-Policy Distillation of Language Models: Learning from Self-Generated Mistakes**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.13649.pdf) | ICLR | 2023-06 |\n| [**Aligning Large Language Models through Synthetic Feedback**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.13735.pdf) | EMNLP | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002Fnaver-ai\u002Falmost)| [Data](https:\u002F\u002Fgithub.com\u002Fnaver-ai\u002Falmost )|\n| [**Language Model Self-improvement by Reinforcement Learning Contemplation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.14483.pdf) | arXiv | 2023-05 \n| [**Constitutional AI: Harmlessness from AI Feedback**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2212.08073.pdf) | arXiv | 2022-12 |\n\n\n#### Rank Optimization\n\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Evidence-Focused Fact Summarization for Knowledge-Augmented Zero-Shot Question Answering**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2403.02966.pdf) | arXiv | 2024-03 |\n| [**KnowTuning: Knowledge-aware Fine-tuning for Large Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.11176.pdf) | arXiv | 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002Fyouganglyu\u002FKnowTuning) |\n| [**Self-Rewarding Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.10020.pdf) | arXiv | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002Flucidrains\u002Fself-rewarding-lm-pytorch?tab=readme-ov-file    )|\n| [**Self-Play Fine-Tuning Converts Weak Language Models to Strong Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.01335.pdf) | arXiv | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002Fuclaml\u002FSPIN) | 
[Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FUCLA-AGI\u002FSPIN_iter0)|\n| [**Zephyr: Direct Distillation of Language Model Alignment**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.16944.pdf) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Falignment-handbook ) |  [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FHuggingFaceH4\u002Fultrachat_200k)|\n| [**CycleAlign: Iterative Distillation from Black-box LLM to White-box Models for Better Human Alignment**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.16271) | arXiv | 2023-10\n\n\n\n\n## Skill Distillation\n### Context Following\n#### Instruction Following\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Synthetic Data (Almost) from Scratch: Generalized Instruction Tuning for Language Models**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.13064) | arXiv | 2024-02 |\n| [**Revisiting Knowledge Distillation for Autoregressive Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.11890.pdf) | arXiv | 2024-02 |\n| [**Selective Reflection-Tuning: Student-Selected Data Recycling for LLM Instruction-Tuning**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.10110) | arXiv | 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002Ftianyi-lab\u002FReflection_Tuning) | [Data](https:\u002F\u002Fgithub.com\u002Ftianyi-lab\u002FReflection_Tuning)|\n| [**Phi-2: The surprising power of small language models**](https:\u002F\u002Fwww.microsoft.com\u002Fen-us\u002Fresearch\u002Fblog\u002Fphi-2-the-surprising-power-of-small-language-models\u002F) | - | 2023-12 |\n| [**What Makes Good Data for Alignment? 
A Comprehensive Study of Automatic Data Selection in Instruction Tuning**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2312.15685) | ICLR | 2023-12 | [Github](https:\u002F\u002Fgithub.com\u002Fhkust-nlp\u002Fdeita) | [Data](https:\u002F\u002Fgithub.com\u002Fhkust-nlp\u002Fdeita)|\n| [**MUFFIN: Curating Multi-Faceted Instructions for Improving Instruction-Following**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2312.02436) | arXiv | 2023-12 | [Github](https:\u002F\u002Fgithub.com\u002FRenzeLou\u002FMuffin) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FReza8848\u002FMUFFIN_68k)|\n| [**Instruction Fusion: Advancing Prompt Evolution through Hybridization**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2312.15692.pdf) | arXiv | 2023-12 | \n| [**Orca 2: Teaching Small Language Models How to Reason**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.11045.pdf) | arXiv | 2023-11 |\n| [**Reflection-Tuning: Data Recycling Improves LLM Instruction-Tuning**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.11716) | NIPS Workshop | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Ftianyi-lab\u002FReflection_Tuning) | [Data](https:\u002F\u002Fgithub.com\u002Ftianyi-lab\u002FReflection_Tuning)|\n| [**Textbooks Are All You Need II: Phi-1.5 Technical Report**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2309.05463.pdf) | arXiv | 2023-09 |\n| [**Orca: Progressive Learning from Complex Explanation Traces of GPT-4**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.02707) | arXiv | 2023-06 | \n| [**Textbooks Are All You Need: A Large-Scale Instructional Text Data Set for Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.11644.pdf) | arXiv | 2023-06 | \n| [**SelFee: Iterative Self-Revising LLM Empowered by Self-Feedback Generation**](https:\u002F\u002Fkaistai.github.io\u002FSelFee\u002F) | arXiv | 2023-05 \n| [**ExpertPrompting: Instructing Large Language Models to be Distinguished Experts**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.14688) | arXiv | 2023-05 | 
[Github](https:\u002F\u002Fgithub.com\u002FOFA-Sys\u002FExpertLLaMA) | [Data](https:\u002F\u002Fgithub.com\u002FOFA-Sys\u002FExpertLLaMA)|\n| [**LaMini-LM: A Diverse Herd of Distilled Models from Large-Scale Instructions**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.14402) | arXiv | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fmbzuai-nlp\u002FLaMini-LM?tab=readme-ov-file) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FMBZUAI\u002FLaMini-instruction)|\n| [**Wizardlm: Empowering large language models to follow complex instructions**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.12244) | ICLR | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fnlpxucan\u002FWizardLM)| [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FWizardLM\u002FWizardLM_evol_instruct_70k) \u003Cbr> [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FWizardLM\u002FWizardLM_evol_instruct_V2_196k)|\n| [**Koala: A Dialogue Model for Academic Research**](https:\u002F\u002Fbair.berkeley.edu\u002Fblog\u002F2023\u002F04\u002F03\u002Fkoala\u002F) | - | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Flm-sys\u002FFastChat)| [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Flmsys\u002Fchatbot_arena_conversations)|\n| [**Alpaca: A Strong, Replicable Instruction-Following Model**](https:\u002F\u002Fcrfm.stanford.edu\u002F2023\u002F03\u002F13\u002Falpaca.html) | - | 2023-03 | [Github](https:\u002F\u002Fgithub.com\u002Ftatsu-lab\u002Fstanford_alpaca)| [Data](https:\u002F\u002Fgithub.com\u002Ftatsu-lab\u002Fstanford_alpaca\u002Fblob\u002Fmain\u002Falpaca_data.json)|\n| [**Vicuna: An Open-Source Chatbot Impressing GPT-4 with 90\\%* ChatGPT Quality**](https:\u002F\u002Flmsys.org\u002Fblog\u002F2023-03-30-vicuna\u002F) | - | 2023-03 | [Github](https:\u002F\u002Fgithub.com\u002Flm-sys\u002FFastChat)| [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Flmsys\u002Fchatbot_arena_conversations)|\n| [**Self-instruct: Aligning language model with self 
generated instructions**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2212.10560) | ACL | 2022-12 | [Github](https:\u002F\u002Fgithub.com\u002Fyizhongw\u002Fself-instruct)| [Data](https:\u002F\u002Fgithub.com\u002Fyizhongw\u002Fself-instruct) |\n\n#### Multi-turn Dialogue\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Zephyr: Direct Distillation of LM Alignment**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.16944) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Falignment-handbook) | [Data](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Falignment-handbook)|\n| [**OPENCHAT: ADVANCING OPEN-SOURCE LANGUAGE MODELS WITH MIXED-QUALITY DATA**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2309.11235.pdf) | ICLR | 2023-09 | [Github](https:\u002F\u002Fgithub.com\u002Fimoneoi\u002Fopenchat) | [Data](https:\u002F\u002Fgithub.com\u002Fimoneoi\u002Fopenchat)|\n| [**Enhancing Chat Language Models by Scaling High-quality Instructional Conversations**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.14233) | arXiv | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002Fthunlp\u002FUltraChat) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fstingning\u002Fultrachat)|\n| [**Baize: An Open-Source Chat Model with Parameter-Efficient Tuning on Self-Chat Data**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.01196) | EMNLP | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fproject-baize\u002Fbaize-chatbot)| [Data](https:\u002F\u002Fgithub.com\u002Fproject-baize\u002Fbaize-chatbot\u002Ftree\u002Fmain\u002Fdata)|\n| [**Vicuna: An Open-Source Chatbot Impressing GPT-4 with 90\\%* ChatGPT Quality**](https:\u002F\u002Flmsys.org\u002Fblog\u002F2023-03-30-vicuna\u002F) | - | 2023-03 | [Github](https:\u002F\u002Fgithub.com\u002Flm-sys\u002FFastChat)| [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Flmsys\u002Fchatbot_arena_conversations)|\n\n#### RAG Capability\n|  Title  |   
Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.11511) | NIPS | 2023-10 | [Github](https:\u002F\u002Fselfrag.github.io\u002F) | [Data](https:\u002F\u002Fselfrag.github.io\u002F)|\n| [**SAIL: Search-Augmented Instruction Learning**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.15225) | arXiv | 2023-05 | [Github](https:\u002F\u002Fopenlsr.org\u002Fsail-7b) | [Data](https:\u002F\u002Fgithub.com\u002Fluohongyin\u002FSAIL#reproducing-sail-models)|\n| [**Knowledge-Augmented Reasoning Distillation for Small Language Models in Knowledge-Intensive Tasks**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.18395) | NIPS | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002FNardien\u002FKARD) | [Data](https:\u002F\u002Fgithub.com\u002FNardien\u002FKARD)|\n\n\n### Alignment\n#### Thinking Pattern\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Aligning Large and Small Language Models via Chain-of-Thought Reasoning**](https:\u002F\u002Faclanthology.org\u002F2024.eacl-long.109.pdf) | EACL | 2024-03 | [Github](https:\u002F\u002Fgithub.com\u002Flranaldii\u002FAligning_LLMs) |\n| [**Divide-or-Conquer? Which Part Should You Distill Your LLM?**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.15000.pdf) | arXiv | 2024-02 |\n| [**Selective Reflection-Tuning: Student-Selected Data Recycling for LLM Instruction-Tuning**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.10110) | arXiv | 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002Ftianyi-lab\u002FReflection_Tuning) | [Data](https:\u002F\u002Fgithub.com\u002Ftianyi-lab\u002FReflection_Tuning)|\n| [**Can LLMs Speak For Diverse People? 
Tuning LLMs via Debate to Generate Controllable Controversial Statements**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.10614) | arXiv | 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002Ftianyi-lab\u002FDEBATunE) | [Data](https:\u002F\u002Fgithub.com\u002Ftianyi-lab\u002FDEBATunE)|\n| [**Knowledgeable Preference Alignment for LLMs in Domain-specific Question Answering**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2311.06503) | arXiv | 2023-11 | [Github](https:\u002F\u002Fgithub.com\u002Fzjukg\u002FKnowPAT) |\n| [**Orca 2: Teaching Small Language Models How to Reason**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.11045.pdf) | arXiv | 2023-11 |\n| [**Reflection-Tuning: Data Recycling Improves LLM Instruction-Tuning**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.11716) | NIPS Workshop | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Ftianyi-lab\u002FReflection_Tuning) | [Data](https:\u002F\u002Fgithub.com\u002Ftianyi-lab\u002FReflection_Tuning)|\n| [**Orca: Progressive Learning from Complex Explanation Traces of GPT-4**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.02707) | arXiv | 2023-06 | \n| [**SelFee: Iterative Self-Revising LLM Empowered by Self-Feedback Generation**](https:\u002F\u002Fkaistai.github.io\u002FSelFee\u002F) | arXiv | 2023-05\n\n\n#### Preference\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Ultrafeedback: Boosting language models with high-quality feedback**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.01377.pdf) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Fthunlp\u002FUltraFeedback) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fopenbmb\u002FUltraFeedback)|\n| [**Zephyr: Direct Distillation of LM Alignment**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.16944) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Falignment-handbook) | 
[Data](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Falignment-handbook)|\n| [**Rlaif: Scaling Reinforcement Learning from Human Feedback with AI Feedback**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2309.00267.pdf) | arXiv | 2023-09 |\n| [**OPENCHAT: ADVANCING OPEN-SOURCE LANGUAGE MODELS WITH MIXED-QUALITY DATA**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2309.11235.pdf) | ICLR | 2023-09 | [Github](https:\u002F\u002Fgithub.com\u002Fimoneoi\u002Fopenchat) | [Data](https:\u002F\u002Fgithub.com\u002Fimoneoi\u002Fopenchat)|\n| [**RLCD: Reinforcement Learning from Contrast Distillation for Language Model Alignment**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2307.12950) | arXiv | 2023-07 | [Github](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Frlcd)|\n| [**Aligning Large Language Models through Synthetic Feedbacks**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.13735) | EMNLP | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002Fnaver-ai\u002Falmost)|[Data](https:\u002F\u002Fgithub.com\u002Fnaver-ai\u002Falmost)|\n| [**Reward Design with Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2303.00001.pdf) | ICLR | 2023-03 | [Github](https:\u002F\u002Fgithub.com\u002Fminaek\u002Freward_design_with_llms)|\n| [**Training Language Models with Language Feedback at Scale**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2303.16755) | arXiv | 2023-03 |\n| [**Constitutional AI: Harmlessness from AI Feedback**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2212.08073) | arXiv | 2022-12 |\n\n\n#### Value\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Ultrafeedback: Boosting language models with high-quality feedback**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.01377.pdf) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Fthunlp\u002FUltraFeedback) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fopenbmb\u002FUltraFeedback)|\n| [**RLCD: Reinforcement Learning from 
Contrast Distillation for Language Model Alignment**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2307.12950) | arXiv | 2023-07 | [Github](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Frlcd)|\n| [**Principle-Driven Self-Alignment of Language Models from Scratch with Minimal Human Supervision**](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2023\u002Fhash\u002F0764db1151b936aca59249e2c1386101-Abstract-Conference.html) | NeurIPS | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002FIBM\u002FDromedary) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fzhiqings\u002Fdromedary-65b-verbose-clone-v0)|\n| [**Training Socially Aligned Language Models on Simulated Social Interactions**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.16960) | arXiv | 2023-05 |\n| [**Constitutional AI: Harmlessness from AI Feedback**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2212.08073) | arXiv | 2022-12 |\n\n\n### Agent\n#### Tool Using\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Toolformer: Language Models Can Teach Themselves to Use Tools**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2302.04761) | arXiv | 2023-02 |\n| [**Graph-ToolFormer: To Empower LLMs with Graph Reasoning Ability via Prompt Augmented by ChatGPT**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.11116) | arXiv | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fjwzhanggy\u002FGraph_Toolformer) | [Data](https:\u002F\u002Fgithub.com\u002Fjwzhanggy\u002FGraph_Toolformer)|\n| [**Gorilla: Large Language Model Connected with Massive APIs**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.15334) | arXiv | 2023-05 | [Github](https:\u002F\u002Fgorilla.cs.berkeley.edu\u002F) | [Data](https:\u002F\u002Fgorilla.cs.berkeley.edu\u002F)|\n| [**GPT4Tools: Teaching Large Language Model to Use Tools via Self-instruction**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.18752) | arXiv | 2023-05 | 
[Github](https:\u002F\u002Fgithub.com\u002FAILab-CVC\u002FGPT4Tools) | [Data](https:\u002F\u002Fgithub.com\u002FAILab-CVC\u002FGPT4Tools)|\n| [**ToolAlpaca: Generalized Tool Learning for Language Models with 3000 Simulated Cases**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.05301) | arXiv | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002Ftangqiaoyu\u002FToolAlpaca) | [Data](https:\u002F\u002Fgithub.com\u002Ftangqiaoyu\u002FToolAlpaca)|\n| [**ToolLLM: Facilitating Large Language Models to Master 16000+ Real-world APIs**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2307.16789) | arXiv | 2023-07 | [Github](https:\u002F\u002Fgithub.com\u002FOpenBMB\u002FToolBench) | [Data](https:\u002F\u002Fgithub.com\u002FOpenBMB\u002FToolBench)|\n| [**Confucius: Iterative Tool Learning from Introspection Feedback by Easy-to-Difficult Curriculum**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2308.14034) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002Fshizhl\u002FConfucius) | \n| [**CRAFT: Customizing LLMs by Creating and Retrieving from Specialized Toolsets**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2309.17428) | arXiv | 2023-09 | [Github](https:\u002F\u002Fgithub.com\u002Flifan-yuan\u002FCRAFT) | \n| [**MLLM-Tool: A Multimodal Large Language Model For Tool Agent Learning**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2401.10727) | arXiv | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002FMLLM-Tool\u002FMLLM-Tool) | [Data](https:\u002F\u002Fgithub.com\u002FMLLM-Tool\u002FMLLM-Tool)|\n| [**Small LLMs Are Weak Tool Learners: A Multi-LLM Agent**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2401.07324) | arXiv | 2024-01 |[Github](https:\u002F\u002Fgithub.com\u002FX-PLUG\u002FMulti-LLM-Agent) | \n| [**EASYTOOL: Enhancing LLM-based Agents with Concise Tool Instruction**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2401.06201) | arXiv | 2024-01 |[Github](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FJARVIS\u002F) | \n\n\n#### Planning\n|  Title  |   Venue  |   Date   | 
Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**AUTOACT: Automatic Agent Learning from Scratch via Self-Planning**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2401.05268) | arXiv | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002Fzjunlp\u002FAutoAct)\n| [**Lumos: Learning Agents with Unified Data, Modular Design, and Open-Source LLMs**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2311.05657) | arXiv | 2023-11 | [Github](https:\u002F\u002Fallenai.github.io\u002Flumos\u002F) | [Data](https:\u002F\u002Fallenai.github.io\u002Flumos\u002F)|\n| [**TPTU-v2: Boosting Task Planning and Tool Usage of Large Language Model-based Agents in Real-world Systems**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2311.11315) | arXiv | 2023-11 |\n| [**Embodied Multi-Modal Agent trained by an LLM from a Parallel TextWorld**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2311.16714) | arXiv | 2023-11 |\n| [**Accelerating Reinforcement Learning of Robotic Manipulations via Feedback from Large Language Models**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2311.02379) | CoRL | 2023-11 |\n| [**Motif: Intrinsic Motivation from Artificial Intelligence Feedback**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.00166) | ICLR | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fmotif) |\n| [**FireAct: Toward Language Agent Fine-tuning**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.05915) | arXiv | 2023-10 | [Github](https:\u002F\u002Ffireact-agent.github.io\u002F) | [Data](https:\u002F\u002Ffireact-agent.github.io\u002F)|\n| [**AgentTuning: Enabling Generalized Agent Abilities for LLMs**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.12823) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002FTHUDM\u002FAgentTuning) |\n| [**Eureka: Human-Level Reward Design via Coding Large Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.12931.pdf) | arXiv | 2023-10 | 
[Github](https:\u002F\u002Fgithub.com\u002Feureka-research\u002FEureka)\n| [**Language Instructed Reinforcement Learning for Human-AI Coordination**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2304.07297.pdf) | PMLR | 2023-04 |\n| [**Guiding Pretraining in Reinforcement Learning with Large Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2302.06692.pdf) | PMLR | 2023-02 |\n| [**Distilling Internet-Scale Vision-Language Models into Embodied Agents**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2301.12507) | ICML | 2023-01 |\n\n\n\n\n### NLP Task Specialization\n#### NLU\n\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**LLM vs Small Model? Large Language Model Based Text Augmentation Enhanced Personality Detection Model**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2403.07581v1.pdf) | arXiv | 2024-03 |\n| [**Evolving Knowledge Distillation with Large Language Models and Active Learning**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2403.06414v1.pdf) | arXiv | 2024-03 |\n| [**Mixed Distillation Helps Smaller Language Model Better Reasoning**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2312.10730.pdf) | arXiv | 2023-12 |\n| [**PromptMix: A Class Boundary Augmentation Method for Large Language Model Distillation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.14192.pdf) | EMNLP | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002FServiceNow\u002FPromptMix-EMNLP-2023) |\n| [**TinyLLM: Learning a Small Student from Multiple Large Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.04616.pdf) | arXiv | 2024-02 | \n| [**Targeted Data Generation: Finding and Fixing Model Weaknesses**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.17804.pdf) | ACL | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002FZexueHe\u002FTDG)| \n| [**Distilling ChatGPT for Explainable Automated Student Answer Assessment**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.12962.pdf) | arXiv | 2023-05 | 
[Github](https:\u002F\u002Fgithub.com\u002Flijiazheng99\u002Faera) |\n| [**ChatGPT outperforms crowd workers for text-annotation tasks**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2303.15056.pdf) | arXiv | 2023-03 | \n| [**Annollm: Making large language models to be better crowdsourced annotators**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2303.16854) | arXiv | 2023-03 |\n| [**AugGPT: Leveraging ChatGPT for Text Data Augmentation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2302.13007.pdf) | arXiv | 2023-02 | [Github](https:\u002F\u002Fgithub.com\u002Fyhydhx\u002FAugGPT)| \n|  [**Is GPT-3 a Good Data Annotator?**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2212.10450.pdf) | ACL | 2022-12 | [Github](https:\u002F\u002Fgithub.com\u002FDAMO-NLP-SG\u002FLLM-Data-Annotator)|\n| [**SunGen: Self-Guided Noise-Free Data Generation for Efficient Zero-Shot Learning**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2205.12679.pdf) | ICLR | 2022-05 | [Github](https:\u002F\u002Fgithub.com\u002FSumilerGAO\u002FSunGen) \n| [**ZeroGen: Efficient Zero-shot Learning via Dataset Generation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2202.07922.pdf) | EMNLP | 2022-02 | [Github](https:\u002F\u002Fgithub.com\u002Fjiacheng-ye\u002FZeroGen)|\n| [**Generating Training Data with Language Models: Towards Zero-Shot Language Understanding**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2202.04538.pdf) | NeurIPS | 2022-02 | [Github](https:\u002F\u002Fgithub.com\u002Fyumeng5\u002FSuperGen)\n| [**Towards Zero-Label Language Learning**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2109.09193.pdf) | arXiv | 2021-09 | \n| [**Generate, Annotate, and Learn: NLP with Synthetic Text**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2106.06168.pdf) | TACL | 2021-06\n\n\n#### NLG\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Tailoring Self-Rationalizers with Multi-Reward Distillation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.02805.pdf) | arXiv | 
2023-11 | [Github](https:\u002F\u002Finklab.usc.edu\u002FMaRio\u002F)| [Data](https:\u002F\u002Finklab.usc.edu\u002FMaRio\u002F)|\n| [**RECOMP: Improving Retrieval-Augmented LMs with Compression and Selective Augmentation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.04408.pdf) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Fcarriex\u002Frecomp)|\n| [**Neural Machine Translation Data Generation and Augmentation using ChatGPT**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2307.05779.pdf) | arXiv | 2023-07 |\n| [**On-Policy Distillation of Language Models: Learning from Self-Generated Mistakes**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.13649.pdf) | ICLR | 2023-06 |\n| [**Can LLMs generate high-quality synthetic note-oriented doctor-patient conversations?**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.16931.pdf) | arXiv | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002Fbelievewhat\u002FDr.NoteAid) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FakemiH\u002FNoteChat)|\n| [**InheritSumm: A General, Versatile and Compact Summarizer by Distilling from GPT**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.13083.pdf) | EMNLP | 2023-05 | \n| [**Impossible Distillation: from Low-Quality Model to High-Quality Dataset & Model for Summarization and Paraphrasing**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.16635) | arXiv | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002Fjaehunjung1\u002Fimpossible-distillation)|\n| [**Data Augmentation for Radiology Report Simplification**](https:\u002F\u002Faclanthology.org\u002F2023.findings-eacl.144.pdf) | Findings of EACL | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002FZiyu-Yang\u002FRadiology-Text-Simplification-Liver)\n| [**Want To Reduce Labeling Cost? 
GPT-3 Can Help**](https:\u002F\u002Faclanthology.org\u002F2021.findings-emnlp.354) | Findings of EMNLP | 2021-08 |\n\n\n#### Information Retrieval\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**InstructDistill: Instruction Distillation Makes Large Language Models Efficient Zero-shot Rankers**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.01555.pdf) | arXiv | 2023-11 | [Github](https:\u002F\u002Fgithub.com\u002Fsunnweiwei\u002FRankGPT\u002Ftree\u002Fmain\u002FInstructDistill)| [Data](https:\u002F\u002Fgithub.com\u002Fsunnweiwei\u002FRankGPT?tab=readme-ov-file#download-data-and-model)\n| [**Soft prompt tuning for augmenting dense retrieval with large language models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2307.08303.pdf) | arXiv | 2023-07 | [Github](https:\u002F\u002Fgithub.com\u002Fzhiyuanpeng\u002FSPTAR.git)\n| [**Query Rewriting in Retrieval-Augmented Large Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.14283.pdf) | EMNLP | 2023-05 \n| [**Is ChatGPT Good at Search? 
Investigating Large Language Models as Re-Ranking Agents**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2304.09542.pdf) | EMNLP | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fsunnweiwei\u002FRankGPT)|[Data](https:\u002F\u002Fgithub.com\u002Fsunnweiwei\u002FRankGPT?tab=readme-ov-file#download-data-and-models)|\n| [**AugTriever: Unsupervised Dense Retrieval by Scalable Data Augmentation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2212.08841.pdf) | arXiv | 2022-12 | [Github](https:\u002F\u002Fgithub.com\u002Fsalesforce\u002FAugTriever)\n| [**QUILL: Query Intent with Large Language Models using Retrieval Augmentation and Multi-stage Distillation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2210.15718.pdf) | EMNLP | 2022-10 |\n| [**Promptagator: Few-shot Dense Retrieval From 8 Examples**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2209.11755.pdf) | ICLR | 2022-09 |\n| [**Questions Are All You Need to Train a Dense Passage Retrieval**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2206.10658.pdf) | TACL | 2022-06 | [Github](https:\u002F\u002Fgithub.com\u002FDevSinghSachan\u002Fart) |\n| [**Improving Passage Retrieval with Zero-Shot Question Generation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2204.07496.pdf) | EMNLP | 2022-04 | [Github](https:\u002F\u002Fgithub.com\u002FDevSinghSachan\u002Funsupervised-passage-reranking) | [Data](https:\u002F\u002Fgithub.com\u002FDevSinghSachan\u002Funsupervised-passage-reranking)|\n| [**InPars: Data Augmentation for Information Retrieval using Large Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2202.05144.pdf) | arXiv | 2022-02 | [Github](https:\u002F\u002Fgithub.com\u002Fzetaalphavector\u002Finpars)| [Data](https:\u002F\u002Fgithub.com\u002Fzetaalphavector\u002Finpars)|\n| [**Generating Datasets with Pretrained Language Models**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2104.07540) | EMNLP | 2021-04 | [Github](https:\u002F\u002Fgithub.com\u002Ftimoschick\u002Fdino) |\n\n\n#### Recommendation\n|  Title  |   Venue  | 
  Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Can Small Language Models be Good Reasoners for Sequential Recommendation?**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2403.04260.pdf) | arXiv | 2024-03 | \n| [**Large Language Model Augmented Narrative Driven Recommendations**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.02250.pdf) | arXiv | 2023-06 | \n| [**Recommendation as Instruction Following: A Large Language Model Empowered Recommendation Approach**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.07001.pdf) | arXiv | 2023-05 |\n| [**ONCE: Boosting Content-based Recommendation with Both Open- and Closed-source Large Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.06566.pdf) | WSDM | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002FJyonn\u002FONCE) | [Data](https:\u002F\u002Fgithub.com\u002FJyonn\u002FONCE\u002Freleases\u002Ftag\u002FDataset)\n\n#### Text Generation Evaluation\n\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Prometheus: Inducing Fine-grained Evaluation Capability in Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.08491.pdf) | ICLR | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002FkaistAI\u002FPrometheus) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fkaist-ai\u002FFeedback-Collection)|\n| [**TIGERScore: Towards Building Explainable Metric for All Text Generation Tasks**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.00752.pdf) | arXiv | 2023-10 | [Github](https:\u002F\u002Ftiger-ai-lab.github.io\u002FTIGERScore\u002F) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FTIGER-Lab\u002FMetricInstruct)|\n| [**Generative Judge for Evaluating Alignment**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.05470.pdf) | ICLR | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002FGAIR-NLP\u002Fauto-j) | [Data](https:\u002F\u002Fgithub.com\u002FGAIR-NLP\u002Fauto-j)\n| [**PandaLM: An 
Automatic Evaluation Benchmark for LLM Instruction Tuning Optimization**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.05087) | arXiv | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002FWeOpenML\u002FPandaLM)| [Data](https:\u002F\u002Fgithub.com\u002FWeOpenML\u002FPandaLM)|\n| [**INSTRUCTSCORE: Explainable Text Generation Evaluation with Fine-grained Feedback**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.14282.pdf) | EMNLP | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002Fxu1998hz\u002FInstructScore_SEScore3) | [Data](https:\u002F\u002Fgithub.com\u002Fxu1998hz\u002FInstructScore_SEScore3)\n\n\n#### Code\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Magicoder: Source Code Is All You Need**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2312.02120.pdf) | arXiv | 2023-12 | [Github](https:\u002F\u002Fgithub.com\u002Fise-uiuc\u002Fmagicoder) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fise-uiuc\u002FMagicoder-OSS-Instruct-75K) \u003Cbr> [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fise-uiuc\u002FMagicoder-Evol-Instruct-110K)|\n| [**WaveCoder: Widespread And Versatile Enhanced Instruction Tuning with Refined Data Generation**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2312.14187) | arXiv | 2023-12 |\n| [**Instruction Fusion: Advancing Prompt Evolution through Hybridization**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2312.15692.pdf) | arXiv | 2023-12 |\n| [**MFTCoder: Boosting Code LLMs with Multitask Fine-Tuning**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.02303.pdf) | arXiv | 2023-11 | [Github](https:\u002F\u002Fgithub.com\u002Fcodefuse-ai\u002FMFTCOder)| [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fcodefuse-ai\u002FEvol-instruction-66k) \u003Cbr> [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fcodefuse-ai\u002FCodeExercise-Python-27k)|\n| [**LLM-Assisted Code Cleaning For Training Accurate Code 
Generators**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.14904.pdf) | arXiv | 2023-11 \n| [**Personalised Distillation: Empowering Open-Sourced LLMs with Adaptive Learning for Code Generation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.18628.pdf) | EMNLP | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002FSalesforceAIResearch\u002FPersDistill)|\n| [**Code Llama: Open Foundation Models for Code**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.12950.pdf) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fcodellama)| \n| [**Distilled GPT for Source Code Summarization**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.14731.pdf) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002Fapcl-research\u002Fjam-cgpt) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fapcl\u002FJam-CGPT\u002Ftree\u002Fmain)|\n| [**Textbooks Are All You Need: A Large-Scale Instructional Text Data Set for Language Models**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.11644.pdf) | arXiv | 2023-06 | \n| Code Alpaca: An Instruction-following LLaMA model for code generation | - | 2023-03 | [Github](https:\u002F\u002Fgithub.com\u002Fsahil280114\u002Fcodealpaca)| [Data](https:\u002F\u002Fgithub.com\u002Fsahil280114\u002Fcodealpaca?tab=readme-ov-file#data-release)|\n\n\n\n### Multi-Modality\n| Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Miko: Multimodal Intention Knowledge Distillation from Large Language Models for Social-Media Commonsense Discovery**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.18169.pdf) | arXiv | 2024-02 |\n| [**Localizing Visual Commonsense Knowledge in Large Language Models**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2312.04837) | NeurIPS | 2023-12 | [Github](https:\u002F\u002Fgithub.com\u002Fjamespark3922\u002Flocalized-skd) | [Data](https:\u002F\u002Fgithub.com\u002Fjamespark3922\u002Flocalized-skd?tab=readme-ov-file) |\n| [**To See is to 
Believe: Prompting GPT-4V for Better Visual Instruction Tuning**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.07574.pdf) | arXiv | 2023-11 | [Github](https:\u002F\u002Fgithub.com\u002FX2FD\u002FLVIS-INSTRUCT4V ) | [Data](https:\u002F\u002Fgithub.com\u002FX2FD\u002FLVIS-INSTRUCT4V) | \n| [**ILuvUI: Instruction-tuned LangUage-Vision modeling of UIs from Machine Conversations**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.04869.pdf) | arXiv | 2023-10 |\n| [**NExT-GPT: Any-to-Any Multimodal LLM**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2309.05519.pdf) | arXiv | 2023-09 | [Github](https:\u002F\u002Fgithub.com\u002FNExT-GPT\u002FNExT-GPT) | [Data](https:\u002F\u002Fgithub.com\u002FNExT-GPT\u002FNExT-GPT)|\n| [**StableLLaVA: Enhanced Visual Instruction Tuning with Synthesized Image-Dialogue Data**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.10253.pdf) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002Ficoz69\u002FStableLLAVA?tab=readme-ov-file) | [Data](https:\u002F\u002Fgithub.com\u002Ficoz69\u002FStableLLAVA?tab=readme-ov-file)|\n| [**PointLLM: Empowering Large Language Models to Understand Point Clouds**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.16911.pdf) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002FOpenRobotLab\u002FPointLLM) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FRunsenXu\u002FPointLLM\u002Ftree\u002Fmain)|\n| [**SVIT: Scaling up Visual Instruction Tuning**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2307.04087.pdf) | arXiv | 2023-07 | [Github](https:\u002F\u002Fgithub.com\u002FBAAI-DCAI\u002FVisual-Instruction-Tuning) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FBAAI\u002FSVIT)|\n| [**ChatSpot: Bootstrapping Multimodal LLMs via Precise Referring Instruction Tuning**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2307.09474.pdf) | arXiv | 2023-07 | \n| [**Shikra: Unleashing Multimodal LLM's Referential Dialogue Magic**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.15195.pdf) | arXiv 
| 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002Fshikras\u002Fshikra) | [Data](https:\u002F\u002Fgithub.com\u002Fshikras\u002Fshikra\u002Fblob\u002Fmain\u002Fdocs\u002Fdata.md)\n| [**Mitigating Hallucination in Large Multi-Modal Models via Robust Instruction Tuning**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.14565.pdf) | ICLR | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002FFuxiaoLiu\u002FLRV-Instruction) | [Data](https:\u002F\u002Fgithub.com\u002FFuxiaoLiu\u002FLRV-Instruction?tab=readme-ov-file) |\n| [**Valley: Video Assistant with Large Language model Enhanced abilitY**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.07207.pdf) | arXiv | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002FRupertLuo\u002FValley) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fluoruipu1\u002FValley-Instruct-73k)|\n| [**DetGPT: Detect What You Need via Reasoning**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.14167.pdf) | EMNLP | 2023-05 | [Github](https:\u002F\u002Fdetgpt.github.io) | \n| [**Visual Instruction Tuning: A Comprehensive Study of Visual Instruction Tuning for Large Language Models**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.08485) | NeurIPS | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fhaotian-liu\u002FLLaVA) | [Data](https:\u002F\u002Fgithub.com\u002Fhaotian-liu\u002FLLaVA\u002Fblob\u002Fmain\u002Fdocs\u002FData.md)|\n\n### Summary Table\n\n\u003Cdiv align=\"center\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FTebmer_Awesome-Knowledge-Distillation-of-LLMs_readme_c22927fea2b5.jpg\">\u003Cbr>\n  \u003Cem>Figure: A summary of representative works about skill distillation.\u003C\u002Fem>\n\u003C\u002Fdiv>\n\u003Cbr>\n\n\n## Verticalization Distillation\n### Law\n\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Fuzi**](https:\u002F\u002Fgithub.com\u002Firlab-sdu\u002Ffuzi.mingcha) | - | 2023-08 | 
[Github](https:\u002F\u002Fgithub.com\u002Firlab-sdu\u002Ffuzi.mingcha)\n| [**ChatLaw: Open-Source Legal Large Language Model with Integrated External Knowledge Bases**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.16092.pdf) | arXiv | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002FPKU-YuanGroup\u002FChatLaw) |\n| [**Lawyer LLaMA Technical Report**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.15062.pdf) | arXiv | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002FAndrewZhe\u002Flawyer-llama) | [Data](https:\u002F\u002Fgithub.com\u002FAndrewZhe\u002Flawyer-llama)|\n\n\n### Medical & Healthcare\n\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**HuatuoGPT-II, One-stage Training for Medical Adaption of LLMs**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.09774.pdf) | arXiv | 2023-11 | [Github](https:\u002F\u002Fgithub.com\u002FFreedomIntelligence\u002FHuatuoGPT-II) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FFreedomIntelligence\u002FHuatuoGPT2_sft_instruct_GPT4_50K)|\n| [**AlpaCare: Instruction-tuned large language models for medical application**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.14558.pdf) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Fxzhang97666\u002Falpacare) | [Data](https:\u002F\u002Fgithub.com\u002FXZhang97666\u002FAlpaCare\u002Fblob\u002Fmaster\u002Fdata\u002FMedInstruct-52k.json)|\n| [**DISC-MedLLM: Bridging General Large Language Models and Real-World Medical Consultation**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.14346.pdf) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002FFudanDISC\u002FDISC-MedLLM\u002Ftree\u002Fmain) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FFlmc\u002FDISC-Med-SFT)|\n| [**HuatuoGPT: Taming Language Model to Be a Doctor**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.15075.pdf) | EMNLP | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002FFreedomIntelligence\u002FHuatuoGPT) | 
[Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FFreedomIntelligence\u002FHuatuoGPT-sft-data-v1)|\n| [**DoctorGLM: Fine-tuning your Chinese doctor is not a herculean task**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2304.01097.pdf) | arXiv | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fxionghonglin\u002FDoctorGLM) | [Data](https:\u002F\u002Fgithub.com\u002FToyhom\u002FChinese-medical-dialogue-data)|\n| [**Huatuo: Tuning LLM with Chinese Medical Knowledge**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2304.06975.pdf) | arXiv | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002FSCIR-HI\u002FHuatuo-Llama-Med-Chinese) | \n| [**MedAlpaca: An Open-Source Collection of Medical Conversational AI Models and Training Data**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2304.08247.pdf) | arXiv | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fkbressem\u002FmedAlpaca) | [Data](https:\u002F\u002Fgithub.com\u002Fkbressem\u002FmedAlpaca)\n| [**PMC-LLaMA: Further Finetuning LLaMA on Medical Papers**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2304.14454.pdf) | arXiv | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fchaoyi-wu\u002FPMC-LLaMA) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Faxiong\u002Fpmc_llama_instructions)|\n| [**ChatDoctor: A Medical Chat Model Fine-Tuned on a Large Language Model Meta-AI (LLaMA) Using Medical Domain Knowledge**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2303.14070.pdf) | arXiv | 2023-03 | [Github](https:\u002F\u002Fgithub.com\u002FKent0n-Li\u002FChatDoctor) |\n\n\n### Finance\n\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**XuanYuan 2.0: A Large Chinese Financial Chat Model with Hundreds of Billions Parameters**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.12002.pdf) | CIKM | 2023-05 |  \n\n\n### Science\n\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**MuseGraph: Graph-oriented 
Instruction Tuning of Large Language Models for Generic Graph Mining**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2403.04780.pdf) | arXiv | 2024-03 | \n| [**SciGLM: Training Scientific Language Models with Self-Reflective Instruction Annotation and Tuning**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.07950.pdf) | arXiv | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002FTHUDM\u002FSciGLM) | \n| [**AstroLLaMA-Chat: Scaling AstroLLaMA with Conversational and Diverse Datasets**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.01916.pdf) | arXiv | 2024-01 \n| [**GeoGalactica: A Scientific Large Language Model in Geoscience**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.00434.pdf) | arXiv | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002Fgeobrain-ai\u002Fgeogalactica) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fdaven3\u002Fgeobench)\n| [**InstructMol: Multi-Modal Integration for Building a Versatile and Reliable Molecular Assistant in Drug Discovery**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.16208.pdf) | arXiv | 2023-11 | [Github](https:\u002F\u002Fgithub.com\u002FIDEA-XL\u002FInstructMol) | \n| [**LLM-Prop: Predicting Physical And Electronic Properties Of Crystalline Solids From Their Text Descriptions**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.14029.pdf) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Fvertaix\u002FLLM-Prop) | \n| [**OceanGPT: A Large Language Model for Ocean Science Tasks**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.02031.pdf) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Fzjunlp\u002FKnowLM) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fzjunlp\u002FOceanBench)|\n| [**MarineGPT: Unlocking Secrets of Ocean to the Public**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.13596.pdf) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Fhkust-vgd\u002FMarineGPT) \n| [**Mammoth: Building Math Generalist Models through Hybrid Instruction 
Tuning**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2309.05653) | arXiv | 2023-09 | [Github](https:\u002F\u002Ftiger-ai-lab.github.io\u002FMAmmoTH\u002F)| [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FTIGER-Lab\u002FMathInstruct)| \n| [**ToRA: A Tool-Integrated Reasoning Agent for Mathematical Problem Solving**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2309.17452.pdf) | ICLR | 2023-09 | [Github](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FToRA)\n| [**DARWIN Series: Domain Specific Large Language Models for Natural Science**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.13565.pdf) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002FMasterAI-EAM\u002FDarwin) | \n| [**Wizardmath: Empowering mathematical reasoning for large language models via reinforced evol-instruct**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2308.09583) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002Fnlpxucan\u002FWizardLM)|\n| [**Biomedgpt: Open Multimodal Generative Pre-trained Transformer for Biomedicine**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.09442.pdf) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002FPharMolix\u002FOpenBioMed) | [Data](https:\u002F\u002Fgithub.com\u002FPharMolix\u002FOpenBioMed)|\n| [**Prot2Text: Multimodal Protein’s Function Generation with GNNs and Transformers**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2307.14367.pdf) | NeurIPS | 2023-07 | \n| [**xTrimoPGLM: Unified 100B-Scale Pre-trained Transformer for Deciphering the Language of Protein**](https:\u002F\u002Fwww.biorxiv.org\u002Fcontent\u002F10.1101\u002F2023.07.05.547496v1.full.pdf) | bioRxiv | 2023-07 |\n| [**GIMLET: A Unified Graph-Text Model for Instruction-Based Molecule Zero-Shot Learning**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.13089.pdf) | NeurIPS | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002Fzhao-ht\u002FGIMLET) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fhaitengzhao\u002Fmolecule_property_instruction)|\n| 
[**K2: A Foundation Language Model for Geoscience Knowledge Understanding and Utilization**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.05064.pdf) | arXiv | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002Fdavendw49\u002Fk2) \n| [**Visual Instruction Tuning: A Comprehensive Study of Visual Instruction Tuning for Large Language Models**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.08485) | NeurIPS | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fhaotian-liu\u002FLLaVA) | [Data](https:\u002F\u002Fgithub.com\u002Fhaotian-liu\u002FLLaVA\u002Fblob\u002Fmain\u002Fdocs\u002FData.md)|\n\n\n\n\n### Misc.\n\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**OWL: A Large Language Model for IT Operations**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2309.09298.pdf) | arXiv | 2023-09 | [Github](https:\u002F\u002Fgithub.com\u002FHC-Guo\u002FOwl) | [Data](https:\u002F\u002Fgithub.com\u002FHC-Guo\u002FOwl\u002Ftree\u002Fmain\u002FOWL-Instruct\u002Fdata)|\n| [**EduChat: A Large-Scale Language Model-based Chatbot System for Intelligent Education**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.02773.pdf) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002FECNU-ICALK\u002FEduChat) | [Data](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fecnu-icalk\u002Feduchat-sft-002-data-osm)    |\n\n\n## Encoder-based KD\n\n> **Note**: Our survey mainly focuses on generative LLMs, and thus the encoder-based KD is not included in the survey. 
However, we are also interested in this topic and continue to update the latest works in this area.\n\n|  Title  |   Venue  |   Date   | Code | Data|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Masked Latent Semantic Modeling: an Efficient Pre-training Alternative to Masked Language Modeling**](https:\u002F\u002Faclanthology.org\u002F2023.findings-acl.876.pdf) | Findings of ACL | 2023-08 |\n| [**Better Together: Jointly Using Masked Latent Semantic Modeling and Masked Language Modeling for Sample Efficient Pre-training**](https:\u002F\u002Faclanthology.org\u002F2023.conll-babylm.26.pdf) | CoNLL | 2023-08 |\n\n\n## TODO List\n- [ ] Add works about O1-like distillation. Stay tuned! \n\n\n## Citation\n\nIf you find this repository helpful, please consider citing the following paper:\n\n```\n@misc{xu2024survey,\n      title={A Survey on Knowledge Distillation of Large Language Models}, \n      author={Xiaohan Xu and Ming Li and Chongyang Tao and Tao Shen and Reynold Cheng and Jinyang Li and Can Xu and Dacheng Tao and Tianyi Zhou},\n      year={2024},\n      eprint={2402.13116},\n      archivePrefix={arXiv},\n      primaryClass={cs.CL}\n}\n```\n\n\n\n## Star History\n\n[![Star History Chart](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FTebmer_Awesome-Knowledge-Distillation-of-LLMs_readme_b95f7dc1bd0e.png)](https:\u002F\u002Fstar-history.com\u002F#Tebmer\u002FAwesome-Knowledge-Distillation-of-LLMs&Date)\n","# LLM 论文中的知识蒸馏精选\n[![Awesome](https:\u002F\u002Fawesome.re\u002Fbadge.svg)]() \n![](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FPRs-Welcome-red) \n[![PDF](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FPDF-2402.13116-green)](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.13116)\n\n\n\n\u003C!-- 字号较大 -->\n\u003Ch2 align=\"center\">\n大型语言模型的知识蒸馏综述\n\u003C\u002Fh2> \n\n\n\u003Cp align=\"center\">\n  Xiaohan Xu\u003Csup>1\u003C\u002Fsup>&nbsp&nbsp\n    李明\u003Csup>2\u003C\u002Fsup>&nbsp&nbsp\n    陶重阳\u003Csup>3\u003C\u002Fsup>&nbsp&nbsp\n    
沈涛\u003Csup>4\u003C\u002Fsup>&nbsp&nbsp\n    雷诺德·程\u003Csup>1\u003C\u002Fsup>&nbsp&nbsp\n    李金阳\u003Csup>1\u003C\u002Fsup>&nbsp&nbsp\n    徐灿\u003Csup>5\u003C\u002Fsup>&nbsp&nbsp\n    陶大成\u003Csup>6\u003C\u002Fsup>&nbsp&nbsp\n    周天一\u003Csup>2\u003C\u002Fsup>&nbsp&nbsp\n\u003C\u002Fp>  \n\n\n\u003Cp align=\"center\">\n\u003Csup>1\u003C\u002Fsup> 香港大学 &nbsp&nbsp\n\u003Csup>2\u003C\u002Fsup> 马里兰大学 &nbsp&nbsp\n\u003Csup>3\u003C\u002Fsup> 微软 &nbsp&nbsp\n\u003Csup>4\u003C\u002Fsup> 悉尼科技大学 &nbsp&nbsp\n\u003Csup>5\u003C\u002Fsup> 北京大学 &nbsp&nbsp\n\u003Csup>6\u003C\u002Fsup> 悉尼大学\n\u003C\u002Fp>\n\u003Cdiv align=\"center\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FTebmer_Awesome-Knowledge-Distillation-of-LLMs_readme_1ea1cfe79e30.png\" width=\"700\">\u003Cbr>\n\u003C\u002Fdiv>\n\u003Cbr>\n\n*这是一份与大型语言模型（LLM）知识蒸馏相关的论文合集。如果您希望利用 LLM 来提升小型模型的训练效果，或者通过自生成的知识实现自我改进，不妨看看这份合集。*\n\n***我们将每周更新此合集。欢迎给本仓库标星 ⭐️，以便及时了解最新动态。***\n\n> ❗️法律注意事项：需要注意的是，使用 LLM 的输出结果可能存在法律风险，例如 ChatGPT 的使用限制（[限制条款](https:\u002F\u002Fopenai.com\u002Fpolicies\u002Fbusiness-terms)）、Llama 的许可协议（[许可协议](https:\u002F\u002Fllama.meta.com\u002Fllama-downloads\u002F)）等。我们强烈建议用户遵守模型提供商规定的使用条款，比如不得开发竞争性产品等。\n\n## 💡 最新消息\n- **2024年2月20日**：📄 我们发布了一篇综述论文“**大型语言模型知识蒸馏综述**”（[链接](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.13116)）。欢迎大家阅读并引用。我们也非常期待您的反馈和建议。\n\n- 更新记录\n  - **2024年3月19日**：新增14篇论文。\n\n## 如何参与本合集的贡献\n\n如果您发现有任何遗漏的分类或论文，请随时**提交 issue 或 PR**，或发送邮件至 [shawnxxh@gmail.com](mailto:shawnxxh@gmail.com)、[minglii@umd.edu](mailto:minglii@umd.edu)、[hishentao@gmail.com](mailto:hishentao@gmail.com) 和 [chongyangtao@gmail.com](mailto:chongyangtao@gmail.com)。我们将持续更新本合集及综述内容。\n\n## 📝 引言\n**LLM 的知识蒸馏**：本综述深入探讨了大型语言模型（LLM）中的知识蒸馏（KD）技术，重点阐述了 KD 在将 GPT-4 等专有 LLM 的先进能力迁移到 LLaMA、Mistral 等开源模型中所起到的关键作用。同时，我们也研究了如何通过以开源 LLM 为教师模型，实现其自身的压缩与自我提升。\n\n**KD 与数据增强**：尤为重要的是，本综述剖析了数据增强（DA）与 KD 之间的复杂关系，指出 DA 已经成为 KD 框架下一种强大的范式，能够显著提升 LLM 的性能。通过利用 DA 生成富含上下文信息、针对特定技能的训练数据，KD 
能够突破传统框架的限制，使开源模型在上下文理解能力、伦理对齐以及深层语义洞察力等方面逼近其专有对手的水平。\n\n**分类体系**：我们的分析围绕三大基础支柱展开：**[算法](#kd-algorithms)**、**[技能蒸馏](#skill-distillation)** 和 **[垂直化蒸馏](#verticalization-distillation)**——全面审视 KD 的机制、特定认知能力的提升及其在不同领域的实际应用。\n\n\n**[KD 算法](#kd-algorithms)**：对于 KD 算法，我们将其分为两个主要步骤：“知识提取”，专注于从教师 LLM 中提取知识；以及“蒸馏算法”，核心在于将这些知识注入学生模型中。\n\n\u003Cdiv align=\"center\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FTebmer_Awesome-Knowledge-Distillation-of-LLMs_readme_7c427a1aa84d.png\" width=\"600\">\u003Cbr>\n  \u003Cem>图示：来自教师 LLM 的不同知识提取方法。\u003C\u002Fem>\n\u003C\u002Fdiv>\n\u003Cbr>\n\n\n**[技能蒸馏](#skill-distillation)**：我们深入探讨了特定认知能力的提升，如上下文理解、对齐性、代理能力、NLP 任务专业化以及多模态处理等。\n\n**[垂直化蒸馏](#verticalization-distillation)**：我们探索了 KD 在法律、医疗健康、金融、科学及其他领域中的实际应用意义。\n\n\n> 需要注意的是，无论是 [技能蒸馏](#skill-distillation) 还是 [垂直化蒸馏](#verticalization-distillation)，都依赖于 [KD 算法](#kd-algorithms) 中的知识提取和蒸馏算法来实现其目标。因此，两者之间存在一定的交叉。不过，这也为相关论文提供了不同的视角。\n\n\n## 为什么需要 LLM 的知识蒸馏？\n在 LLM 时代，LLM 的知识蒸馏具有以下关键作用：\n\n\u003Cdiv align=\"center\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FTebmer_Awesome-Knowledge-Distillation-of-LLMs_readme_a494f9aaacda.png\" width=\"400\">\u003Cbr>\n\u003C\u002Fdiv>\n\u003Cbr>\n\n| 作用  |   描述  |   趋势   |\n|:--------|:--------:|:--------:|\n| **① 推动小型模型发展** | 将专有 LLM 的先进能力迁移到小型模型中，例如开源 LLM 或其他小型模型。 | 最常见 |\n| **② 模型压缩** | 压缩 LLM，使其更加高效实用。 | 随着开源 LLM 的兴起而愈发流行 |\n| **③ 自我改进** | 利用开源 LLM 自身的知识进行优化，即所谓的“自我蒸馏”。 | 是当前使开源 LLM 更具竞争力的新趋势 |\n\n## 📒 目录\n- [KD算法](#kd-algorithms)\n    - [知识提炼](#knowledge-elicitation)\n        - [标注](#labeling)\n        - [扩展](#expansion)\n        - [精选](#curation)\n        - [特征](#feature)\n        - [反馈](#feedback)\n        - [自我知识](#self-knowledge)\n    - [蒸馏算法](#distillation-algorithms)\n        - [监督微调](#supervised-fine-tuning)\n        - [差异与相似性](#divergence-and-similarity)\n        - [强化学习](#reinforcement-learning)\n        - [排序优化](#rank-optimization)\n- [技能蒸馏](#skill-distillation)\n    - 
[上下文遵循](#context-following)\n        - [指令遵循](#instruction-following)\n        - [多轮对话](#multi-turn-dialogue)\n        - [RAG能力](#rag-capability)\n    - [对齐](#alignment)\n        - [思维模式](#thinking-pattern)\n        - [偏好](#preference)\n        - [价值观](#value)\n    - [智能体](#agent)\n        - [工具使用](#tool-using)\n        - [规划](#planning)\n    - [NLP任务专业化](#nlp-task-specialization)\n        - [NLU](#nlu)\n        - [NLG](#nlg)\n        - [信息检索](#information-retrieval)\n        - [推荐](#recommendation)\n        - [文本生成评估](#text-generation-evaluation)\n        - [代码](#code)\n    - [多模态](#multi-modality)\n    - [汇总表](#summary-table)\n- [垂直领域蒸馏](#verticalization-distillation)\n    - [法律](#law)\n    - [医疗健康](#medical--healthcare)\n    - [金融](#finance)\n    - [科学](#science)\n    - [其他](#misc)\n\n- [基于编码器的KD](#encoder-based-kd)\n\n- [引用](#citation)\n\n\n\n## KD算法\n### 知识提炼\n#### 标注\n| 标题 | 会议\u002F期刊 | 发表日期 | 代码 | 数据 |\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**面向知识增强零样本问答的证据聚焦事实摘要**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2403.02966.pdf) | arXiv | 2024-03 |\n| [**通过思维链推理对齐大小语言模型**](https:\u002F\u002Faclanthology.org\u002F2024.eacl-long.109.pdf) | EACL | 2024-03 | [Github](https:\u002F\u002Fgithub.com\u002Flranaldii\u002FAligning_LLMs) |\n| [**分而治之？你应该蒸馏大型语言模型的哪一部分？**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.15000.pdf) | arXiv | 2024-02 |\n| [**Miko：从大型语言模型中进行多模态意图知识蒸馏，用于社交媒体常识发现**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.18169.pdf) | arXiv | 2024-02 |\n| [**KnowTuning：面向大型语言模型的知识感知微调**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.11176.pdf) | arXiv | 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002Fyouganglyu\u002FKnowTuning) |\n| [**TinyLLM：从多个大型语言模型中学习小型学生模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.04616.pdf) | arXiv | 2024-02 |\n| [**混合蒸馏有助于小型语言模型更好地进行推理**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2312.10730.pdf) | arXiv | 2023-12 |\n| [**通过多奖励蒸馏定制自我解释器**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.02805.pdf) | 
arXiv | 2023-11 | [Github](https:\u002F\u002Finklab.usc.edu\u002FMaRio\u002F) | [数据](https:\u002F\u002Finklab.usc.edu\u002FMaRio\u002F)|\n| [**Orca 2：教导小型语言模型如何推理**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.11045.pdf) | arXiv | 2023-11 |\n| [**Mammoth：通过混合指令微调构建数学通才模型**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2309.05653) | arXiv | 2023-09 | [Github](https:\u002F\u002Ftiger-ai-lab.github.io\u002FMAmmoTH\u002F) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FTIGER-Lab\u002FMathInstruct)|\n| [**PandaLM：用于LLM指令微调优化的自动评估基准**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.05087) | arXiv | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002FWeOpenML\u002FPandaLM) | [数据](https:\u002F\u002Fgithub.com\u002FWeOpenML\u002FPandaLM)|\n| [**符号化思维链蒸馏：小型模型也能“步步思考”**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.14050) | ACL | 2023-06 |\n| [**Orca：从GPT-4的复杂解释轨迹中逐步学习**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.02707) | arXiv | 2023-06 |\n| [**逐级蒸馏！用更少的训练数据和更小的模型规模超越大型语言模型**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.02301) | ACL | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002Fgoogle-research\u002Fdistilling-step-by-step) | [数据](https:\u002F\u002Fgithub.com\u002Fgoogle-research\u002Fdistilling-step-by-step)|\n| [**不可能的蒸馏：从低质量模型到高质量的摘要和改写数据集及模型**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.16635) | arXiv | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002Fjaehunjung1\u002Fimpossible-distillation)|\n| [**Baize：一款基于自对话数据进行参数高效微调的开源聊天模型**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.01196) | EMNLP | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fproject-baize\u002Fbaize-chatbot) | [数据](https:\u002F\u002Fgithub.com\u002Fproject-baize\u002Fbaize-chatbot\u002Ftree\u002Fmain\u002Fdata)|\n| [**ChatGPT在文本标注任务中表现优于众包工作者**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2303.15056.pdf) | arXiv | 2023-03 |\n| [**Annollm：让大型语言模型成为更好的众包标注者**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2303.16854) | arXiv | 2023-03 |\n| 
[**GPT-4All：利用从GPT-3.5-Turbo大规模蒸馏的数据训练助理型聊天机器人**](https:\u002F\u002Fs3.amazonaws.com\u002Fstatic.nomic.ai\u002Fgpt4all\u002F2023_GPT4All_Technical_Report.pdf) | - | 2023-03 | [Github](https:\u002F\u002Fgithub.com\u002Fnomic-ai\u002Fgpt4all)|\n| [**将小型语言模型专门化为多步推理**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2301.12726) | arXiv | 2023-01 |\n| [**GPT-3是优秀的数据标注者吗？**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2212.10450.pdf) | ACL | 2022-12 | [Github](https:\u002F\u002Fgithub.com\u002FDAMO-NLP-SG\u002FLLM-Data-Annotator)|\n| [**大型语言模型是推理教师**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2212.10071) | ACL | 2022-12 | [Github](https:\u002F\u002Fgithub.com\u002Fitsnamgyu\u002Freasoning-teacher) | [数据](https:\u002F\u002Fgithub.com\u002Fitsnamgyu\u002Freasoning-teacher)|\n| [**教导小型语言模型进行推理**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2212.08410) | ACL | 2022-12 |\n| [**来自大型语言模型的解释使小型推理者更优秀**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2210.06726) | arXiv | 2022-10 |\n| [**想降低标注成本吗？GPT-3可以帮忙**](https:\u002F\u002Faclanthology.org\u002F2021.findings-emnlp.354) | EMNLP Findings | 2021-08 |\n\n#### 扩展\n|  标题  |   会议\u002F期刊  |   日期   | 代码 | 数据|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**指令融合：通过杂交推进提示进化**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2312.15692.pdf) | arXiv | 2023-12 | \n| [**中文大型语言模型指令微调的实证研究**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.07328.pdf) | EMNLP | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002FPhoebusSi\u002FAlpaca-CoT)| [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FQingyiSi\u002FAlpaca-CoT)|\n| [**PromptMix：用于大型语言模型蒸馏的类别边界增强方法**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.14192.pdf) | EMNLP | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002FServiceNow\u002FPromptMix-EMNLP-2023) |\n| [**Wizardmath：通过强化版Evol-Instruct赋能大型语言模型的数学推理能力**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2308.09583) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002Fnlpxucan\u002FWizardLM)|\n| [**Code 
Llama：面向代码的开源基础模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.12950.pdf) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fcodellama)| \n| [**WizardCoder：用Evol-Instruct赋能代码大型语言模型**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.08568) | ICLR | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002Fnlpxucan\u002FWizardLM) |\n| [**从零开始、在极少人工监督下进行原则驱动的语言模型自对齐**](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2023\u002Fhash\u002F0764db1151b936aca59249e2c1386101-Abstract-Conference.html) | NeurIPS | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002FIBM\u002FDromedary) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fzhiqings\u002Fdromedary-65b-verbose-clone-v0)|\n| [**目标数据生成：发现并修复模型弱点**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.17804.pdf) | ACL | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002FZexueHe\u002FTDG)| \n| [**Wizardlm：赋能大型语言模型遵循复杂指令**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.12244) | ICLR | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fnlpxucan\u002FWizardLM)| [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FWizardLM\u002FWizardLM_evol_instruct_70k) \u003Cbr> [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FWizardLM\u002FWizardLM_evol_instruct_V2_196k)|\n| [**LaMini-LM：由大规模指令蒸馏而来的多样化模型群**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.14402) | arXiv | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fmbzuai-nlp\u002FLaMini-LM?tab=readme-ov-file) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FMBZUAI\u002FLaMini-instruction)|\n| [**Alpaca：使语言模型与人类偏好对齐**](https:\u002F\u002Fcrfm.stanford.edu\u002F2023\u002F03\u002F13\u002Falpaca.html) | - | 2023-03 | [Github](https:\u002F\u002Fgithub.com\u002Ftatsu-lab\u002Fstanford_alpaca)| [数据](https:\u002F\u002Fgithub.com\u002Ftatsu-lab\u002Fstanford_alpaca\u002Fblob\u002Fmain\u002Falpaca_data.json)|\n| Code Alpaca：用于代码生成的指令遵循LLaMA模型 | - | 2023-03 | 
[Github](https:\u002F\u002Fgithub.com\u002Fsahil280114\u002Fcodealpaca)| [数据](https:\u002F\u002Fgithub.com\u002Fsahil280114\u002Fcodealpaca?tab=readme-ov-file#data-release)|\n| [**探索指令数据规模对大型语言模型的影响：基于真实世界用例的实证研究**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2303.14742) | arXiv | 2023-03 | [Github](https:\u002F\u002Fgithub.com\u002FLianjiaTech\u002FBELLE) | [数据](https:\u002F\u002Fhuggingface.co\u002FBelleGroup)| \n| [**AugGPT：利用ChatGPT进行文本数据增强**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2302.13007.pdf) | arXiv | 2023-02 | [Github](https:\u002F\u002Fgithub.com\u002Fyhydhx\u002FAugGPT)| \n| [**Self-instruct：用自我生成的指令使语言模型对齐**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2212.10560) | ACL | 2022-12 | [Github](https:\u002F\u002Fgithub.com\u002Fyizhongw\u002Fself-instruct)| [数据](https:\u002F\u002Fgithub.com\u002Fyizhongw\u002Fself-instruct) |\n| [**符号知识蒸馏：从通用语言模型到常识模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2110.07178.pdf) | NAACL | 2021-10 | [Github](https:\u002F\u002Fgithub.com\u002Fpeterwestai2\u002Fsymbolic-knowledge-distillation) | [数据](https:\u002F\u002Fgithub.com\u002Fpeterwestai2\u002Fsymbolic-knowledge-distillation)|\n\n\n#### 精选\n|  标题  |   场所  |   日期   | 代码 | 数据|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**几乎从零开始的合成数据：面向语言模型的广义指令微调**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.13064) | arXiv | 2024-02 |\n| [**Phi-2：小型语言模型的惊人力量**](https:\u002F\u002Fwww.microsoft.com\u002Fen-us\u002Fresearch\u002Fblog\u002Fphi-2-the-surprising-power-of-small-language-models\u002F) | - | 2023-12 |\n| [**WaveCoder：广泛而多功能的增强型指令微调，结合精细化数据生成**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2312.14187) | arXiv | 2023-12 |\n| [**Magicoder：源代码就是你所需要的全部**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2312.02120.pdf) | arXiv | 2023-12 | [Github](https:\u002F\u002Fgithub.com\u002Fise-uiuc\u002Fmagicoder) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fise-uiuc\u002FMagicoder-OSS-Instruct-75K) \u003Cbr> 
[数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fise-uiuc\u002FMagicoder-Evol-Instruct-110K)|\n| [**MFTCoder：通过多任务微调提升代码LLM**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.02303.pdf) | arXiv | 2023-11 | [Github](https:\u002F\u002Fgithub.com\u002Fcodefuse-ai\u002FMFTCOder)| [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fcodefuse-ai\u002FEvol-instruction-66k) \u003Cbr> [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fcodefuse-ai\u002FCodeExercise-Python-27k)|\n| [**教科书就是你所需要的全部II：Phi-1.5技术报告**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2309.05463.pdf) | arXiv | 2023-09 |\n| [**利用ChatGPT进行神经机器翻译数据生成和增强**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2307.05779.pdf) | arXiv | 2023-07 |\n| [**教科书就是你所需要的全部：面向语言模型的大规模指令文本数据集**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.11644.pdf) | arXiv | 2023-06 | \n| [**通过扩展高质量指令对话来增强聊天语言模型**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.14233) | arXiv | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002Fthunlp\u002FUltraChat) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fstingning\u002Fultrachat)|\n| [**AugTriever：通过可扩展的数据增强实现无监督密集检索**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2212.08841.pdf) | arXiv | 2022-12 | [Github](https:\u002F\u002Fgithub.com\u002Fsalesforce\u002FAugTriever)\n| [**SunGen：用于高效零样本学习的自引导无噪声数据生成**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2205.12679.pdf) | ICLR | 2022-05 | [Github](https:\u002F\u002Fgithub.com\u002FSumilerGAO\u002FSunGen) \n| [**ZeroGen：通过数据集生成实现高效的零样本学习**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2202.07922.pdf) | EMNLP | 2022-02 | [Github](https:\u002F\u002Fgithub.com\u002Fjiacheng-ye\u002FZeroGen)|\n| [**InPars：利用大型语言模型进行信息检索的数据增强**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2202.05144.pdf) | arXiv | 2022-02 | [Github](https:\u002F\u002Fgithub.com\u002Fzetaalphavector\u002Finpars)| [数据](https:\u002F\u002Fgithub.com\u002Fzetaalphavector\u002Finpars)|\n| [**迈向零标签语言学习**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2109.09193.pdf) | arXiv | 2021-09 | 
\n\n\n#### 特征\n\n| 标题 | 会议\u002F期刊 | 日期 | 代码 | 数据 |\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**PromptKD：通过提示微调为生成式语言模型提炼学生友好的知识**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.12842) | EMNLP Findings| 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002Fgmkim-ai\u002FPromptKD) |  [数据](https:\u002F\u002Fgithub.com\u002Fgmkim-ai\u002FPromptKD\u002Ftree\u002Fmain\u002Fdata_utils)\n| [**重新思考大型语言模型知识蒸馏中的KL散度**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2404.02657.pdf) | arXiv | 2024-04 | \n| [**用于推测解码的草稿模型与聊天微调LLM的直接对齐**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2403.00858.pdf) | arXiv | 2024-03 |\n| [**DB-LLM：高效LLM的精确双二值化**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.11960.pdf) | arXiv | 2024-02 | \n| [**BitDistiller：通过自蒸馏释放低于4比特LLM的潜力**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.10631.pdf) | arXiv | 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002FDD-DuDa\u002FBitDistiller) |\n| [**DISTILLM：迈向大型语言模型的简化蒸馏**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.03898.pdf) | arXiv | 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002Fjongwooko\u002Fdistillm) |\n| [**迈向跨分词器的知识蒸馏：LLM的通用logit蒸馏损失**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.12030.pdf) | arXiv | 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002FNicolas-BZRD\u002Fllm-recipes) | [数据](https:\u002F\u002Fhuggingface.co\u002FNicolas-BZRD)|\n| [**重访自回归语言模型的知识蒸馏**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.11890.pdf) | arXiv | 2024-02 |\n| [**大型语言模型的知识融合**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2401.10491) | ICLR | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002Ffanqiwan\u002FFuseLLM ) \n| [**通过双向对齐提升上下文学习能力**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2312.17055.pdf) | arXiv | 2023-12 \n| [**关于有限域上知识迁移的基本极限**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.07838) | NeurIPS | 2023-10 |\n| [**Baby Llama：在小型数据集上训练的教师集成进行知识蒸馏，且无性能损失**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.02019.pdf) | CoNLL | 2023-08 | 
[Github](https:\u002F\u002Fgithub.com\u002Ftiminar\u002FBabyLlama) | [数据](https:\u002F\u002Fgithub.com\u002Ftiminar\u002FBabyLlama )|\n| [**序列级知识蒸馏中的f散度最小化**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2307.15190.pdf) | ACL | 2023-07 | [Github](https:\u002F\u002Fgithub.com\u002FMANGA-UOFA\u002Ffdistill) | [数据](https:\u002F\u002Fdrive.google.com\u002Ffile\u002Fd\u002F1V7bPndyoTQxcJ6m1BoXAw7-ub-jv8Wh1\u002Fview?usp=sharing)|\n| [**MiniLLM：大型语言模型的知识蒸馏**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.08543.pdf) | ICLR | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FLMOps\u002Ftree\u002Fmain\u002Fminillm) | [数据](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FLMOps\u002Ftree\u002Fmain\u002Fminillm) |\n| [**语言模型的策略内蒸馏：从自我生成的错误中学习**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.13649.pdf) | ICLR | 2023-06 |\n| [**LLM-QAT：大型语言模型的无数据量化感知训练**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.17888.pdf) | arXiv | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002FLLM-QAT)| [数据](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002FLLM-QAT)|\n| [**少即是多：面向任务的逐层蒸馏用于语言模型压缩**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2210.01351.pdf) | PMLR | 2022-10 | [Github](https:\u002F\u002Fgithub.com\u002Fcliang1453\u002Ftask-aware-distillation)\n\n#### 反馈\n| 标题 | 会议\u002F平台 | 日期 | 代码 | 数据 |\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**PromptKD：通过提示微调为生成式语言模型提炼学生友好的知识**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.12842) | EMNLP Findings| 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002Fgmkim-ai\u002FPromptKD) |  [数据](https:\u002F\u002Fgithub.com\u002Fgmkim-ai\u002FPromptKD\u002Ftree\u002Fmain\u002Fdata_utils)\n| [**面向知识增强型零样本问答的证据聚焦事实摘要**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2403.02966.pdf) | arXiv | 2024-03 |\n| [**利用大型语言模型和主动学习演进知识蒸馏**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2403.06414v1.pdf) | arXiv | 2024-03 |\n| [**基于在线AI反馈的直接语言模型对齐**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.04792.pdf) | arXiv | 
2024-02 | \n| [**DISTILLM：迈向大型语言模型的流线型知识蒸馏**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.03898.pdf) | arXiv | 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002Fjongwooko\u002Fdistillm) |\n| [**通过带有最小编辑约束的细粒度强化学习改进大型语言模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.06081.pdf) | arXiv | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002FRUCAIBox\u002FRLMEC)\n| [**超越模仿：利用细粒度质量信号进行对齐**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.04072.pdf) | arXiv | 2023-11 | \n| [**语言模型能教导较弱的智能体吗？教师解释通过个性化提升学生表现**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.02421) | ICLR | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002FswarnaHub\u002FExplanationIntervention) |\n| [**Motif：来自人工智能反馈的内在动机**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.00166) | ICLR | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fmotif) |\n| [**Ultrafeedback：用高质量反馈提升语言模型性能**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.01377.pdf) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Fthunlp\u002FUltraFeedback) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fopenbmb\u002FUltraFeedback)|\n| [**个性化知识蒸馏：以自适应学习赋能开源LLM进行代码生成**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.18628.pdf) | EMNLP | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002FSalesforceAIResearch\u002FPersDistill)|\n| [**CycleAlign：从黑盒LLM到白盒模型的迭代知识蒸馏，以实现更好的人类对齐**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.16271) | arXiv | 2023-10 \n| [**Rlaif：结合AI反馈扩展基于人类反馈的强化学习**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2309.00267.pdf) | arXiv | 2023-09 |\n| [**Wizardmath：通过强化版evol-instruct赋能大型语言模型的数学推理能力**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2308.09583) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002Fnlpxucan\u002FWizardLM)|\n| [**语言模型的策略内知识蒸馏：从自我生成的错误中学习**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.13649.pdf) | ICLR | 2023-06 |\n| [**MiniLLM：大型语言模型的知识蒸馏**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.08543.pdf) | ICLR | 2023-06 | 
[Github](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FLMOps\u002Ftree\u002Fmain\u002Fminillm) | [数据](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FLMOps\u002Ftree\u002Fmain\u002Fminillm) |\n| [**用于机器人技能合成的语言到奖励机制**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.08647.pdf) | arXiv | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002Fgoogle-deepmind\u002Flanguage_to_reward_2023)|\n| [**Lion：闭源大型语言模型的对抗性知识蒸馏**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.12870.pdf) | EMNLP | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002FYJiangcm\u002FLion)|\n| [**SelFee：由自我反馈生成驱动的迭代自我修正LLM**](https:\u002F\u002Fkaistai.github.io\u002FSelFee\u002F) | arXiv | 2023-05 \n| [**LaMini-LM：基于大规模指令蒸馏出的多样化模型群**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.14402) | arXiv | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fmbzuai-nlp\u002FLaMini-LM?tab=readme-ov-file) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FMBZUAI\u002FLaMini-instruction)|\n| [**使用语言模型进行奖励设计**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2303.00001.pdf) | ICLR | 2023-03 | [Github](https:\u002F\u002Fgithub.com\u002Fminaek\u002Freward_design_with_llms)|\n| [**宪章式AI：从AI反馈中确保无害性**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2212.08073.pdf) | arXiv | 2022-12 |\n\n#### 自我知识\n| 标题 | 会议\u002F平台 | 日期 | 代码 | 数据 |\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**V-STaR：为自学习推理者训练验证器**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.06457.pdf) | arXiv | 2024-02 \n| [**自我奖励语言模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.10020.pdf) | arXiv | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002Flucidrains\u002Fself-rewarding-lm-pytorch?tab=readme-ov-file    )|\n| [**自对弈微调将弱语言模型转化为强语言模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.01335.pdf) | arXiv | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002Fuclaml\u002FSPIN) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FUCLA-AGI\u002FSPIN_iter0)|\n| 
[**Kun：基于指令反向翻译的中文自我对齐答案润色**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.06477.pdf) | arXiv | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002FZheng0428\u002FCOIG-Kun) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fm-a-p\u002FCOIG-Kun)|\n| [**APT：用于高效训练和推理的预训练语言模型自适应剪枝与调优**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.12200.pdf) | arXiv | 2024-01 |\n| [**GRATH：面向大型语言模型的渐进式自我校正**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2401.12292) | arXiv | 2024-01 | \n| [**超越人类数据：扩展语言模型解决问题的自训练规模**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2312.06585.pdf) | arXiv | 2023-12 \n| [**自我认知引导的检索增强技术应用于大型语言模型**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.05002) | EMNLP Findings | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002FTHUNLP-MT\u002FSKR) |\n| [**RAIN：您的语言模型无需微调即可实现自我对齐**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2309.07124.pdf) | arXiv | 2023-09 | [Github](https:\u002F\u002Fgithub.com\u002FSafeAILab\u002FRAIN) \n| [**用于语言建模的强化自我训练（ReST）**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.08998.pdf) | arXiv | 2023-08 \n| [**Humback：基于指令反向翻译的自我对齐**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.06259.pdf) | ICLR | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002FSpico197\u002FHumback)\n| [**通过对比蒸馏的强化学习实现大型语言模型的自我对齐**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2307.12950.pdf) | ICLR | 2023-07 | [Github](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Frlcd)|\n| [**通过人类反馈的强化学习提升大型语言模型性能**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.14050.pdf) | EMNLP | 2023-06 | \n| [**从零开始、以原则为导向，在极少人工监督下实现语言模型自我对齐**](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2023\u002Fhash\u002F0764db1151b936aca59249e2c1386101-Abstract-Conference.html) | NeurIPS | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002FIBM\u002FDromedary) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fzhiqings\u002Fdromedary-65b-verbose-clone-v0)|\n| [**不可能蒸馏：从低质量模型到高质量摘要与改写数据集及模型**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.16635) | arXiv | 2023-05 | 
[Github](https:\u002F\u002Fgithub.com\u002Fjaehunjung1\u002Fimpossible-distillation)|\n| [**通过强化学习反思实现语言模型自我改进**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.14483.pdf) | arXiv | 2023-05 \n| [**Baize：基于自我对话数据进行参数高效调优的开源聊天模型**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.01196) | EMNLP | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fproject-baize\u002Fbaize-chatbot)| [数据](https:\u002F\u002Fgithub.com\u002Fproject-baize\u002Fbaize-chatbot\u002Ftree\u002Fmain\u002Fdata)|\n| [**Self-instruct：利用自动生成的指令对齐语言模型**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2212.10560) | ACL | 2022-12 | [Github](https:\u002F\u002Fgithub.com\u002Fyizhongw\u002Fself-instruct)| [数据](https:\u002F\u002Fgithub.com\u002Fyizhongw\u002Fself-instruct) |\n| [**大型语言模型可以自我改进**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2210.11610.pdf) | EMNLP | 2022-10\n| [**STaR：用推理来启动推理能力**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2203.14465.pdf) | NeurIPS | 2022-03 | [Github](https:\u002F\u002Fgithub.com\u002Fezelikman\u002FSTaR)|\n\n\n\n\n\n\n\n### 蒸馏算法\n#### 监督微调\n\n> 由于应用监督微调的研究成果众多，此处仅列出最具代表性的几项。\n\n| 标题 | 会议\u002F平台 | 日期 | 代码 | 数据 |\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**面向知识增强零样本问答的证据导向事实摘要**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2403.02966.pdf) | arXiv | 2024-03 |\n| [**通过思维链推理对齐大小语言模型**](https:\u002F\u002Faclanthology.org\u002F2024.eacl-long.109.pdf) | EACL | 2024-03 | [Github](https:\u002F\u002Fgithub.com\u002Flranaldii\u002FAligning_LLMs) |\n| [**分而治之？你的大语言模型应该蒸馏哪一部分？**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.15000.pdf) | arXiv | 2024-02 |\n| [**从零开始（几乎）生成合成数据：面向语言模型的通用指令微调**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.13064) | arXiv | 2024-02 |\n| [**Orca 2：教小型语言模型如何进行推理**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.11045.pdf) | arXiv | 2023-11 |\n| [**TinyLLM：从多个大型语言模型中学习一个小型学生模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.04616.pdf) | arXiv | 2024-02 | \n| 
[**Wizardmath：通过强化进化指令微调赋能大型语言模型的数学推理能力**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2308.09583) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002Fnlpxucan\u002FWizardLM)|\n| [**Orca：基于GPT-4复杂解释轨迹的渐进式学习**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.02707) | arXiv | 2023-06 | \n| [**LaMini-LM：由大规模指令蒸馏得到的多样化模型集合**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.14402) | arXiv | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fmbzuai-nlp\u002FLaMini-LM?tab=readme-ov-file) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FMBZUAI\u002FLaMini-instruction)|\n| [**Wizardlm：赋能大型语言模型遵循复杂指令**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.12244) | ICLR | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fnlpxucan\u002FWizardLM)| [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FWizardLM\u002FWizardLM_evol_instruct_70k) \u003Cbr> [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FWizardLM\u002FWizardLM_evol_instruct_V2_196k)|\n| [**Baize：基于自对话数据进行参数高效微调的开源聊天模型**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.01196) | EMNLP | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fproject-baize\u002Fbaize-chatbot)| [数据](https:\u002F\u002Fgithub.com\u002Fproject-baize\u002Fbaize-chatbot\u002Ftree\u002Fmain\u002Fdata)|\n| [**Alpaca：使语言模型与人类偏好对齐**](https:\u002F\u002Fcrfm.stanford.edu\u002F2023\u002F03\u002F13\u002Falpaca.html) | - | 2023-03 | [Github](https:\u002F\u002Fgithub.com\u002Ftatsu-lab\u002Fstanford_alpaca)| [数据](https:\u002F\u002Fgithub.com\u002Ftatsu-lab\u002Fstanford_alpaca\u002Fblob\u002Fmain\u002Falpaca_data.json)|\n| [**Vicuna：一款以90%* ChatGPT质量惊艳GPT-4的开源聊天机器人**](https:\u002F\u002Flmsys.org\u002Fblog\u002F2023-03-30-vicuna\u002F) | - | 2023-03 | [Github]( https:\u002F\u002Fgithub.com\u002Flm-sys\u002FFastChat) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fanon8231489123\u002FShareGPT_Vicuna_unfiltered)|\n| [**Self-instruct：利用自我生成的指令对齐语言模型**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2212.10560) | ACL | 2022-12 
| [Github](https:\u002F\u002Fgithub.com\u002Fyizhongw\u002Fself-instruct)| [数据](https:\u002F\u002Fgithub.com\u002Fyizhongw\u002Fself-instruct) |\n| [**大型语言模型可以自我改进**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2210.11610.pdf) | EMNLP | 2022-10\n| [**STaR：用推理来启动推理**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2203.14465.pdf) | NeurIPS | 2022-03 | [Github](https:\u002F\u002Fgithub.com\u002Fezelikman\u002FSTaR)|\n\n\n#### 差异与相似性\n\n| 标题 | 会议\u002F平台 | 日期 | 代码 | 数据 |\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**PromptKD：通过提示微调为生成式语言模型提炼学生友好的知识**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.12842) | EMNLP Findings| 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002Fgmkim-ai\u002FPromptKD) |  [数据](https:\u002F\u002Fgithub.com\u002Fgmkim-ai\u002FPromptKD\u002Ftree\u002Fmain\u002Fdata_utils)|\n| [**重新思考大型语言模型知识蒸馏中的KL散度**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2404.02657.pdf) | arXiv | 2024-04 | \n| [**面向任务无关的BERT压缩的权重继承型蒸馏**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.09098.pdf) | NAACL | 2024-03 | [Github](https:\u002F\u002Fgithub.com\u002Fwutaiqiang\u002FWID-NAACL2024) |\n| [**BitDistiller：通过自蒸馏释放低于4比特LLM的潜力**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.10631.pdf) | arXiv | 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002FDD-DuDa\u002FBitDistiller) |\n| [**DISTILLM：迈向大型语言模型的简化蒸馏**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.03898.pdf) | arXiv | 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002Fjongwooko\u002Fdistillm) |\n| [**迈向跨分词器蒸馏：LLM的通用logit蒸馏损失**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.12030.pdf) | arXiv | 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002FNicolas-BZRD\u002Fllm-recipes) | [数据](https:\u002F\u002Fhuggingface.co\u002FNicolas-BZRD)|\n| [**重访自回归语言模型的知识蒸馏**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.11890.pdf) | arXiv | 2024-02 |\n| [**闭源语言模型的知识蒸馏**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.07013.pdf) | arXiv | 2024-01 | \n| 
[**大型语言模型的知识融合**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2401.10491) | ICLR | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002Ffanqiwan\u002FFuseLLM ) |\n| [**通过双向对齐改进上下文学习**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2312.17055.pdf) | arXiv | 2023-12 |\n| [**关于有限域上知识迁移的基本极限**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.07838) | NeurIPS | 2023-10 |\n| [**Baby Llama：在小型数据集上训练的教师集成进行知识蒸馏，且无性能损失**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.02019.pdf) | CoNLL | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002Ftiminar\u002FBabyLlama) | [数据](https:\u002F\u002Fgithub.com\u002Ftiminar\u002FBabyLlama )|\n| [**序列级知识蒸馏中的f散度最小化**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2307.15190.pdf) | ACL | 2023-07 | [Github](https:\u002F\u002Fgithub.com\u002FMANGA-UOFA\u002Ffdistill) | [数据](https:\u002F\u002Fdrive.google.com\u002Ffile\u002Fd\u002F1V7bPndyoTQxcJ6m1BoXAw7-ub-jv8Wh1\u002Fview?usp=sharing)|\n| [**序列级知识蒸馏中的f散度最小化**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2307.15190.pdf) | ACL | 2023-07 | [Github](https:\u002F\u002Fgithub.com\u002FMANGA-UOFA\u002Ffdistill) | [数据](https:\u002F\u002Fdrive.google.com\u002Ffile\u002Fd\u002F1V7bPndyoTQxcJ6m1BoXAw7-ub-jv8Wh1\u002Fview?usp=sharing)|\n| [**MiniLLM：大型语言模型的知识蒸馏**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.08543.pdf) | ICLR | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FLMOps\u002Ftree\u002Fmain\u002Fminillm) | [数据](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FLMOps\u002Ftree\u002Fmain\u002Fminillm) |\n| [**语言模型的策略内蒸馏：从自我生成的错误中学习**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.13649.pdf) | ICLR | 2023-06 |\n| [**LLM-QAT：大型语言模型的无数据量化感知训练**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.17888.pdf) | arXiv | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002FLLM-QAT)| [数据](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002FLLM-QAT)|\n| [**少即是多：面向任务的语言模型压缩分层蒸馏**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2210.01351.pdf) | PMLR | 2022-10 | 
[Github](https:\u002F\u002Fgithub.com\u002Fcliang1453\u002Ftask-aware-distillation)|\n| [**DistilBERT，一个蒸馏版的BERT：更小、更快、更便宜、更轻**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1910.01108.pdf) | NeurIPS | 2019-10 |\n\n\n#### 强化学习\n\n| 标题 | 会议\u002F平台 | 日期 | 代码 | 数据 |\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**基于在线AI反馈的直接语言模型对齐**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.04792.pdf) | arXiv | 2024-02 | \n| [**通过带有最小编辑约束的细粒度强化学习改进大型语言模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.06081.pdf) | arXiv | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002FRUCAIBox\u002FRLMEC)|\n| [**利用大型语言模型反馈加速机器人操作的强化学习**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2311.02379) | CoRL | 2023-11 |\n| [**Motif：来自人工智能反馈的内在动机**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.00166) | ICLR | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fmotif) |\n| [**Ultrafeedback：用高质量反馈提升语言模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.01377.pdf) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Fthunlp\u002FUltraFeedback) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fopenbmb\u002FUltraFeedback)|\n| [**Eureka：通过编码大型语言模型实现人类水平的奖励设计**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.12931.pdf) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Feureka-research\u002FEureka)|\n| [**Rlaif：利用AI反馈扩展人类反馈的强化学习**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2309.00267.pdf) | arXiv | 2023-09 |\n| [**Wizardmath：通过强化evol-instruct赋能大型语言模型的数学推理能力**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2308.09583) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002Fnlpxucan\u002FWizardLM)|\n| [**语言模型的策略内蒸馏：从自我生成的错误中学习**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.13649.pdf) | ICLR | 2023-06 |\n| [**通过合成反馈对齐大型语言模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.13735.pdf) | EMNLP | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002Fnaver-ai\u002Falmost)| [数据](https:\u002F\u002Fgithub.com\u002Fnaver-ai\u002Falmost )|\n| 
[**语言模型通过强化学习反思实现自我改进**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.14483.pdf) | arXiv | 2023-05 |\n| [**宪法式AI：来自AI反馈的无害性**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2212.08073.pdf) | arXiv | 2022-12 |\n\n\n#### 排序优化\n\n| 标题 | 会议\u002F平台 | 日期 | 代码 | 数据 |\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**面向知识增强的零样本问答的证据聚焦事实摘要**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2403.02966.pdf) | arXiv | 2024-03 |\n| [**KnowTuning：面向大型语言模型的知识感知微调**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.11176.pdf) | arXiv | 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002Fyouganglyu\u002FKnowTuning) |\n| [**自我奖励的语言模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.10020.pdf) | arXiv | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002Flucidrains\u002Fself-rewarding-lm-pytorch?tab=readme-ov-file    )|\n| [**自对弈微调将弱语言模型转化为强语言模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.01335.pdf) | arXiv | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002Fuclaml\u002FSPIN) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FUCLA-AGI\u002FSPIN_iter0)|\n| [**Zephyr：语言模型对齐的直接蒸馏**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.16944.pdf) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Falignment-handbook ) |  [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FHuggingFaceH4\u002Fultrachat_200k)|\n| [**CycleAlign：从黑盒LLM到白盒模型的迭代蒸馏，以实现更好的人类对齐**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.16271) | arXiv | 2023-10\n\n\n\n\n\n\n## 技能蒸馏\n\n### 上下文跟随\n#### 指令跟随\n| 标题 | 会议\u002F平台 | 日期 | 代码 | 数据 |\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**从零开始（几乎）生成合成数据：面向语言模型的广义指令微调**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.13064) | arXiv | 2024-02 |\n| [**重新审视自回归语言模型的知识蒸馏**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.11890.pdf) | arXiv | 2024-02 |\n| [**选择性反思-微调：用于LLM指令微调的学生选择型数据循环利用**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.10110) | arXiv | 2024-02 | 
[Github](https:\u002F\u002Fgithub.com\u002Ftianyi-lab\u002FReflection_Tuning) | [数据](https:\u002F\u002Fgithub.com\u002Ftianyi-lab\u002FReflection_Tuning)|\n| [**Phi-2：小型语言模型的惊人能力**](https:\u002F\u002Fwww.microsoft.com\u002Fen-us\u002Fresearch\u002Fblog\u002Fphi-2-the-surprising-power-of-small-language-models\u002F) | - | 2023-12 |\n| [**什么才是对齐任务中的优质数据？指令微调中自动数据选择的全面研究**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2312.15685) | ICLR | 2023-12 | [Github](https:\u002F\u002Fgithub.com\u002Fhkust-nlp\u002Fdeita) | [数据](https:\u002F\u002Fgithub.com\u002Fhkust-nlp\u002Fdeita)|\n| [**MUFFIN：为提升指令遵循能力而策划的多维度指令集**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2312.02436) | arXiv | 2023-12 | [Github](https:\u002F\u002Fgithub.com\u002FRenzeLou\u002FMuffin) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FReza8848\u002FMUFFIN_68k)|\n| [**指令融合：通过混合方法推进提示进化**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2312.15692.pdf) | arXiv | 2023-12 | \n| [**Orca 2：教导小型语言模型如何进行推理**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.11045.pdf) | arXiv | 2023-11 |\n| [**反思-微调：数据循环利用提升LLM指令微调效果**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.11716) | NIPS Workshop | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Ftianyi-lab\u002FReflection_Tuning) | [数据](https:\u002F\u002Fgithub.com\u002Ftianyi-lab\u002FReflection_Tuning)|\n| [**教科书就够了II：Phi-1.5技术报告**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2309.05463.pdf) | arXiv | 2023-09 |\n| [**Orca：从GPT-4的复杂解释轨迹中逐步学习**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.02707) | arXiv | 2023-06 | \n| [**教科书就够了：面向语言模型的大规模教学文本数据集**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.11644.pdf) | arXiv | 2023-06 | \n| [**SelFee：由自我反馈生成驱动的迭代式自我修正LLM**](https:\u002F\u002Fkaistai.github.io\u002FSelFee\u002F) | arXiv | 2023-05 \n| [**ExpertPrompting：指导大型语言模型成为杰出专家**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.14688) | arXiv | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002FOFA-Sys\u002FExpertLLaMA) | 
[数据](https:\u002F\u002Fgithub.com\u002FOFA-Sys\u002FExpertLLaMA)|\n| [**LaMini-LM：基于大规模指令蒸馏而成的多样化模型群**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.14402) | arXiv | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fmbzuai-nlp\u002FLaMini-LM?tab=readme-ov-file) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FMBZUAI\u002FLaMini-instruction)|\n| [**Wizardlm：赋能大型语言模型以遵循复杂指令**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.12244) | ICLR | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fnlpxucan\u002FWizardLM)| [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FWizardLM\u002FWizardLM_evol_instruct_70k) \u003Cbr> [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FWizardLM\u002FWizardLM_evol_instruct_V2_196k)|\n| [**Koala：一款用于学术研究的对话模型**](https:\u002F\u002Fbair.berkeley.edu\u002Fblog\u002F2023\u002F04\u002F03\u002Fkoala\u002F) | - | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Flm-sys\u002FFastChat)| [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Flmsys\u002Fchatbot_arena_conversations)|\n| [**Alpaca：使语言模型与人类偏好对齐**](https:\u002F\u002Fcrfm.stanford.edu\u002F2023\u002F03\u002F13\u002Falpaca.html) | - | 2023-03 | [Github](https:\u002F\u002Fgithub.com\u002Ftatsu-lab\u002Fstanford_alpaca)| [数据](https:\u002F\u002Fgithub.com\u002Ftatsu-lab\u002Fstanford_alpaca\u002Fblob\u002Fmain\u002Falpaca_data.json)|\n| [**Vicuna：一款开源聊天机器人，其质量可媲美ChatGPT的90%*，并能打动GPT-4**](https:\u002F\u002Flmsys.org\u002Fblog\u002F2023-03-30-vicuna\u002F) | - | 2023-03 | [Github](https:\u002F\u002Fgithub.com\u002Flm-sys\u002FFastChat)| [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Flmsys\u002Fchatbot_arena_conversations)|\n| [**Self-instruct：通过自动生成的指令使语言模型对齐**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2212.10560) | ACL | 2022-12 | [Github](https:\u002F\u002Fgithub.com\u002Fyizhongw\u002Fself-instruct)| [数据](https:\u002F\u002Fgithub.com\u002Fyizhongw\u002Fself-instruct) |\n\n#### 多轮对话\n| 标题 | 会议\u002F平台 | 日期 | 代码 | 数据 
|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Zephyr：直接蒸馏实现LM对齐**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.16944) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Falignment-handbook) | [数据](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Falignment-handbook)|\n| [**OPENCHAT：利用混合质量数据推进开源语言模型发展**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2309.11235.pdf) | ICLR | 2023-09 | [Github](https:\u002F\u002Fgithub.com\u002Fimoneoi\u002Fopenchat) | [数据](https:\u002F\u002Fgithub.com\u002Fimoneoi\u002Fopenchat)|\n| [**通过扩展高质量指令型对话来增强聊天语言模型**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.14233) | arXiv | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002Fthunlp\u002FUltraChat) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fstingning\u002Fultrachat)|\n| [**Baize：一款开源聊天模型，在自聊天数据上采用参数高效的微调**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.01196) | EMNLP | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fproject-baize\u002Fbaize-chatbot)| [数据](https:\u002F\u002Fgithub.com\u002Fproject-baize\u002Fbaize-chatbot\u002Ftree\u002Fmain\u002Fdata)|\n| [**Vicuna：一款开源聊天机器人，其质量可媲美ChatGPT的90%*，并能打动GPT-4**](https:\u002F\u002Flmsys.org\u002Fblog\u002F2023-03-30-vicuna\u002F) | - | 2023-03 | [Github](https:\u002F\u002Fgithub.com\u002Flm-sys\u002FFastChat)| [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Flmsys\u002Fchatbot_arena_conversations)|\n\n#### RAG能力\n| 标题 | 会议\u002F平台 | 日期 | 代码 | 数据 |\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Self-RAG：通过自我反思学习检索、生成和批判**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.11511) | NIPS | 2023-10 | [Github](https:\u002F\u002Fselfrag.github.io\u002F) | [数据](https:\u002F\u002Fselfrag.github.io\u002F)|\n| [**SAIL：搜索增强型指令学习**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.15225) | arXiv | 2023-05 | [Github](https:\u002F\u002Fopenlsr.org\u002Fsail-7b) | [数据](https:\u002F\u002Fgithub.com\u002Fluohongyin\u002FSAIL#reproducing-sail-models)|\n| 
[**面向知识密集型任务的小型语言模型的知识增强型推理蒸馏**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.18395) | NIPS | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002FNardien\u002FKARD) | [数据](https:\u002F\u002Fgithub.com\u002FNardien\u002FKARD)|\n\n### 对齐\n#### 思维模式\n| 标题 | 会议\u002F平台 | 日期 | 代码 | 数据 |\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**通过思维链推理对齐大型和小型语言模型**](https:\u002F\u002Faclanthology.org\u002F2024.eacl-long.109.pdf) | EACL | 2024-03 | [Github](https:\u002F\u002Fgithub.com\u002Flranaldii\u002FAligning_LLMs) |\n| [**分而治之？你应该蒸馏你的LLM的哪一部分？**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.15000.pdf) | arXiv | 2024-02 |\n| [**选择性反思调优：用于LLM指令调优的学生自选数据循环利用**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.10110) | arXiv | 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002Ftianyi-lab\u002FReflection_Tuning) | [数据](https:\u002F\u002Fgithub.com\u002Ftianyi-lab\u002FReflection_Tuning)|\n| [**LLM能为不同人群发声吗？通过辩论调优LLM以生成可控的争议性言论**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.10614) | arXiv | 2024-02 | [Github](https:\u002F\u002Fgithub.com\u002Ftianyi-lab\u002FDEBATunE) | [数据](https:\u002F\u002Fgithub.com\u002Ftianyi-lab\u002FDEBATunE)|\n| [**面向领域特定问答的LLM知识型偏好对齐**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2311.06503) | arXiv | 2023-11 | [Github](https:\u002F\u002Fgithub.com\u002Fzjukg\u002FKnowPAT) |\n| [**Orca 2：教小型语言模型如何进行推理**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.11045.pdf) | arXiv | 2023-11 |\n| [**反思调优：数据循环利用提升LLM指令调优效果**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.11716) | NIPS Workshop | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Ftianyi-lab\u002FReflection_Tuning) | [数据](https:\u002F\u002Fgithub.com\u002Ftianyi-lab\u002FReflection_Tuning)|\n| [**Orca：从GPT-4复杂解释轨迹中逐步学习**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.02707) | arXiv | 2023-06 | \n| [**SelFee：由自我反馈生成赋能的迭代式自我修正LLM**](https:\u002F\u002Fkaistai.github.io\u002FSelFee\u002F) | arXiv | 2023-05\n\n\n#### 偏好\n| 标题 | 会议\u002F平台 | 日期 | 代码 | 数据 
|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Ultrafeedback：用高质量反馈提升语言模型性能**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.01377.pdf) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Fthunlp\u002FUltraFeedback) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fopenbmb\u002FUltraFeedback)|\n| [**Zephyr：直接蒸馏实现LM对齐**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.16944) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Falignment-handbook) | [数据](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Falignment-handbook)|\n| [**Rlaif：利用AI反馈扩展人类反馈的强化学习**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2309.00267.pdf) | arXiv | 2023-09 |\n| [**OPENCHAT：利用混合质量数据推进开源语言模型发展**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2309.11235.pdf) | ICLR | 2023-09 | [Github](https:\u002F\u002Fgithub.com\u002Fimoneoi\u002Fopenchat) | [数据](https:\u002F\u002Fgithub.com\u002Fimoneoi\u002Fopenchat)|\n| [**RLCD：基于对比蒸馏的强化学习用于语言模型对齐**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2307.12950) | arXiv | 2023-07 | [Github](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Frlcd)|\n| [**通过合成反馈对齐大型语言模型**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.13735) | EMNLP | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002Fnaver-ai\u002Falmost)|[数据](https:\u002F\u002Fgithub.com\u002Fnaver-ai\u002Falmost)|\n| [**使用语言模型设计奖励函数**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2303.00001.pdf) | ICLR | 2023-03 | [Github](https:\u002F\u002Fgithub.com\u002Fminaek\u002Freward_design_with_llms)|\n| [**大规模语言反馈训练语言模型**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2303.16755) | arXiv | 2023-03 |\n| [**宪法式AI：由AI反馈确保无害性**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2212.08073) | arXiv | 2022-12 |\n\n\n#### 价值观\n| 标题 | 会议\u002F平台 | 日期 | 代码 | 数据 |\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Ultrafeedback：用高质量反馈提升语言模型性能**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.01377.pdf) | arXiv | 2023-10 | 
[Github](https:\u002F\u002Fgithub.com\u002Fthunlp\u002FUltraFeedback) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fopenbmb\u002FUltraFeedback)|\n| [**RLCD：基于对比蒸馏的强化学习用于语言模型对齐**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2307.12950) | arXiv | 2023-07 | [Github](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Frlcd)|\n| [**从零开始、在极少人工监督下实现原则驱动的语言模型自我对齐**](https:\u002F\u002Fproceedings.neurips.cc\u002Fpaper\u002F2023\u002Fhash\u002F0764db1151b936aca59249e2c1386101-Abstract-Conference.html) | NeurIPS | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002FIBM\u002FDromedary) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fzhiqings\u002Fdromedary-65b-verbose-clone-v0)|\n| [**在模拟社交互动上训练社会对齐的语言模型**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.16960) | arXiv | 2023-05 |\n| [**宪法式AI：由AI反馈确保无害性**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2212.08073) | arXiv | 2022-12 |\n\n### 代理\n#### 工具使用\n| 标题 | 出处 | 日期 | 代码 | 数据 |\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Toolformer：语言模型可以自我学习使用工具**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2302.04761) | arXiv | 2023-02 |\n| [**Graph-ToolFormer：通过ChatGPT增强的提示词赋予大语言模型图推理能力**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.11116) | arXiv | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fjwzhanggy\u002FGraph_Toolformer) | [数据](https:\u002F\u002Fgithub.com\u002Fjwzhanggy\u002FGraph_Toolformer)|\n| [**Gorilla：连接海量API的大语言模型**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.15334) | arXiv | 2023-05 | [Github](https:\u002F\u002Fgorilla.cs.berkeley.edu\u002F) | [数据](https:\u002F\u002Fgorilla.cs.berkeley.edu\u002F)|\n| [**GPT4Tools：通过自我指令教导大语言模型使用工具**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.18752) | arXiv | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002FAILab-CVC\u002FGPT4Tools) | [数据](https:\u002F\u002Fgithub.com\u002FAILab-CVC\u002FGPT4Tools)|\n| [**ToolAlpaca：基于3000个模拟案例的语言模型通用工具学习**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.05301) | arXiv | 2023-06 | 
[Github](https:\u002F\u002Fgithub.com\u002Ftangqiaoyu\u002FToolAlpaca) | [数据](https:\u002F\u002Fgithub.com\u002Ftangqiaoyu\u002FToolAlpaca)|\n| [**ToolLLM：助力大语言模型掌握16000+真实世界API**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2307.16789) | arXiv | 2023-07 | [Github](https:\u002F\u002Fgithub.com\u002FOpenBMB\u002FToolBench) | [数据](https:\u002F\u002Fgithub.com\u002FOpenBMB\u002FToolBench)|\n| [**Confucius：从内省反馈中循序渐进地进行工具学习**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2308.14034) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002Fshizhl\u002FConfucius) | \n| [**CRAFT：通过创建和检索专用工具集来定制大语言模型**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2309.17428) | arXiv | 2023-09 | [Github](https:\u002F\u002Fgithub.com\u002Flifan-yuan\u002FCRAFT) | \n| [**MLLM-Tool：用于工具代理学习的多模态大语言模型**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2401.10727) | arXiv | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002FMLLM-Tool\u002FMLLM-Tool) | [数据](https:\u002F\u002Fgithub.com\u002FMLLM-Tool\u002FMLLM-Tool)|\n| [**小型语言模型是弱工具学习者：一个多语言模型代理**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2401.07324) | arXiv | 2024-01 |[Github](https:\u002F\u002Fgithub.com\u002FX-PLUG\u002FMulti-LLM-Agent) | \n| [**EASYTOOL：用简洁的工具指令增强基于大语言模型的代理**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2401.06201) | arXiv | 2024-01 |[Github](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FJARVIS\u002F) | \n\n\n#### 规划\n| 标题 | 出处 | 日期 | 代码 | 数据 |\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**AUTOACT：通过自我规划从零开始自动学习代理**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2401.05268) | arXiv | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002Fzjunlp\u002FAutoAct)\n| [**Lumos：使用统一数据、模块化设计和开源大语言模型学习代理**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2311.05657) | arXiv | 2023-11 | [Github](https:\u002F\u002Fallenai.github.io\u002Flumos\u002F) | [数据](https:\u002F\u002Fallenai.github.io\u002Flumos\u002F)|\n| [**TPTU-v2：提升大型语言模型代理在现实系统中的任务规划和工具使用能力**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2311.11315) | arXiv | 2023-11 |\n| 
[**由大语言模型从平行TextWorld训练的具身多模态代理**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2311.16714) | arXiv | 2023-11 |\n| [**通过大语言模型的反馈加速机器人操作的强化学习**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2311.02379) | CoRL | 2023-11 |\n| [**Motif：来自人工智能反馈的内在动机**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.00166) | ICLR | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fmotif) |\n| [**FireAct：迈向语言模型代理的微调**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.05915) | arXiv | 2023-10 | [Github](https:\u002F\u002Ffireact-agent.github.io\u002F) | [数据](https:\u002F\u002Ffireact-agent.github.io\u002F)|\n| [**AgentTuning：为大语言模型实现通用代理能力**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.12823) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002FTHUDM\u002FAgentTuning) |\n| [**Eureka：通过大语言模型编码实现人类水平的奖励设计**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.12931.pdf) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Feureka-research\u002FEureka)\n| [**面向人机协作的语言指导式强化学习**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2304.07297.pdf) | PMLR | 2023-04 |\n| [**用大语言模型引导强化学习的预训练**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2302.06692.pdf) | PMLR | 2023-02 |\n| [**将互联网规模的视觉-语言模型提炼为具身代理**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2301.12507) | ICML | 2023-01 |\n\n\n\n\n### NLP任务专业化\n#### NLU\n\n| 标题 | 会议\u002F平台 | 日期 | 代码 | 数据 |\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**大模型 vs 小模型？基于大语言模型的文本增强人格检测模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2403.07581v1.pdf) | arXiv | 2024-03 |\n| [**利用大语言模型与主动学习进化知识蒸馏**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2403.06414v1.pdf) | arXiv | 2024-03 |\n| [**混合蒸馏助力小型语言模型提升推理能力**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2312.10730.pdf) | arXiv | 2023-12 |\n| [**PromptMix：一种用于大语言模型蒸馏的类别边界增强方法**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.14192.pdf) | EMNLP | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002FServiceNow\u002FPromptMix-EMNLP-2023) |\n| 
[**TinyLLM：从多个大语言模型中学习小型学生模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.04616.pdf) | arXiv | 2024-02 | \n| [**目标数据生成：发现并修复模型弱点**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.17804.pdf) | ACL | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002FZexueHe\u002FTDG)| \n| [**为可解释的自动化学生答案评估蒸馏ChatGPT**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.12962.pdf) | arXiv | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002Flijiazheng99\u002Faera) |\n| [**ChatGPT在文本标注任务中优于众包工作者**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2303.15056.pdf) | arXiv | 2023-03 | \n| [**Annollm：让大语言模型成为更好的众包标注者**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2303.16854) | arXiv | 2023-03 |\n| [**AugGPT：利用ChatGPT进行文本数据增强**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2302.13007.pdf) | arXiv | 2023-02 | [Github](https:\u002F\u002Fgithub.com\u002Fyhydhx\u002FAugGPT)| \n|  [**GPT-3是优秀的数据标注者吗？**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2212.10450.pdf) | ACL | 2022-12 | [Github](https:\u002F\u002Fgithub.com\u002FDAMO-NLP-SG\u002FLLM-Data-Annotator)|\n| [**SunGen：自引导无噪声数据生成，实现高效的零样本学习**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2205.12679.pdf) | ICLR | 2022-05 | [Github](https:\u002F\u002Fgithub.com\u002FSumilerGAO\u002FSunGen) \n| [**ZeroGen：通过数据集生成实现高效零样本学习**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2202.07922.pdf) | EMNLP | 2022-02 | [Github](https:\u002F\u002Fgithub.com\u002Fjiacheng-ye\u002FZeroGen)|\n| [**利用语言模型生成训练数据：迈向零样本语言理解**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2202.04538.pdf) | NeurIPS | 2022-02 | [Github](https:\u002F\u002Fgithub.com\u002Fyumeng5\u002FSuperGen)\n| [**迈向零标签语言学习**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2109.09193.pdf) | arXiv | 2021-09 | \n| [**生成、标注与学习：使用合成文本进行NLP**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2106.06168.pdf) | TACL | 2021-06\n\n\n#### NLG\n| 标题 | 会议\u002F平台 | 日期 | 代码 | 数据 |\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**通过多奖励蒸馏定制自我解释器**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.02805.pdf) | arXiv | 2023-11 | 
[Github](https:\u002F\u002Finklab.usc.edu\u002FMaRio\u002F) | [数据](https:\u002F\u002Finklab.usc.edu\u002FMaRio\u002F)|\n| [**RECOMP：通过压缩和选择性增强改进检索增强型LMs**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.04408.pdf) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Fcarriex\u002Frecomp)|\n| [**利用ChatGPT进行神经机器翻译数据生成与增强**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2307.05779.pdf) | arXiv | 2023-07 |\n| [**语言模型的策略内蒸馏：从自我生成的错误中学习**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.13649.pdf) | ICLR | 2023-06 |\n| [**LLMs能否生成高质量的以笔记为导向的医患对话？**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.16931.pdf) | arXiv | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002Fbelievewhat\u002FDr.NoteAid) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FakemiH\u002FNoteChat)|\n| [**InheritSumm：通过从GPT蒸馏得到的通用、多功能且紧凑的摘要器**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.13083.pdf) | EMNLP | 2023-05 | \n| [**不可能的蒸馏：从低质量模型到高质量的摘要和改写数据集及模型**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2305.16635) | arXiv | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002Fjaehunjung1\u002Fimpossible-distillation)|\n| [**放射科报告简化中的数据增强**](https:\u002F\u002Faclanthology.org\u002F2023.findings-eacl.144.pdf) | EACL Findings | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002FZiyu-Yang\u002FRadiology-Text-Simplification-Liver)|\n| [**想降低标注成本吗？GPT-3可以帮忙**](https:\u002F\u002Faclanthology.org\u002F2021.findings-emnlp.354) | EMNLP Findings | 2021-08 |\n\n\n#### 信息检索\n| 标题 | 会议\u002F平台 | 日期 | 代码 | 数据 |\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**InstructDistill：指令蒸馏使大语言模型成为高效的零样本排序器**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.01555.pdf) | arXiv | 2023-11 | [Github](https:\u002F\u002Fgithub.com\u002Fsunnweiwei\u002FRankGPT\u002Ftree\u002Fmain\u002FInstructDistill)| [数据](https:\u002F\u002Fgithub.com\u002Fsunnweiwei\u002FRankGPT?tab=readme-ov-file#download-data-and-model)\n| [**软提示调优：用大语言模型增强密集检索**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2307.08303.pdf) | arXiv | 2023-07 | 
[Github](https:\u002F\u002Fgithub.com\u002Fzhiyuanpeng\u002FSPTAR.git)\n| [**检索增强型大语言模型中的查询重写**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.14283.pdf) | EMNLP | 2023-05 \n| [**ChatGPT擅长搜索吗？探究大语言模型作为重新排序代理**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2304.09542.pdf) | EMNLP | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fsunnweiwei\u002FRankGPT)|[数据](https:\u002F\u002Fgithub.com\u002Fsunnweiwei\u002FRankGPT?tab=readme-ov-file#download-data-and-models)|\n| [**AugTriever：通过可扩展的数据增强实现无监督密集检索**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2212.08841.pdf) | arXiv | 2022-12 | [Github](https:\u002F\u002Fgithub.com\u002Fsalesforce\u002FAugTriever)\n| [**QUILL：利用检索增强和多阶段蒸馏，通过大语言模型理解查询意图**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2210.15718.pdf) | EMNLP | 2022-10 |\n| [**Promptagator：仅需8个示例即可实现少样本密集检索**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2209.11755.pdf) | ICLR | 2022-09 |\n| [**只需问题就能训练密集段落检索**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2206.10658.pdf) | TACL | 2022-06 | [Github](https:\u002F\u002Fgithub.com\u002FDevSinghSachan\u002Fart) |\n| [**通过零样本问题生成改进段落检索**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2204.07496.pdf) | EMNLP | 2022-04 | [Github](https:\u002F\u002Fgithub.com\u002FDevSinghSachan\u002Funsupervised-passage-reranking) | [数据](https:\u002F\u002Fgithub.com\u002FDevSinghSachan\u002Funsupervised-passage-reranking)|\n| [**InPars：利用大语言模型进行信息检索的数据增强**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2202.05144.pdf) | arXiv | 2022-02 | [Github](https:\u002F\u002Fgithub.com\u002Fzetaalphavector\u002Finpars)| [数据](https:\u002F\u002Fgithub.com\u002Fzetaalphavector\u002Finpars)|\n| [**利用预训练语言模型生成数据集**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2104.07540) | EMNLP | 2021-04 | [Github](https:\u002F\u002Fgithub.com\u002Ftimoschick\u002Fdino) |\n\n#### 推荐\n| 标题 | 会议\u002F期刊 | 发表日期 | 代码 | 数据 |\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**小型语言模型能否成为序列推荐的良好推理者？**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2403.04260.pdf) | arXiv | 2024-03 | \n| 
[**大型语言模型增强的叙事驱动推荐**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.02250.pdf) | arXiv | 2023-06 | \n| [**作为指令遵循的推荐：一种由大型语言模型赋能的推荐方法**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.07001.pdf) | arXiv | 2023-05 |\n| [**ONCE：利用开源与闭源大型语言模型提升基于内容的推荐**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.06566.pdf) | WSDM | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002FJyonn\u002FONCE) | [数据](https:\u002F\u002Fgithub.com\u002FJyonn\u002FONCE\u002Freleases\u002Ftag\u002FDataset)\n\n#### 文本生成评估\n\n| 标题 | 会议\u002F期刊 | 发表日期 | 代码 | 数据 |\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**普罗米修斯：在语言模型中诱导细粒度评估能力**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.08491.pdf) | ICLR | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002FkaistAI\u002FPrometheus) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fkaist-ai\u002FFeedback-Collection)|\n| [**TIGERScore：迈向构建适用于所有文本生成任务的可解释性指标**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.00752.pdf) | arXiv | 2023-10 | [Github](https:\u002F\u002Ftiger-ai-lab.github.io\u002FTIGERScore\u002F) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FTIGER-Lab\u002FMetricInstruct)|\n| [**用于评估对齐性的生成式评判器**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.05470.pdf) | ICLR | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002FGAIR-NLP\u002Fauto-j) | [数据](https:\u002F\u002Fgithub.com\u002FGAIR-NLP\u002Fauto-j)\n| [**PandaLM：一个用于LLM指令微调优化的自动评估基准**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2306.05087) | arXiv | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002FWeOpenML\u002FPandaLM)| [数据](https:\u002F\u002Fgithub.com\u002FWeOpenML\u002FPandaLM)|\n| [**INSTRUCTSCORE：带有细粒度反馈的可解释性文本生成评估**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.14282.pdf) | EMNLP | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002Fxu1998hz\u002FInstructScore_SEScore3) | [数据](https:\u002F\u002Fgithub.com\u002Fxu1998hz\u002FInstructScore_SEScore3)\n\n\n#### 代码\n| 标题 | 会议\u002F期刊 | 发表日期 | 代码 | 数据 
|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Magicoder：源代码就是你需要的一切**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2312.02120.pdf) | arXiv | 2023-12 | [Github](https:\u002F\u002Fgithub.com\u002Fise-uiuc\u002Fmagicoder) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fise-uiuc\u002FMagicoder-OSS-Instruct-75K) \u003Cbr> [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fise-uiuc\u002FMagicoder-Evol-Instruct-110K)|\n| [**WaveCoder：广泛而多功能的增强型指令微调，结合精细化数据生成**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2312.14187) | arXiv | 2023-12 |\n| [**指令融合：通过混合化推进提示进化**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2312.15692.pdf) | arXiv | 2023-12 |\n| [**MFTCoder：通过多任务微调提升代码LLM性能**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.02303.pdf) | arXiv | 2023-11 | [Github](https:\u002F\u002Fgithub.com\u002Fcodefuse-ai\u002FMFTCOder)| [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fcodefuse-ai\u002FEvol-instruction-66k) \u003Cbr> [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fcodefuse-ai\u002FCodeExercise-Python-27k)|\n| [**LLM辅助的代码清理：用于训练精准代码生成器**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.14904.pdf) | arXiv | 2023-11 \n| [**个性化蒸馏：以自适应学习赋能开源LLM进行代码生成**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.18628.pdf) | EMNLP | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002FSalesforceAIResearch\u002FPersDistill)|\n| [**Code Llama：面向代码的开源基础模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.12950.pdf) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fcodellama)| \n| [**用于源代码摘要的蒸馏版GPT**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.14731.pdf) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002Fapcl-research\u002Fjam-cgpt) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fapcl\u002FJam-CGPT\u002Ftree\u002Fmain)|\n| [**教科书就是你需要的一切：面向语言模型的大规模教学文本数据集**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.11644.pdf) | arXiv | 2023-06 | \n| Code Alpaca：一款用于代码生成的遵循指令的LLaMA模型 | - | 2023-03 | 
[Github](https:\u002F\u002Fgithub.com\u002Fsahil280114\u002Fcodealpaca)| [数据](https:\u002F\u002Fgithub.com\u002Fsahil280114\u002Fcodealpaca?tab=readme-ov-file#data-release)|\n\n### 多模态\n| 标题  |   地点  |   日期   | 代码 | 数据|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**Miko：从大型语言模型中进行多模态意图知识蒸馏，用于社交媒体常识发现**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2402.18169.pdf) | arXiv | 2024-02 |\n| [**在大型语言模型中定位视觉常识知识**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2312.04837) | NeurIPS | 2023-12 | [Github](https:\u002F\u002Fgithub.com\u002Fjamespark3922\u002Flocalized-skd) | [数据](https:\u002F\u002Fgithub.com\u002Fjamespark3922\u002Flocalized-skd?tab=readme-ov-file) |\n| [**眼见为实：通过提示优化GPT-4V以更好地进行视觉指令微调**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.07574.pdf) | arXiv | 2023-11 | [Github](https:\u002F\u002Fgithub.com\u002FX2FD\u002FLVIS-INSTRUCT4V ) | [数据](https:\u002F\u002Fgithub.com\u002FX2FD\u002FLVIS-INSTRUCT4V) | \n| [**ILuvUI：基于机器对话的用户界面指令微调语言-视觉模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.04869.pdf) | arXiv | 2023-10 |\n| [**NExT-GPT：任意到任意的多模态大语言模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2309.05519.pdf) | arXiv | 2023-09 | [Github](https:\u002F\u002Fgithub.com\u002FNExT-GPT\u002FNExT-GPT) | [数据](https:\u002F\u002Fgithub.com\u002FNExT-GPT\u002FNExT-GPT)|\n| [**StableLLaVA：利用合成图像-对话数据增强视觉指令微调**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.10253.pdf) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002Ficoz69\u002FStableLLAVA?tab=readme-ov-file) | [数据](https:\u002F\u002Fgithub.com\u002Ficoz69\u002FStableLLAVA?tab=readme-ov-file)|\n| [**PointLLM：赋能大型语言模型理解点云数据**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.16911.pdf) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002FOpenRobotLab\u002FPointLLM) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FRunsenXu\u002FPointLLM\u002Ftree\u002Fmain)|\n| [**SVIT：扩展视觉指令微调规模**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2307.04087.pdf) | arXiv | 2023-07 | 
[Github](https:\u002F\u002Fgithub.com\u002FBAAI-DCAI\u002FVisual-Instruction-Tuning) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FBAAI\u002FSVIT)|\n| [**ChatSpot：通过精准指代指令微调来构建多模态大语言模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2307.09474.pdf) | arXiv | 2023-07 | \n| [**Shikra：释放多模态大语言模型的指代对话魔法**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.15195.pdf) | arXiv | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002Fshikras\u002Fshikra) | [数据](https:\u002F\u002Fgithub.com\u002Fshikras\u002Fshikra\u002Fblob\u002Fmain\u002Fdocs\u002Fdata.md)\n| [**通过稳健的指令微调缓解大型多模态模型中的幻觉现象**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.14565.pdf) | ICLR | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002FFuxiaoLiu\u002FLRV-Instruction) | [数据](https:\u002F\u002Fgithub.com\u002FFuxiaoLiu\u002FLRV-Instruction?tab=readme-ov-file) |\n| [**Valley：具有大型语言模型增强能力的视频助手**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.07207.pdf) | arXiv | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002FRupertLuo\u002FValley) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fluoruipu1\u002FValley-Instruct-73k)|\n| [**DetGPT：通过推理检测你需要的内容**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.14167.pdf) | EMNLP | 2023-05 | [Github](https:\u002F\u002Fdetgpt.github.io) | \n| [**视觉指令微调：关于大型语言模型视觉指令微调的全面研究**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.08485) | NeurIPS | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fhaotian-liu\u002FLLaVA) | [数据](https:\u002F\u002Fgithub.com\u002Fhaotian-liu\u002FLLaVA\u002Fblob\u002Fmain\u002Fdocs\u002FData.md)|\n\n### 总结表\n\n\u003Cdiv align=\"center\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FTebmer_Awesome-Knowledge-Distillation-of-LLMs_readme_c22927fea2b5.jpg\">\u003Cbr>\n  \u003Cem>图：技能蒸馏代表性工作的总结。\u003C\u002Fem>\n\u003C\u002Fdiv>\n\u003Cbr>\n\n\n## 垂直领域蒸馏\n### 法律\n\n|  标题  |   地点  |   日期   | 代码 | 数据|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| 
[**Fuzi**](https:\u002F\u002Fgithub.com\u002Firlab-sdu\u002Ffuzi.mingcha) | - | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002Firlab-sdu\u002Ffuzi.mingcha)\n| [**ChatLaw：集成外部知识库的开源法律大语言模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.16092.pdf) | arXiv | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002FPKU-YuanGroup\u002FChatLaw) |\n| [**Lawyer LLaMA技术报告**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.15062.pdf) | arXiv | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002FAndrewZhe\u002Flawyer-llama) | [数据](https:\u002F\u002Fgithub.com\u002FAndrewZhe\u002Flawyer-llama)|\n\n\n### 医疗与健康\n\n|  标题  |   地点  |   日期   | 代码 | 数据|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**HuatuoGPT-II：针对医疗场景的一站式大语言模型训练**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.09774.pdf) | arXiv | 2023-11 | [Github](https:\u002F\u002Fgithub.com\u002FFreedomIntelligence\u002FHuatuoGPT-II) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FFreedomIntelligence\u002FHuatuoGPT2_sft_instruct_GPT4_50K)|\n| [**AlpaCare：面向医疗应用的指令微调大语言模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.14558.pdf) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Fxzhang97666\u002Falpacare) | [数据](https:\u002F\u002Fgithub.com\u002FXZhang97666\u002FAlpaCare\u002Fblob\u002Fmaster\u002Fdata\u002FMedInstruct-52k.json)|\n| [**DISC-MedLLM：连接通用大语言模型与真实世界医疗咨询**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.14346.pdf) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002FFudanDISC\u002FDISC-MedLLM\u002Ftree\u002Fmain) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FFlmc\u002FDISC-Med-SFT)|\n| [**HuatuoGPT：驯服语言模型成为医生**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.15075.pdf) | EMNLP | 2023-05 | [Github](https:\u002F\u002Fgithub.com\u002FFreedomIntelligence\u002FHuatuoGPT) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FFreedomIntelligence\u002FHuatuoGPT-sft-data-v1)|\n| 
[**DoctorGLM：微调你的中文医生并非难事**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2304.01097.pdf) | arXiv | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fxionghonglin\u002FDoctorGLM) | [数据](https:\u002F\u002Fgithub.com\u002FToyhom\u002FChinese-medical-dialogue-data)|\n| [**Huatuo：用中文医学知识微调LLaMA模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2304.06975.pdf) | arXiv | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002FSCIR-HI\u002FHuatuo-Llama-Med-Chinese) | \n| [**MedAlpaca：一个开源的医疗对话AI模型及训练数据集合**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2304.08247.pdf) | arXiv | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fkbressem\u002FmedAlpaca) | [数据](https:\u002F\u002Fgithub.com\u002Fkbressem\u002FmedAlpaca)\n| [**PMC-LLaMA：在医学论文上进一步微调LLaMA**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2304.14454.pdf) | arXiv | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fchaoyi-wu\u002FPMC-LLaMA) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Faxiong\u002Fpmc_llama_instructions)|\n| [**ChatDoctor：基于大型语言模型Meta-AI（LLaMA）并结合医学领域知识进行微调的医疗聊天模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2303.14070.pdf) | arXiv | 2023-03 | [Github](https:\u002F\u002Fgithub.com\u002FKent0n-Li\u002FChatDoctor) |\n\n\n### 金融\n\n|  标题  |   地点  |   日期   | 代码 | 数据|\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**XuanYuan 2.0：一款拥有数千亿参数的大型中文金融聊天模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2305.12002.pdf) | CIKM | 2023-05 |\n\n### 科学\n\n| 标题 | 会议\u002F平台 | 发表日期 | 代码 | 数据 |\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**MuseGraph：面向图的大语言模型指令微调，用于通用图挖掘**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2403.04780.pdf) | arXiv | 2024-03 | \n| [**SciGLM：基于自我反思式指令标注与微调的科学语言模型训练**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.07950.pdf) | arXiv | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002FTHUDM\u002FSciGLM) | \n| [**AstroLLaMA-Chat：利用对话式和多样化数据集扩展 AstroLLaMA**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.01916.pdf) | arXiv | 2024-01 \n| 
[**GeoGalactica：地球科学领域的科学大语言模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2401.00434.pdf) | arXiv | 2024-01 | [Github](https:\u002F\u002Fgithub.com\u002Fgeobrain-ai\u002Fgeogalactica) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fdaven3\u002Fgeobench)|\n| [**InstructMol：多模态融合，构建药物发现中通用且可靠的分子助手**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2311.16208.pdf) | arXiv | 2023-11 | [Github](https:\u002F\u002Fgithub.com\u002FIDEA-XL\u002FInstructMol) | \n| [**LLM-Prop：从文本描述中预测晶体固体的物理和电子性质**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.14029.pdf) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Fvertaix\u002FLLM-Prop) | \n| [**OceanGPT：面向海洋科学任务的大语言模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.02031.pdf) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Fzjunlp\u002FKnowLM) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fzjunlp\u002FOceanBench)|\n| [**MarineGPT：向公众揭示海洋奥秘**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2310.13596.pdf) | arXiv | 2023-10 | [Github](https:\u002F\u002Fgithub.com\u002Fhkust-vgd\u002FMarineGPT) \n| [**Mammoth：通过混合指令微调构建数学通才模型**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2309.05653) | arXiv | 2023-09 | [Github](https:\u002F\u002Ftiger-ai-lab.github.io\u002FMAmmoTH\u002F) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FTIGER-Lab\u002FMathInstruct)| \n| [**ToRA：用于数学问题解决的工具集成推理代理**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2309.17452.pdf) | ICLR | 2023-09 | [Github](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FToRA)\n| [**DARWIN系列：面向自然科学的领域专用大语言模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.13565.pdf) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002FMasterAI-EAM\u002FDarwin) | \n| [**Wizardmath：通过强化进化指令微调增强大语言模型的数学推理能力**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2308.09583) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002Fnlpxucan\u002FWizardLM)|\n| 
[**Biomedgpt：面向生物医学的开源多模态生成式预训练Transformer**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.09442.pdf) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002FPharMolix\u002FOpenBioMed) | [数据](https:\u002F\u002Fgithub.com\u002FPharMolix\u002FOpenBioMed)|\n| [**Prot2Text：结合GNN和Transformer的多模态蛋白质功能生成**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2307.14367.pdf) | NeurIPS | 2023-07 | \n| [**xTrimoPGLM：统一的1000亿参数级预训练Transformer，用于破译蛋白质的语言**](https:\u002F\u002Fwww.biorxiv.org\u002Fcontent\u002F10.1101\u002F2023.07.05.547496v1.full.pdf) | bioRxiv | 2023-07 |\n| [**GIMLET：基于指令的分子零样本学习的统一图-文本模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.13089.pdf) | NeurIPS | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002Fzhao-ht\u002FGIMLET) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fhaitengzhao\u002Fmolecule_property_instruction)|\n| [**K2：用于地球科学知识理解和利用的基础语言模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2306.05064.pdf) | arXiv | 2023-06 | [Github](https:\u002F\u002Fgithub.com\u002Fdavendw49\u002Fk2) \n| [**视觉指令微调：大语言模型视觉指令微调的全面研究**](https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.08485) | NeurIPS | 2023-04 | [Github](https:\u002F\u002Fgithub.com\u002Fhaotian-liu\u002FLLaVA) | [数据](https:\u002F\u002Fgithub.com\u002Fhaotian-liu\u002FLLaVA\u002Fblob\u002Fmain\u002Fdocs\u002FData.md)|\n\n\n\n\n### 杂项\n\n| 标题 | 会议\u002F平台 | 发表日期 | 代码 | 数据 |\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**OWL：面向IT运维的大语言模型**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2309.09298.pdf) | arXiv | 2023-09 | [Github](https:\u002F\u002Fgithub.com\u002FHC-Guo\u002FOwl) | [数据](https:\u002F\u002Fgithub.com\u002FHC-Guo\u002FOwl\u002Ftree\u002Fmain\u002FOWL-Instruct\u002Fdata)|\n| [**EduChat：基于大规模语言模型的智能教育聊天机器人系统**](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2308.02773.pdf) | arXiv | 2023-08 | [Github](https:\u002F\u002Fgithub.com\u002FECNU-ICALK\u002FEduChat) | [数据](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fecnu-icalk\u002Feduchat-sft-002-data-osm)    |\n\n\n## 
基于编码器的蒸馏\n\n> **注**：本综述主要关注生成式大语言模型，因此未包含基于编码器的蒸馏内容。然而，我们对此主题也十分感兴趣，并将持续更新该领域的最新研究成果。\n\n| 标题 | 会议\u002F平台 | 发表日期 | 代码 | 数据 |\n|:--------|:--------:|:--------:|:--------:|:--------:|\n| [**掩码潜在语义建模：一种高效的掩码语言模型替代预训练方法**](https:\u002F\u002Faclanthology.org\u002F2023.findings-acl.876.pdf) | ACL Findings | 2023-08 |\n| [**协同增效：联合使用掩码潜在语义建模和掩码语言模型进行高效样本预训练**](https:\u002F\u002Faclanthology.org\u002F2023.conll-babylm.26.pdf) | CoNLL | 2023-08 |\n\n\n## 待办事项\n- [ ] 添加关于O1类蒸馏的相关工作。敬请期待！ \n\n\n## 引用\n如果您觉得本仓库有所帮助，请考虑引用以下论文：\n\n```\n@misc{xu2024survey,\n      title={A Survey on Knowledge Distillation of Large Language Models}, \n      author={Xiaohan Xu and Ming Li and Chongyang Tao and Tao Shen and Reynold Cheng and Jinyang Li and Can Xu and Dacheng Tao and Tianyi Zhou},\n      year={2024},\n      eprint={2402.13116},\n      archivePrefix={arXiv},\n      primaryClass={cs.CL}\n}\n```\n\n\n\n## 星标历史\n\n[![星标历史图表](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FTebmer_Awesome-Knowledge-Distillation-of-LLMs_readme_b95f7dc1bd0e.png)](https:\u002F\u002Fstar-history.com\u002F#Tebmer\u002FAwesome-Knowledge-Distillation-of-LLMs&Date)","# Awesome-Knowledge-Distillation-of-LLMs 快速上手指南\n\n**Awesome-Knowledge-Distillation-of-LLMs** 并非一个可直接安装运行的软件库或框架，而是一个**精选论文与资源合集**。它旨在帮助开发者系统性地了解大语言模型（LLM）知识蒸馏（Knowledge Distillation, KD）领域的最新研究、算法分类及应用场景。\n\n本指南将指导你如何高效利用该仓库获取核心资源、复现经典算法及构建自己的蒸馏方案。\n\n## 环境准备\n\n由于本仓库主要提供论文列表、代码链接和数据集引用，无需特定的系统环境即可浏览内容。但若要复现其中列出的算法，建议准备以下基础环境：\n\n*   **操作系统**: Linux (推荐 Ubuntu 20.04+) 或 macOS\n*   **Python**: 3.8 或更高版本\n*   **深度学习框架**: PyTorch (主流选择) 或 TensorFlow\n*   **依赖管理**: `pip` 或 `conda`\n*   **GPU**: 推荐 NVIDIA GPU (显存根据学生模型大小而定，通常 16GB+ 适合大多数蒸馏实验)\n*   **网络工具**: `git`, `wget` 或 `curl` (用于下载代码和数据)\n\n> **提示**: 具体每个论文的复现环境请参考其对应的 GitHub 仓库（表中 \"Code\" 列），不同算法对 CUDA 版本和特定库（如 `transformers`, `deepspeed`）的要求可能不同。\n\n## 获取与安装步骤\n\n本仓库的使用方式是“克隆”以获取最新的论文列表和索引，而非通过包管理器安装。\n\n### 1. 
克隆仓库\n使用 Git 将仓库克隆到本地，以便随时查看更新（该仓库每周更新）。\n\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002FTebmer\u002FAwesome-Knowledge-Distillation-of-LLMs.git\ncd Awesome-Knowledge-Distillation-of-LLMs\n```\n\n### 2. 访问核心综述论文\n强烈建议先阅读团队发布的综述论文，以建立完整的知识体系。\n*   **论文标题**: A Survey on Knowledge Distillation of Large Language Models\n*   **ArXiv 链接**: [https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.13116](https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.13116)\n*   **国内加速访问**: 若 ArXiv 访问缓慢，可尝试通过国内镜像站（如 [arXiv.org.cn](https:\u002F\u002Farxiv.org.cn)）搜索论文 ID `2402.13116`。\n\n### 3. 定位目标算法代码\n在 `README.md` 文件中，根据需求查找对应的论文条目。表格中提供了直接的 **Code** (GitHub) 和 **Data** (HuggingFace\u002FData) 链接。\n\n例如，若要复现 \"Distilling Step-by-Step\" 算法：\n```bash\n# 直接从论文对应的 GitHub 仓库克隆代码\ngit clone https:\u002F\u002Fgithub.com\u002Fgoogle-research\u002Fdistilling-step-by-step.git\ncd distilling-step-by-step\n\n# 安装该特定项目所需的依赖 (参考其 requirements.txt)\npip install -r requirements.txt\n```\n\n## 基本使用流程\n\n使用该资源库的核心流程是：**查阅分类 -> 定位论文 -> 复现代码**。\n\n### 第一步：确定蒸馏方向\n根据 `README` 中的分类体系，找到你的研究或应用方向：\n\n1.  **KD Algorithms (核心算法)**:\n    *   **Knowledge Elicitation (知识提取)**: 如何从教师模型获取知识（如：Labeling, Chain-of-Thought, Self-Knowledge）。\n    *   **Distillation Algorithms (蒸馏算法)**: 如何将知识注入学生模型（如：SFT, Divergence\u002FSimilarity, RL, Rank Optimization）。\n2.  **Skill Distillation (技能蒸馏)**: 针对特定能力优化，如指令遵循 (Instruction Following)、推理 (Reasoning)、多轮对话、代码生成等。\n3.  **Verticalization Distillation (垂直领域蒸馏)**: 针对法律、医疗、金融、科学等特定行业的模型蒸馏。\n\n### 第二步：筛选与复现示例\n假设你想让一个小模型学习大模型的**思维链推理能力 (Chain-of-Thought)**：\n\n1.  在 `README` 的 **[Knowledge Elicitation] -> [Labeling]** 或 **[Skill Distillation] -> [Context Following]** 部分查找相关论文。\n2.  找到论文：**\"Aligning Large and Small Language Models via Chain-of-Thought Reasoning\"** (EACL 2024)。\n3.  点击表格中的 `[Github]` 链接进入项目页面。\n4.  
按照该项目具体的 `README` 进行训练。典型命令结构如下（仅为示例，具体以项目为准）：\n\n```bash\n# 示例：运行蒸馏训练脚本\npython train_distill.py \\\n    --teacher_model_name_or_path \"meta-llama\u002FLlama-2-70b-hf\" \\\n    --student_model_name_or_path \"meta-llama\u002FLlama-2-7b-hf\" \\\n    --dataset_path \"path\u002Fto\u002Fcot_dataset\" \\\n    --output_dir \".\u002Fdistilled_cot_model\" \\\n    --per_device_train_batch_size 4 \\\n    --gradient_accumulation_steps 4\n```\n\n### 第三步：数据准备与合规性检查\n*   **数据获取**: 许多论文提供了生成的数据集链接（见表格 \"Data\" 列），可直接通过 HuggingFace `datasets` 库加载。\n*   **法律合规**: ⚠️ **重要提示**。在使用 ChatGPT、Llama 等模型输出作为蒸馏数据时，务必遵守原模型提供商的服务条款（Terms of Use）。\n    *   例如：OpenAI 对使用其输出开发竞争性模型有限制；Meta Llama 系列有特定的许可证约束。\n    *   请在使用前仔细阅读各模型官方的 License 文件。\n\n## 贡献与更新\n该仓库由香港大学、马里兰大学、微软等机构的研究者共同维护，每周更新。\n*   **追踪更新**: 点击仓库右上角的 **Star ⭐️** 按钮。\n*   **提交论文**: 若发现遗漏的重要论文，可通过提交 **Issue\u002FPR** 或发送邮件至维护者（如 `shawnxxh@gmail.com`）进行补充。","某医疗科技初创团队试图将 GPT-4 级别的临床问诊能力迁移到本地部署的轻量级模型中，以满足数据隐私合规要求并降低推理成本。\n\n### 没有 Awesome-Knowledge-Distillation-of-LLMs 时\n- **技术路线迷茫**：团队在海量论文中难以筛选出适合“垂直领域（医疗）”的知识蒸馏算法，缺乏系统性的分类指引，导致试错成本极高。\n- **能力迁移低效**：仅简单模仿输出结果，忽略了“技能蒸馏”机制，使得小模型虽能对话但缺乏深层医学逻辑推理能力，误诊风险高。\n- **数据增强缺失**：不懂得利用大模型生成高质量的合成数据来扩充稀缺的医疗语料，导致小模型训练数据不足，泛化能力差。\n- **法律合规隐患**：对使用 proprietary 模型（如 GPT-4）输出进行训练的版权和条款限制认识模糊，面临潜在的法律纠纷风险。\n\n### 使用 Awesome-Knowledge-Distillation-of-LLMs 后\n- **精准定位方案**：依托其清晰的“算法 - 技能 - 垂直化”三维分类体系，团队迅速锁定了针对医疗垂直领域的最新蒸馏论文，研发周期缩短 50%。\n- **深度技能复刻**：参考“技能蒸馏”板块，成功将大模型的诊断思维链（Chain-of-Thought）迁移至小模型，显著提升了复杂病例的分析准确率。\n- **数据质量飞跃**：利用收录的数据增强（DA）结合 KD 的前沿范式，生成了大量上下文丰富且符合医学伦理的训练数据，解决了数据饥渴问题。\n- **规避法律风险**：通过仓库中的法律声明提示，团队严格遵循了源模型的使用条款，确保了商业化落地的合规性与安全性。\n\nAwesome-Knowledge-Distillation-of-LLMs 不仅是一张论文清单，更是连接顶尖大模型能力与低成本落地应用的关键桥梁，让中小企业也能安全、高效地拥有专属的行业大模型。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FTebmer_Awesome-Knowledge-Distillation-of-LLMs_6c7847ee.png","Tebmer","Shawn Xu","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002FTebmer_e8404d25.jpg","PhD in CS at 
HKU",null,"shawnxxh","https:\u002F\u002Fgithub.com\u002FTebmer",1270,72,"2026-03-30T12:18:11",1,"","未说明",{"notes":89,"python":87,"dependencies":90},"该项目是一个关于大语言模型知识蒸馏的论文综述列表（Awesome List），并非可执行的软件工具或代码库，因此 README 中未提供具体的运行环境、硬件需求或依赖库信息。用户需参考列表中各篇具体论文及其关联的代码仓库以获取相应的环境配置要求。",[],[13,26,60],[93,94,95,96,97,98,99,100,101,102,103,104,105,106,107],"data-augmentation","instruction-following","kd","knowledge-distillation","large-language-model","llm","self-training","survey","compression","data-synthesis","feedback","multi-modal","self-distillation","alignment","supervised-finetuning","2026-03-27T02:49:30.150509","2026-04-06T14:06:49.905464",[],[]]