[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-csarron--awesome-emdl":3,"tool-csarron--awesome-emdl":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 
代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",160411,2,"2026-04-18T23:33:24",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",109154,"2026-04-18T11:18:24",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 
协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[52,13,15,14],"插件",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":32,"last_commit_at":59,"category_tags":60,"status":17},4721,"markitdown","microsoft\u002Fmarkitdown","MarkItDown 是一款由微软 AutoGen 团队打造的轻量级 Python 工具，专为将各类文件高效转换为 Markdown 格式而设计。它支持 PDF、Word、Excel、PPT、图片（含 OCR）、音频（含语音转录）、HTML 乃至 YouTube 链接等多种格式的解析，能够精准提取文档中的标题、列表、表格和链接等关键结构信息。\n\n在人工智能应用日益普及的今天，大语言模型（LLM）虽擅长处理文本，却难以直接读取复杂的二进制办公文档。MarkItDown 恰好解决了这一痛点，它将非结构化或半结构化的文件转化为模型“原生理解”且 Token 效率极高的 Markdown 格式，成为连接本地文件与 AI 分析 pipeline 的理想桥梁。此外，它还提供了 MCP（模型上下文协议）服务器，可无缝集成到 Claude Desktop 等 LLM 应用中。\n\n这款工具特别适合开发者、数据科学家及 AI 研究人员使用，尤其是那些需要构建文档检索增强生成（RAG）系统、进行批量文本分析或希望让 AI 助手直接“阅读”本地文件的用户。虽然生成的内容也具备一定可读性，但其核心优势在于为机器",93400,"2026-04-06T19:52:38",[52,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":66,"readme_en":67,"readme_zh":68,"quickstart_zh":69,"use_case_zh":70,"hero_image_url":71,"owner_login":72,"owner_name":73,"owner_avatar_url":74,"owner_bio":75,"owner_company":76,"owner_location":77,"owner_email":78,"owner_twitter":79,"owner_website":80,"owner_url":81,"languages":78,"stars":82,"forks":83,"last_commit_at":84,"license":85,"difficulty_score":86,"env_os":87,"env_gpu":88,"env_ram":88,"env_deps":89,"category_tags":92,"github_topics":93,"view_count":32,"oss_zip_url":78,"oss_zip_packed_at":78,"status":17,"created_at":105,"updated_at":106,"faqs":107,"releases":108},9398,"csarron\u002Fawesome-emdl","awesome-emdl","Embedded and mobile deep learning research resources","awesome-emdl 是一个专注于嵌入式与移动端深度学习研究的开源资源合集。随着人工智能向边缘设备延伸，如何在算力有限、内存受限的微控制器和手机上高效运行深度模型，成为行业一大难题。awesome-emdl 正是为解决这一痛点而生，它系统性地整理了该领域的前沿学术论文、轻量级模型架构以及底层系统优化方案。\n\n这份清单不仅涵盖了模型压缩、神经架构搜索（NAS）等综述文章，还收录了 MobileNet、GhostNet、MCUNet 等经典的轻量化网络，以及针对 FPGA 加速和随机计算等底层系统的最新研究成果。对于希望深入理解 TinyML 技术栈的研究人员，或是需要在实际项目中部署高效 AI 模型的嵌入式开发者而言，awesome-emdl 提供了一条清晰的学习路径和权威的参考文献索引。它帮助用户快速定位从理论综述到代码实现的关键资源，避免了在海量信息中盲目摸索。无论是学术探索还是工程落地，awesome-emdl 
都是进入嵌入式深度学习领域不可或缺的导航工具。","# Awesome EMDL\n\nEmbedded and mobile deep learning research notes.\n\n## Papers\n\n### Survey\n\n1. [EfficientDNNs](https:\u002F\u002Fgithub.com\u002FMingSun-Tse\u002FEfficientDNNs) [Repo]\n1. [Awesome ML Model Compression](https:\u002F\u002Fgithub.com\u002Fcedrickchee\u002Fawesome-ml-model-compression) [Repo]\n1. [TinyML Papers and Projects](https:\u002F\u002Fgithub.com\u002Fgigwegbe\u002Ftinyml-papers-and-projects) [Repo]\n1. [TinyML Platforms Benchmarking](https:\u002F\u002Farxiv.org\u002Fabs\u002F2112.01319) [arXiv '21]\n1. [TinyML: A Systematic Review and Synthesis of Existing Research](https:\u002F\u002Fieeexplore.ieee.org\u002Fabstract\u002Fdocument\u002F9722636) [ICAIIC '21]\n1. [TinyML Meets IoT: A Comprehensive Survey](https:\u002F\u002Fwww.sciencedirect.com\u002Fscience\u002Farticle\u002Fabs\u002Fpii\u002FS2542660521001025) [Internet of Things '21]\n1. [A review on TinyML: State-of-the-art and prospects](https:\u002F\u002Fwww.sciencedirect.com\u002Fscience\u002Farticle\u002Fpii\u002FS1319157821003335) [Journal of King Saud Univ. '21]\n1. [TinyML Benchmark: Executing Fully Connected Neural Networks on Commodity Microcontrollers](https:\u002F\u002Faran.library.nuigalway.ie\u002Fhandle\u002F10379\u002F16770) [IEEE '21]\n1. [Efficient Deep Learning: A Survey on Making Deep Learning Models Smaller, Faster, and Better](https:\u002F\u002Farxiv.org\u002Fabs\u002F2106.08962) [arXiv '21]\n1. [Benchmarking TinyML Systems: Challenges and Direction](https:\u002F\u002Farxiv.org\u002Fabs\u002F2003.04821) [arXiv '20]\n1. [Model Compression and Hardware Acceleration for Neural Networks: A Comprehensive Survey](https:\u002F\u002Fieeexplore.ieee.org\u002Fabstract\u002Fdocument\u002F9043731) [IEEE '20]\n1. [The Deep Learning Compiler: A Comprehensive Survey](https:\u002F\u002Farxiv.org\u002Fabs\u002F2002.03794) [arXiv '20]\n1. 
[Recent Advances in Efficient Computation of Deep Convolutional Neural Networks](https:\u002F\u002Farxiv.org\u002Fabs\u002F1802.00939) [arXiv '18]\n1. [A Survey of Model Compression and Acceleration for Deep Neural Networks](https:\u002F\u002Farxiv.org\u002Fabs\u002F1710.09282) [arXiv '17]\n\n### Model\n\n1. [EtinyNet: Extremely Tiny Network for TinyML](https:\u002F\u002Fwww.aaai.org\u002FAAAI22Papers\u002FAAAI-4889.XuK.pdf) [AAAI '21]\n1. [MCUNetV2: Memory-Efficient Patch-based Inference for Tiny Deep Learning](https:\u002F\u002Farxiv.org\u002Fabs\u002F2110.15352) [NeurIPS '21, MIT]\n1. [SkyNet: a Hardware-Efficient Method for Object Detection and Tracking on Embedded Systems](https:\u002F\u002Fproceedings.mlsys.org\u002Fpapers\u002F2020\u002F86) [MLSys '20, IBM]\n1. [Model Rubik's Cube: Twisting Resolution, Depth and Width for TinyNets](https:\u002F\u002Farxiv.org\u002Fabs\u002F2010.14819) [NeurIPS '20, Huawei]\n1. [MCUNet: Tiny Deep Learning on IoT Devices](https:\u002F\u002Farxiv.org\u002Fabs\u002F2007.10319) [NeurIPS '20, MIT]\n1. [GhostNet: More Features from Cheap Operations](https:\u002F\u002Farxiv.org\u002Fabs\u002F1911.11907) [CVPR '20, Huawei]\n1. [MicroNet for Efficient Language Modeling](https:\u002F\u002Farxiv.org\u002Fabs\u002F2005.07877) [NeurIPS '19, MIT]\n1. [Searching for MobileNetV3](https:\u002F\u002Farxiv.org\u002Fabs\u002F1905.02244) [ICCV '19, Google]\n1. [MobilenetV2: Inverted Residuals and Linear Bottlenecks: Mobile Networks for\nClassification, Detection and Segmentation](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1801.04381.pdf) [CVPR '18, Google]\n1. [ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware](https:\u002F\u002Farxiv.org\u002Fabs\u002F1812.00332) [arXiv '18, MIT]\n1. [DeepRebirth: Accelerating Deep Neural Network Execution on Mobile Devices](https:\u002F\u002Farxiv.org\u002Fabs\u002F1708.04728) [AAAI'18, Samsung]\n1. 
[NasNet: Learning Transferable Architectures for Scalable Image Recognition](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1707.07012.pdf) [arXiv '17, Google]\n1. [ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices](https:\u002F\u002Farxiv.org\u002Fabs\u002F1707.01083) [arXiv '17, Megvii]\n1. [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications](https:\u002F\u002Farxiv.org\u002Fabs\u002F1704.04861) [arXiv '17, Google]\n1. [CondenseNet: An Efficient DenseNet using Learned Group Convolutions](https:\u002F\u002Farxiv.org\u002Fabs\u002F1711.09224) [arXiv '17]\n\n### System\n\n1. [BSC: Block-based Stochastic Computing to Enable Accurate and Efficient TinyML](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2111.06686.pdf?ref=https:\u002F\u002Fgithubhelp.com) [ASP-DAC '22]\n1. [CFU Playground: Full-Stack Open-Source Framework for Tiny Machine Learning (tinyML) Acceleration on FPGAs](https:\u002F\u002Farxiv.org\u002Fabs\u002F2201.01863) [arXiv '22, Google]\n1. [UDC: Unified DNAS for Compressible TinyML Models](https:\u002F\u002Farxiv.org\u002Fabs\u002F2201.05842) [arXiv '22, Arm]\n1. [AnalogNets: ML-HW Co-Design of Noise-robust TinyML Models and Always-On Analog Compute-in-Memory Accelerator](https:\u002F\u002Farxiv.org\u002Fabs\u002F2111.06503) [arXiv '21, Arm]\n1. [TinyTL: Reduce Activations, Not Trainable Parameters for Efficient On-Device Learning](https:\u002F\u002Farxiv.org\u002Fabs\u002F2007.11622) [NeurIPS '20, MIT]\n1. [Once for All: Train One Network and Specialize it for Efficient Deployment](https:\u002F\u002Farxiv.org\u002Fabs\u002F1908.09791) [ICLR '20, MIT]\n1. [DeepMon: Mobile GPU-based Deep Learning Framework for Continuous Vision Applications](https:\u002F\u002Fwww.sigmobile.org\u002Fmobisys\u002F2017\u002Faccepted.php) [MobiSys '17]\n1. 
[DeepEye: Resource Efficient Local Execution of Multiple Deep Vision Models using Wearable Commodity Hardware](http:\u002F\u002Ffahim-kawsar.net\u002Fpapers\u002FMathur.MobiSys2017-Camera.pdf) [MobiSys '17]\n1. [MobiRNN: Efficient Recurrent Neural Network Execution on Mobile GPU](https:\u002F\u002Farxiv.org\u002Fabs\u002F1706.00878) [EMDL '17]\n1. [fpgaConvNet: A Toolflow for Mapping Diverse Convolutional Neural Networks on Embedded FPGAs](https:\u002F\u002Farxiv.org\u002Fabs\u002F1711.08740) [NIPS '17]\n1. [DeepSense: A GPU-based deep convolutional neural network framework on commodity mobile devices](http:\u002F\u002Fink.library.smu.edu.sg\u002Fcgi\u002Fviewcontent.cgi?article=4278&context=sis_research) [WearSys '16]\n1. [DeepX: A Software Accelerator for Low-Power Deep Learning Inference on Mobile Devices](http:\u002F\u002Fniclane.org\u002Fpubs\u002Fdeepx_ipsn.pdf) [IPSN '16]\n1. [EIE: Efficient Inference Engine on Compressed Deep Neural Network](https:\u002F\u002Farxiv.org\u002Fabs\u002F1602.01528) [ISCA '16]\n1. [MCDNN: An Approximation-Based Execution Framework for Deep Stream Processing Under Resource Constraints](http:\u002F\u002Fhaneul.github.io\u002Fpapers\u002Fmcdnn.pdf) [MobiSys '16]\n1. [DXTK: Enabling Resource-efficient Deep Learning on Mobile and Embedded Devices with the DeepX Toolkit](http:\u002F\u002Fniclane.org\u002Fpubs\u002Fdxtk_mobicase.pdf) [MobiCASE '16]\n1. [Sparsification and Separation of Deep Learning Layers for Constrained Resource Inference on Wearables](http:\u002F\u002Fniclane.org\u002Fpubs\u002Fsparsesep_sensys.pdf) [SenSys ’16]\n1. [An Early Resource Characterization of Deep Learning on Wearables, Smartphones and Internet-of-Things Devices](http:\u002F\u002Fniclane.org\u002Fpubs\u002Fiotapp15_early.pdf) [IoT-App ’15]\n1. [CNNdroid: GPU-Accelerated Execution of Trained Deep Convolutional Neural Networks on Android](https:\u002F\u002Farxiv.org\u002Fabs\u002F1511.07376) [MM '16]\n\n### Quantization\n\n1. 
[Quantizing deep convolutional networks for efficient inference: A whitepaper](https:\u002F\u002Farxiv.org\u002Fabs\u002F1806.08342) [arXiv '18]\n1. [LQ-Nets: Learned Quantization for Highly Accurate and Compact Deep Neural Networks](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1807.10029.pdf) [ECCV'18]\n1. [Training and Inference with Integers in Deep Neural Networks](https:\u002F\u002Fopenreview.net\u002Fforum?id=HJGXzmspb) [ICLR'18]\n1. [The ZipML Framework for Training Models with End-to-End Low Precision: The Cans, the Cannots, and a Little Bit of Deep Learning](https:\u002F\u002Farxiv.org\u002Fabs\u002F1611.05402) [ICML'17]\n1. [Loss-aware Binarization of Deep Networks](https:\u002F\u002Farxiv.org\u002Fabs\u002F1611.01600) [ICLR'17]\n1. [Towards the Limit of Network Quantization](https:\u002F\u002Farxiv.org\u002Fabs\u002F1612.01543) [ICLR'17]\n1. [Deep Learning with Low Precision by Half-wave Gaussian Quantization](https:\u002F\u002Farxiv.org\u002Fabs\u002F1702.00953) [CVPR'17]\n1. [ShiftCNN: Generalized Low-Precision Architecture for Inference of Convolutional Neural Networks](https:\u002F\u002Farxiv.org\u002Fabs\u002F1706.02393) [arXiv'17]\n1. [Quantized Convolutional Neural Networks for Mobile Devices](https:\u002F\u002Farxiv.org\u002Fabs\u002F1512.06473) [CVPR '16]\n1. [Fixed-Point Performance Analysis of Recurrent Neural Networks](https:\u002F\u002Farxiv.org\u002Fabs\u002F1512.01322) [ICASSP'16]\n1. [Quantized Neural Networks: Training Neural Networks with Low Precision Weights and Activations](https:\u002F\u002Farxiv.org\u002Fabs\u002F1609.07061) [arXiv'16]\n1. [Compressing Deep Convolutional Networks using Vector Quantization](https:\u002F\u002Farxiv.org\u002Fabs\u002F1412.6115) [arXiv'14]\n\n### Pruning\n\n1. [Awesome-Pruning](https:\u002F\u002Fgithub.com\u002Fhe-y\u002FAwesome-Pruning) [Repo]\n1. [Filter Pruning via Geometric Median for Deep Convolutional Neural Networks Acceleration](https:\u002F\u002Farxiv.org\u002Fabs\u002F1811.00250) [CVPR'19]\n1. 
[To prune, or not to prune: exploring the efficacy of pruning for model compression](https:\u002F\u002Farxiv.org\u002Fabs\u002F1710.01878) [ICLR'18]\n1. [Pruning Filters for Efficient ConvNets](https:\u002F\u002Farxiv.org\u002Fabs\u002F1608.08710) [ICLR'17]\n1. [Pruning Convolutional Neural Networks for Resource Efficient Inference](https:\u002F\u002Farxiv.org\u002Fabs\u002F1611.06440) [ICLR'17]\n1. [Soft Weight-Sharing for Neural Network Compression](https:\u002F\u002Farxiv.org\u002Fabs\u002F1702.04008) [ICLR'17]\n1. [Designing Energy-Efficient Convolutional Neural Networks using Energy-Aware Pruning](https:\u002F\u002Farxiv.org\u002Fabs\u002F1611.05128) [CVPR'17]\n1. [ThiNet: A Filter Level Pruning Method for Deep Neural Network Compression](https:\u002F\u002Farxiv.org\u002Fabs\u002F1707.06342) [ICCV'17]\n1. [Deep Compression: Compressing Deep Neural Networks with Pruning, Trained Quantization and Huffman Coding](https:\u002F\u002Farxiv.org\u002Fabs\u002F1510.00149) [ICLR'16]\n1. [Dynamic Network Surgery for Efficient DNNs](https:\u002F\u002Farxiv.org\u002Fabs\u002F1608.04493) [NIPS'16]\n1. [Learning both Weights and Connections for Efficient Neural Networks](https:\u002F\u002Farxiv.org\u002Fabs\u002F1506.02626) [NIPS'15]\n\n### Approximation\n\n1. [High performance ultra-low-precision convolutions on mobile devices](https:\u002F\u002Farxiv.org\u002Fabs\u002F1712.02427) [NIPS'17]\n1. [Compression of Deep Convolutional Neural Networks for Fast and Low Power Mobile Applications](https:\u002F\u002Farxiv.org\u002Fabs\u002F1511.06530) [ICLR'16]\n1. [Efficient and Accurate Approximations of Nonlinear Convolutional Networks](https:\u002F\u002Farxiv.org\u002Fabs\u002F1411.4229) [CVPR'15]\n1. [Accelerating Very Deep Convolutional Networks for Classification and Detection](https:\u002F\u002Farxiv.org\u002Fabs\u002F1505.06798) (Extended version of above one)\n1. 
[Convolutional neural networks with low-rank regularization](https:\u002F\u002Farxiv.org\u002Fabs\u002F1511.06067) [arXiv'15]\n1. [Exploiting Linear Structure Within Convolutional Networks for Efficient Evaluation](https:\u002F\u002Farxiv.org\u002Fabs\u002F1404.0736) [NIPS'14]\n\n### Characterization\n\n1. [A First Look at Deep Learning Apps on Smartphones](https:\u002F\u002Farxiv.org\u002Fabs\u002F1812.05448) [WWW'19]\n1.  [Machine Learning at Facebook: Understanding Inference at the Edge](https:\u002F\u002Fresearch.fb.com\u002Fpublications\u002Fmachine-learning-at-facebook-understanding-inference-at-the-edge\u002F) [HPCA'19]\n1. [NetAdapt: Platform-Aware Neural Network Adaptation for Mobile Applications](https:\u002F\u002Farxiv.org\u002Fabs\u002F1804.03230) [ECCV 2018]\n1. [Latency and Throughput Characterization of Convolutional Neural Networks for Mobile Computer Vision](https:\u002F\u002Farxiv.org\u002Fabs\u002F1803.09492) [MMSys’18]\n\n## Libraries\n\n### Inference Framework\n\n1. [Alibaba - MNN](https:\u002F\u002Fgithub.com\u002Falibaba\u002FMNN) - is a blazing fast, lightweight deep learning framework, battle-tested by business-critical use cases in Alibaba.\n1. [Apple - CoreML](https:\u002F\u002Fdeveloper.apple.com\u002Fdocumentation\u002Fcoreml) - is integrate machine learning models into your app. [BERT and GPT-2 on iPhone](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fswift-coreml-transformers)\n1. [Arm - ComputeLibrary](https:\u002F\u002Fgithub.com\u002FARM-software\u002FComputeLibrary) - is a set of computer vision and machine learning functions optimised for both Arm CPUs and GPUs using SIMD technologies. [Intro](https:\u002F\u002Fdeveloper.arm.com\u002Ftechnologies\u002Fcompute-library)\n1. [Arm - Arm NN](https:\u002F\u002Fgithub.com\u002FARM-software\u002Farmnn) - is the most performant machine learning (ML) inference engine for Android and Linux, accelerating ML on Arm Cortex-A CPUs and Arm Mali GPUs.\n1. 
[Baidu - Paddle Lite](https:\u002F\u002Fgithub.com\u002FPaddlePaddle\u002FPaddle-Lite) - is multi-platform high performance deep learning inference engine.\n1. [DeepLearningKit](https:\u002F\u002Fgithub.com\u002FDeepLearningKit\u002FDeepLearningKit) - is Open Source Deep Learning Framework for Apple's iOS, OS X and tvOS.\n1. [Edge Impulse](https:\u002F\u002Fedgeimpulse.com) - Interactive platform to generate models that can run in microcontrollers. They are also quite active on social netwoks talking about recent news on EdgeAI\u002FTinyML.\n1. [Google - TensorFlow Lite](https:\u002F\u002Fwww.tensorflow.org\u002Flite\u002Fperformance\u002Fgpu) - is an open source deep learning framework for on-device inference.\n1. [Intel - OpenVINO](https:\u002F\u002Fgithub.com\u002Fopenvinotoolkit\u002Fopenvino) - Comprehensive toolkit to optimize your processes for faster inference.\n1. [JDAI Computer Vision - dabnn](https:\u002F\u002Fgithub.com\u002FJDAI-CV\u002Fdabnn) - is an accelerated binary neural networks inference framework for mobile platform.\n1. [Meta - PyTorch Mobile](https:\u002F\u002Fpytorch.org\u002Fmobile\u002Fhome) - is a new framework for helping mobile developers and machine learning engineers embed PyTorch ML models on-device.\n1. [Microsoft - DeepSpeed](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FDeepSpeed) - is a deep learning optimization library that makes distributed training and inference easy, efficient, and effective.\n1. [Microsoft - ELL](https:\u002F\u002Fgithub.com\u002FMicrosoft\u002FELL) - allows you to design and deploy intelligent machine-learned models onto resource constrained platforms and small single-board computers, like Raspberry Pi, Arduino, and micro:bit.\n1. [Microsoft - ONNX RUntime](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Fonnxruntime) - cross-platform, high performance ML inferencing and training accelerator.\n1. 
[Nvidia - TensorRT](https:\u002F\u002Fgithub.com\u002FNVIDIA\u002FTensorRT) - is a C++ library for high performance inference on NVIDIA GPUs and deep learning accelerators.\n1. [OAID - Tengine](https:\u002F\u002Fgithub.com\u002FOAID\u002FTengine) - is a lite, high performance, modular inference engine for embedded device\n1. [Qualcomm - Neural Processing SDK for AI](https:\u002F\u002Fdeveloper.qualcomm.com\u002Fsoftware\u002Fqualcomm-neural-processing-sdk) - Libraries to developers run NN models on Snapdragon mobile platforms taking advantage of the CPU, GPU and\u002For DSP.\n1. [Tencent - ncnn](https:\u002F\u002Fgithub.com\u002FTencent\u002Fncnn) - is a high-performance neural network inference framework optimized for the mobile platform.\n1. [uTensor](https:\u002F\u002Fgithub.com\u002FuTensor\u002FuTensor) - AI inference library based on mbed (an RTOS for ARM chipsets) and TensorFlow.\n1. [XiaoMi - Mace](https:\u002F\u002Fgithub.com\u002FXiaoMi\u002Fmace) - is a deep learning inference framework optimized for mobile heterogeneous computing platforms. \n1. [xmartlabs - Bender](https:\u002F\u002Fgithub.com\u002Fxmartlabs\u002FBender) - Easily craft fast Neural Networks on iOS! Use TensorFlow models. Metal under the hood.\n\n### Optimization Tools\n\n1. [Neural Network Distiller](https:\u002F\u002Fgithub.com\u002FNervanaSystems\u002Fdistiller) -  Python package for neural network compression research.\n1. [PocketFlow](https:\u002F\u002Fgithub.com\u002FTencent\u002FPocketFlow) - An Automatic Model Compression (AutoMC) framework for developing smaller and faster AI applications.\n\n### Research Demos\n\n1. [RSTensorFlow](https:\u002F\u002Fnesl.github.io\u002FRSTensorFlow) - GPU Accelerated TensorFlow for Commodity Android Devices.\n\n### Web\n\n1. [mil-tokyo\u002Fwebdnn](https:\u002F\u002Fgithub.com\u002Fmil-tokyo\u002Fwebdnn) - Fastest DNN Execution Framework on Web Browser.\n\n## General\n\n1. 
[Caffe2 AICamera](https:\u002F\u002Fgithub.com\u002Fbwasti\u002FAICamera)\n1. [TensorFlow Android Camera Demo](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Ftensorflow\u002Ftree\u002Fmaster\u002Ftensorflow\u002Fexamples\u002Fandroid)\n1. [TensorFlow iOS Example](https:\u002F\u002Fgithub.com\u002Fhollance\u002FTensorFlow-iOS-Example)\n1. [TensorFlow OpenMV Camera Module](https:\u002F\u002Fgithub.com\u002Fopenmv\u002Fopenmv)\n\n\n### Edge \u002F Tiny MLOps\n\n1. [Tiny-MLOps: a framework for orchestrating ML applications at the far edge of IoT systems](https:\u002F\u002Fieeexplore.ieee.org\u002Fabstract\u002Fdocument\u002F9787703\u002Fauthors#authors) [EAIS '22]\n1. [MLOps for TinyML: Challenges & Directions in Operationalizing TinyML at Scale](https:\u002F\u002Fcms.tinyml.org\u002Fwp-content\u002Fuploads\u002Ftalks2022\u002FtinyML_Talks_Vijay_Janapa_Reddi_220524.pdf) [TinyML Talks '22]\n1. [TinyMLOps: Operational Challenges for Widespread Edge AI Adoption](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2203.10923.pdf) [arXiv '22]\n1. [A TinyMLaaS Ecosystem for Machine Learning in IoT: Overview and Research Challenges](https:\u002F\u002Fieeexplore.ieee.org\u002Fdocument\u002F9427352) [VLSI-DAT '21]\n1. [SOLIS: The MLOps journey from data acquisition to actionable insights](https:\u002F\u002Farxiv.org\u002Fabs\u002F2112.11925) [arXiv '21]\n1. [Edge MLOps: An Automation Framework for AIoT Applications](https:\u002F\u002Fwww.computer.org\u002Fcsdl\u002Fproceedings-article\u002Fic2e\u002F2021\u002F497000a191\u002F1yJZ8cHPTkQ) [IC2E '21]\n1. [SensiX++: Bringing MLOPs and Multi-tenant Model Serving to Sensory Edge Devices](https:\u002F\u002Farxiv.org\u002Fabs\u002F2109.03947) [arXiv '21, Nokia]\n\n### Vulkan\n\n1. [Vulkan API Examples and Demos](https:\u002F\u002Fgithub.com\u002FSaschaWillems\u002FVulkan)\n1. [Neural Machine Translation on Android](https:\u002F\u002Fgithub.com\u002Fharvardnlp\u002Fnmt-android)\n\n### OpenCL\n\n1. 
[DeepMon](https:\u002F\u002Fgithub.com\u002FJC1DA\u002FDeepMon)\n\n### RenderScript\n\n1. [Mobile_ConvNet: RenderScript CNN for Android](https:\u002F\u002Fgithub.com\u002Fmtmd\u002FMobile_ConvNet)\n\n## Tutorials\n\n### General\n\n1. [Squeezing Deep Learning Into Mobile Phones](https:\u002F\u002Fwww.slideshare.net\u002Fanirudhkoul\u002Fsqueezing-deep-learning-into-mobile-phones)\n1. [Deep Learning – Tutorial and Recent Trends](https:\u002F\u002Fwww.dropbox.com\u002Fs\u002Fp7lvelt0aihrwtl\u002FFPGA%2717%20tutorial%20Song%20Han.pdf?dl=0)\n1. [Tutorial on Hardware Architectures for Deep Neural Networks](http:\u002F\u002Feyeriss.mit.edu\u002Ftutorial.html)\n1. [Efficient Convolutional Neural Network Inference on Mobile GPUs](https:\u002F\u002Fwww.slideshare.net\u002Fembeddedvision\u002Fefficient-convolutional-neural-network-inference-on-mobile-gpus-a-presentation-from-imagination-technologies)\n\n### NEON\n\n1. [NEON™ Programmer’s Guide](https:\u002F\u002Fdeveloper.arm.com\u002Fdocs\u002Fden0018\u002Flatest\u002Fneontm-version-10-programmers-guide)\n\n### OpenCL\n\n1. [ARM® Mali™ GPU OpenCL Developer Guide](http:\u002F\u002Finfocenter.arm.com\u002Fhelp\u002Findex.jsp?topic=\u002Fcom.arm.doc.100614_0303_00_en\u002Fada1432742770595.html), [pdf](http:\u002F\u002Finfocenter.arm.com\u002Fhelp\u002Ftopic\u002Fcom.arm.doc.100614_0303_00_en\u002Farm_mali_gpu_opencl_developer_guide_100614_0303_00_en.pdf)\n1. [Optimal Compute on ARM Mali™ GPUs](http:\u002F\u002Fwww.cs.bris.ac.uk\u002Fhome\u002Fsimonm\u002Fmontblanc\u002FOpenCL_on_Mali.pdf)\n1. [GPU Compute for Mobile Devices](http:\u002F\u002Fwww.iwocl.org\u002Fwp-content\u002Fuploads\u002Fiwocl-2014-workshop-Tim-Hartley.pdf)\n1. [Compute for Mobile Devices Performance focused](http:\u002F\u002Fkesen.realtimerendering.com\u002FCompute_for_Mobile_Devices5.pdf)\n1. [Hands On OpenCL](https:\u002F\u002Fhandsonopencl.github.io\u002F)\n1. 
[Adreno OpenCL Programming Guide](https:\u002F\u002Fdeveloper.qualcomm.com\u002Fdownload\u002Fadrenosdk\u002Fadreno-opencl-programming-guide.pdf)\n1. [Better OpenCL Performance on Qualcomm Adreno GPU](https:\u002F\u002Fdeveloper.qualcomm.com\u002Fblog\u002Fbetter-opencl-performance-qualcomm-adreno-gpu-memory-optimization)\n\n## Courses\n\n1. [UW Deep learning **systems**](http:\u002F\u002Fdlsys.cs.washington.edu\u002Fschedule)\n1. [Berkeley Machine Learning Systems](https:\u002F\u002Fucbrise.github.io\u002Fcs294-ai-sys-fa19\u002F)\n\n\n## Tools\n\n### GPU\n\n1. [Bifrost GPU architecture and ARM Mali-G71 GPU](https:\u002F\u002Fwww.hotchips.org\u002Fwp-content\u002Fuploads\u002Fhc_archives\u002Fhc28\u002FHC28.22-Monday-Epub\u002FHC28.22.10-GPU-HPC-Epub\u002FHC28.22.110-Bifrost-JemDavies-ARM-v04-9.pdf)\n1. [Midgard GPU Architecture](http:\u002F\u002Fmalideveloper.arm.com\u002Fdownloads\u002FARM_Game_Developer_Days\u002FPDFs\u002F2-Mali-GPU-architecture-overview-and-tile-local-storage.pdf), [ARM Mali-T880 GPU](https:\u002F\u002Fwww.hotchips.org\u002Fwp-content\u002Fuploads\u002Fhc_archives\u002Fhc27\u002FHC27.25-Tuesday-Epub\u002FHC27.25.50-GPU-Epub\u002FHC27.25.531-Mali-T880-Bratt-ARM-2015_08_23.pdf)\n1. [Mobile GPU market share](https:\u002F\u002Fhwstats.unity3d.com\u002Fmobile\u002Fgpu.html)\n\n### Driver\n\n1. [Adreno] [csarron\u002Fqcom_vendor_binaries: Common Proprietary Qualcomm Binaries](https:\u002F\u002Fgithub.com\u002Fcsarron\u002Fqcom_vendor_binaries)\n1. 
[Mali] [Fevax\u002Fvendor_samsung_hero2ltexx: Blobs from s7 Edge G935F](https:\u002F\u002Fgithub.com\u002FFevax\u002Fvendor_samsung_hero2ltexx)\n\n\n## Related Repos\n+ [EfficientDNNs](https:\u002F\u002Fgithub.com\u002FMingSun-Tse\u002FEfficientDNNs) by @MingSun-Tse ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FMingSun-Tse\u002FEfficientDNNs?style=social) ![GitHub last commit](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Flast-commit\u002FMingSun-Tse\u002FEfficientDNNs.svg)\n+ [Awesome ML Model Compression](https:\u002F\u002Fgithub.com\u002Fcedrickchee\u002Fawesome-ml-model-compression) by @cedrickchee ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcedrickchee\u002Fawesome-ml-model-compression?style=social) ![GitHub last commit](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Flast-commit\u002Fcedrickchee\u002Fawesome-ml-model-compression.svg)\n+ [Awesome Pruning](https:\u002F\u002Fgithub.com\u002Fhe-y\u002FAwesome-Pruning) by @he-y ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcedrickchee\u002Fawesome-ml-model-compression?style=social) ![GitHub last commit](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Flast-commit\u002Fcedrickchee\u002Fawesome-ml-model-compression.svg)\n+ [Model Compression](https:\u002F\u002Fgithub.com\u002Fj-marple-dev\u002Fmodel_compression) by @j-marple-dev ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fj-marple-dev\u002Fmodel_compression?style=social) ![GitHub last commit](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Flast-commit\u002Fj-marple-dev\u002Fmodel_compression.svg)\n+ [awesome-AutoML-and-Lightweight-Models](https:\u002F\u002Fgithub.com\u002Fguan-yuan\u002Fawesome-AutoML-and-Lightweight-Models) by @guan-yuan ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fguan-yuan\u002Fawesome-AutoML-and-Lightweight-Models?style=social) ![GitHub last 
commit](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Flast-commit\u002Fguan-yuan\u002Fawesome-AutoML-and-Lightweight-Models.svg)\n+ [knowledge-distillation-papers](https:\u002F\u002Fgithub.com\u002Flhyfst\u002Fknowledge-distillation-papers) by @lhyfst ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flhyfst\u002Fknowledge-distillation-papers?style=social) ![GitHub last commit](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Flast-commit\u002Flhyfst\u002Fknowledge-distillation-papers.svg)\n+ [Awesome-model-compression-and-acceleration](https:\u002F\u002Fgithub.com\u002Fmemoiry\u002FAwesome-model-compression-and-acceleration) by @memoiry ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmemoiry\u002FAwesome-model-compression-and-acceleration?style=social) ![GitHub last commit](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Flast-commit\u002Fmemoiry\u002FAwesome-model-compression-and-acceleration.svg)\n+ [Embedded Neural Network](https:\u002F\u002Fgithub.com\u002FZhishengWang\u002FEmbedded-Neural-Network) by @ZhishengWang ![GitHub stars](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FZhishengWang\u002FEmbedded-Neural-Network?style=social) ![GitHub last commit](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Flast-commit\u002FZhishengWang\u002FEmbedded-Neural-Network.svg)\n","# 令人惊叹的 EMDL\n\n嵌入式与移动深度学习研究笔记。\n\n## 论文\n\n### 综述\n\n1. [EfficientDNNs](https:\u002F\u002Fgithub.com\u002FMingSun-Tse\u002FEfficientDNNs) [仓库]\n1. [Awesome ML Model Compression](https:\u002F\u002Fgithub.com\u002Fcedrickchee\u002Fawesome-ml-model-compression) [仓库]\n1. [TinyML 论文与项目](https:\u002F\u002Fgithub.com\u002Fgigwegbe\u002Ftinyml-papers-and-projects) [仓库]\n1. [TinyML 平台基准测试](https:\u002F\u002Farxiv.org\u002Fabs\u002F2112.01319) [arXiv '21]\n1. [TinyML：现有研究的系统性综述与整合](https:\u002F\u002Fieeexplore.ieee.org\u002Fabstract\u002Fdocument\u002F9722636) [ICAIIC '21]\n1. 
[TinyML 遇见物联网：全面综述](https:\u002F\u002Fwww.sciencedirect.com\u002Fscience\u002Farticle\u002Fabs\u002Fpii\u002FS2542660521001025) [物联网 '21]\n1. [关于 TinyML 的综述：现状与展望](https:\u002F\u002Fwww.sciencedirect.com\u002Fscience\u002Farticle\u002Fpii\u002FS1319157821003335) [沙特国王大学期刊 '21]\n1. [TinyML 基准测试：在通用微控制器上执行全连接神经网络](https:\u002F\u002Faran.library.nuigalway.ie\u002Fhandle\u002F10379\u002F16770) [IEEE '21]\n1. [高效深度学习：使深度学习模型更小、更快、更好的综述](https:\u002F\u002Farxiv.org\u002Fabs\u002F2106.08962) [arXiv '21]\n1. [TinyML 系统的基准测试：挑战与方向](https:\u002F\u002Farxiv.org\u002Fabs\u002F2003.04821) [arXiv '20]\n1. [神经网络的模型压缩与硬件加速：全面综述](https:\u002F\u002Fieeexplore.ieee.org\u002Fabstract\u002Fdocument\u002F9043731) [IEEE '20]\n1. [深度学习编译器：全面综述](https:\u002F\u002Farxiv.org\u002Fabs\u002F2002.03794) [arXiv '20]\n1. [深度卷积神经网络高效计算的最新进展](https:\u002F\u002Farxiv.org\u002Fabs\u002F1802.00939) [arXiv '18]\n1. [深度神经网络的模型压缩与加速综述](https:\u002F\u002Farxiv.org\u002Fabs\u002F1710.09282) [arXiv '17]\n\n### 模型\n\n1. [EtinyNet：适用于 TinyML 的极小型网络](https:\u002F\u002Fwww.aaai.org\u002FAAAI22Papers\u002FAAAI-4889.XuK.pdf) [AAAI '21]\n1. [MCUNetV2：面向 Tiny 深度学习的内存高效补丁式推理](https:\u002F\u002Farxiv.org\u002Fabs\u002F2110.15352) [NeurIPS '21，MIT]\n1. [SkyNet：一种在嵌入式系统上进行目标检测和跟踪的硬件高效方法](https:\u002F\u002Fproceedings.mlsys.org\u002Fpapers\u002F2020\u002F86) [MLSys '20，IBM]\n1. [模型魔方：通过扭转分辨率、深度和宽度优化 TinyNets](https:\u002F\u002Farxiv.org\u002Fabs\u002F2010.14819) [NeurIPS '20，华为]\n1. [MCUNet：物联网设备上的微型深度学习](https:\u002F\u002Farxiv.org\u002Fabs\u002F2007.10319) [NeurIPS '20，MIT]\n1. [GhostNet：以低成本操作获得更多特征](https:\u002F\u002Farxiv.org\u002Fabs\u002F1911.11907) [CVPR '20，华为]\n1. [MicroNet 用于高效的语言建模](https:\u002F\u002Farxiv.org\u002Fabs\u002F2005.07877) [NeurIPS '19，MIT]\n1. [MobileNetV3 的搜索](https:\u002F\u002Farxiv.org\u002Fabs\u002F1905.02244) [ICCV '19，谷歌]\n1. [MobilenetV2：倒残差与线性瓶颈：用于分类、检测和分割的移动网络](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1801.04381.pdf) [CVPR '18，谷歌]\n1. 
[ProxylessNAS：针对目标任务和硬件的直接神经架构搜索](https:\u002F\u002Farxiv.org\u002Fabs\u002F1812.00332) [arXiv '18，MIT]\n1. [DeepRebirth：加速移动设备上的深度神经网络执行](https:\u002F\u002Farxiv.org\u002Fabs\u002F1708.04728) [AAAI'18，三星]\n1. [NasNet：学习可迁移的架构以实现可扩展的图像识别](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1707.07012.pdf) [arXiv '17，谷歌]\n1. [ShuffleNet：一种极其高效的移动设备用卷积神经网络](https:\u002F\u002Farxiv.org\u002Fabs\u002F1707.01083) [arXiv '17，Megvii]\n1. [MobileNets：用于移动视觉应用的高效卷积神经网络](https:\u002F\u002Farxiv.org\u002Fabs\u002F1704.04861) [arXiv '17，谷歌]\n1. [CondenseNet：使用学习到的组卷积构建的高效 DenseNet](https:\u002F\u002Farxiv.org\u002Fabs\u002F1711.09224) [arXiv '17]\n\n### 系统\n\n1. [BSC：基于块的随机计算，以实现准确且高效的 TinyML](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2111.06686.pdf?ref=https:\u002F\u002Fgithubhelp.com) [ASP-DAC '22]\n1. [CFU Playground：用于在 FPGA 上加速微型机器学习（tinyML）的全栈开源框架](https:\u002F\u002Farxiv.org\u002Fabs\u002F2201.01863) [arXiv '22，谷歌]\n1. [UDC：用于可压缩 TinyML 模型的统一 DNAS](https:\u002F\u002Farxiv.org\u002Fabs\u002F2201.05842) [arXiv '22，Arm]\n1. [AnalogNets：噪声鲁棒型 TinyML 模型与始终开启的模拟存内计算加速器的软硬件协同设计](https:\u002F\u002Farxiv.org\u002Fabs\u002F2111.06503) [arXiv '21，Arm]\n1. [TinyTL：减少激活而非可训练参数，以实现高效的设备端学习](https:\u002F\u002Farxiv.org\u002Fabs\u002F2007.11622) [NeurIPS '20，MIT]\n1. [Once for All：训练一个网络并将其专门化以实现高效部署](https:\u002F\u002Farxiv.org\u002Fabs\u002F1908.09791) [ICLR '20，MIT]\n1. [DeepMon：基于移动 GPU 的深度学习框架，用于连续视觉应用](https:\u002F\u002Fwww.sigmobile.org\u002Fmobisys\u002F2017\u002Faccepted.php) [MobiSys '17]\n1. [DeepEye：利用可穿戴通用硬件资源高效地本地执行多个深度视觉模型](http:\u002F\u002Ffahim-kawsar.net\u002Fpapers\u002FMathur.MobiSys2017-Camera.pdf) [MobiSys '17]\n1. [MobiRNN：在移动 GPU 上高效执行循环神经网络](https:\u002F\u002Farxiv.org\u002Fabs\u002F1706.00878) [EMDL '17]\n1. [fpgaConvNet：将多种卷积神经网络映射到嵌入式 FPGA 上的工具流程](https:\u002F\u002Farxiv.org\u002Fabs\u002F1711.08740) [NIPS '17]\n1. 
[DeepSense：基于 GPU 的深度卷积神经网络框架，适用于通用移动设备](http:\u002F\u002Fink.library.smu.edu.sg\u002Fcgi\u002Fviewcontent.cgi?article=4278&context=sis_research) [WearSys '16]\n1. [DeepX：用于低功耗移动设备深度学习推理的软件加速器](http:\u002F\u002Fniclane.org\u002Fpubs\u002Fdeepx_ipsn.pdf) [IPSN '16]\n1. [EIE：压缩深度神经网络上的高效推理引擎](https:\u002F\u002Farxiv.org\u002Fabs\u002F1602.01528) [ISCA '16]\n1. [MCDNN：基于近似值的执行框架，用于资源受限下的深度流处理](http:\u002F\u002Fhaneul.github.io\u002Fpapers\u002Fmcdnn.pdf) [MobiSys '16]\n1. [DXTK：借助 DeepX 工具包，在移动和嵌入式设备上实现资源高效的深度学习](http:\u002F\u002Fniclane.org\u002Fpubs\u002Fdxtk_mobicase.pdf) [MobiCASE '16]\n1. [为可穿戴设备上的受限资源推理而进行的深度学习层稀疏化与分离](http:\u002F\u002Fniclane.org\u002Fpubs\u002Fsparsesep_sensys.pdf) [SenSys ’16]\n1. [对可穿戴设备、智能手机和物联网设备上深度学习的早期资源特性分析](http:\u002F\u002Fniclane.org\u002Fpubs\u002Fiotapp15_early.pdf) [IoT-App ’15]\n1. [CNNdroid：在 Android 设备上加速执行已训练的深度卷积神经网络](https:\u002F\u002Farxiv.org\u002Fabs\u002F1511.07376) [MM '16]\n\n### 量化\n\n1. [用于高效推理的深度卷积网络量化：白皮书](https:\u002F\u002Farxiv.org\u002Fabs\u002F1806.08342) [arXiv '18]\n1. [LQ-Nets：为高精度和紧凑型深度神经网络设计的可学习量化方法](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1807.10029.pdf) [ECCV'18]\n1. [深度神经网络中的整数训练与推理](https:\u002F\u002Fopenreview.net\u002Fforum?id=HJGXzmspb) [ICLR'18]\n1. [ZipML框架：端到端低精度模型训练——可行方案、不可行情况及一点深度学习](https:\u002F\u002Farxiv.org\u002Fabs\u002F1611.05402) [ICML'17]\n1. [损失感知的深度网络二值化](https:\u002F\u002Farxiv.org\u002Fabs\u002F1611.01600) [ICLR'17]\n1. [迈向网络量化极限](https:\u002F\u002Farxiv.org\u002Fabs\u002F1612.01543) [ICLR'17]\n1. [基于半波高斯量化的低精度深度学习](https:\u002F\u002Farxiv.org\u002Fabs\u002F1702.00953) [CVPR'17]\n1. [ShiftCNN：面向卷积神经网络推理的通用低精度架构](https:\u002F\u002Farxiv.org\u002Fabs\u002F1706.02393) [arXiv'17]\n1. [面向移动设备的量化卷积神经网络](https:\u002F\u002Farxiv.org\u002Fabs\u002F1512.06473) [CVPR '16]\n1. [循环神经网络的定点性能分析](https:\u002F\u002Farxiv.org\u002Fabs\u002F1512.01322) [ICASSP'16]\n1. [量化神经网络：使用低精度权重和激活进行神经网络训练](https:\u002F\u002Farxiv.org\u002Fabs\u002F1609.07061) [arXiv'16]\n1. 
[利用向量量化压缩深度卷积网络](https:\u002F\u002Farxiv.org\u002Fabs\u002F1412.6115) [arXiv'14]\n\n### 剪枝\n\n1. [Awesome-Pruning](https:\u002F\u002Fgithub.com\u002Fhe-y\u002FAwesome-Pruning) [仓库]\n1. [基于几何中位数的滤波器剪枝用于加速深度卷积神经网络](https:\u002F\u002Farxiv.org\u002Fabs\u002F1811.00250) [CVPR'19]\n1. [剪枝还是不剪枝：探索剪枝在模型压缩中的有效性](https:\u002F\u002Farxiv.org\u002Fabs\u002F1710.01878) [ICLR'18]\n1. [用于高效ConvNet的滤波器剪枝](https:\u002F\u002Farxiv.org\u002Fabs\u002F1608.08710) [ICLR'17]\n1. [为资源高效推理而剪枝卷积神经网络](https:\u002F\u002Farxiv.org\u002Fabs\u002F1611.06440) [ICLR'17]\n1. [用于神经网络压缩的软权值共享](https:\u002F\u002Farxiv.org\u002Fabs\u002F1702.04008) [ICLR'17]\n1. [利用能耗感知剪枝设计节能卷积神经网络](https:\u002F\u002Farxiv.org\u002Fabs\u002F1611.05128) [CVPR'17]\n1. [ThiNet：一种用于深度神经网络压缩的滤波器级剪枝方法](https:\u002F\u002Farxiv.org\u002Fabs\u002F1707.06342) [ICCV'17]\n1. [深度压缩：通过剪枝、训练量化和霍夫曼编码压缩深度神经网络](https:\u002F\u002Farxiv.org\u002Fabs\u002F1510.00149) [ICLR'16]\n1. [用于高效DNN的动态网络手术](https:\u002F\u002Farxiv.org\u002Fabs\u002F1608.04493) [NIPS'16]\n1. [同时学习权重和连接以构建高效神经网络](https:\u002F\u002Farxiv.org\u002Fabs\u002F1506.02626) [NIPS'15]\n\n### 近似\n\n1. [移动设备上的高性能超低精度卷积](https:\u002F\u002Farxiv.org\u002Fabs\u002F1712.02427) [NIPS'17]\n1. [面向快速且低功耗移动应用的深度卷积神经网络压缩](https:\u002F\u002Farxiv.org\u002Fabs\u002F1511.06530) [ICLR'16]\n1. [非线性卷积网络的高效精确近似](https:\u002F\u002Farxiv.org\u002Fabs\u002F1411.4229) [CVPR'15]\n1. [加速用于分类和检测的超深卷积神经网络](https:\u002F\u002Farxiv.org\u002Fabs\u002F1505.06798)（上述论文的扩展版）\n1. [带有低秩正则化的卷积神经网络](https:\u002F\u002Farxiv.org\u002Fabs\u002F1511.06067) [arXiv'15]\n1. [利用卷积网络内的线性结构实现高效评估](https:\u002F\u002Farxiv.org\u002Fabs\u002F1404.0736) [NIPS'14]\n\n### 特征分析\n\n1. [智能手机上深度学习应用初探](https:\u002F\u002Farxiv.org\u002Fabs\u002F1812.05448) [WWW'19]\n1. [Facebook的机器学习：理解边缘推理](https:\u002F\u002Fresearch.fb.com\u002Fpublications\u002Fmachine-learning-at-facebook-understanding-inference-at-the-edge\u002F) [HPCA'19]\n1. [NetAdapt：面向移动应用的平台感知型神经网络自适应](https:\u002F\u002Farxiv.org\u002Fabs\u002F1804.03230) [ECCV 2018]\n1. 
[移动计算机视觉中卷积神经网络的延迟与吞吐量特征分析](https:\u002F\u002Farxiv.org\u002Fabs\u002F1803.09492) [MMSys’18]\n\n## 库\n\n### 推理框架\n\n1. [阿里巴巴 - MNN](https:\u002F\u002Fgithub.com\u002Falibaba\u002FMNN) - 是一个极速、轻量级的深度学习框架，已在阿里巴巴的关键业务场景中经过严格考验。\n1. [苹果 - CoreML](https:\u002F\u002Fdeveloper.apple.com\u002Fdocumentation\u002Fcoreml) - 用于将机器学习模型集成到你的应用中。[BERT和GPT-2在iPhone上的实现](https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fswift-coreml-transformers)\n1. [Arm - ComputeLibrary](https:\u002F\u002Fgithub.com\u002FARM-software\u002FComputeLibrary) - 是一套针对Arm CPU和GPU优化的计算机视觉和机器学习函数库，采用了SIMD技术。[简介](https:\u002F\u002Fdeveloper.arm.com\u002Ftechnologies\u002Fcompute-library)\n1. [Arm - Arm NN](https:\u002F\u002Fgithub.com\u002FARM-software\u002Farmnn) - 是Android和Linux平台上性能最优的机器学习推理引擎，可在Arm Cortex-A CPU和Arm Mali GPU上加速机器学习任务。\n1. [百度 - Paddle Lite](https:\u002F\u002Fgithub.com\u002FPaddlePaddle\u002FPaddle-Lite) - 是一个多平台的高性能深度学习推理引擎。\n1. [DeepLearningKit](https:\u002F\u002Fgithub.com\u002FDeepLearningKit\u002FDeepLearningKit) - 是适用于苹果iOS、OS X和tvOS的开源深度学习框架。\n1. [Edge Impulse](https:\u002F\u002Fedgeimpulse.com) - 一个交互式平台，用于生成可在微控制器上运行的模型。他们在社交媒体上也非常活跃，分享关于边缘AI\u002F TinyML的最新动态。\n1. [谷歌 - TensorFlow Lite](https:\u002F\u002Fwww.tensorflow.org\u002Flite\u002Fperformance\u002Fgpu) - 是一个面向设备端推理的开源深度学习框架。\n1. [英特尔 - OpenVINO](https:\u002F\u002Fgithub.com\u002Fopenvinotoolkit\u002Fopenvino) - 是一个全面的工具包，用于优化你的流程以实现更快的推理速度。\n1. [JDAI计算机视觉 - dabnn](https:\u002F\u002Fgithub.com\u002FJDAI-CV\u002Fdabnn) - 是一个针对移动平台加速二值神经网络推理的框架。\n1. [Meta - PyTorch Mobile](https:\u002F\u002Fpytorch.org\u002Fmobile\u002Fhome) - 是一个新框架，旨在帮助移动开发者和机器学习工程师将PyTorch的ML模型嵌入到设备端。\n1. [微软 - DeepSpeed](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FDeepSpeed) - 是一个深度学习优化库，使分布式训练和推理变得简单、高效且有效。\n1. [微软 - ELL](https:\u002F\u002Fgithub.com\u002FMicrosoft\u002FELL) - 允许你设计并部署智能机器学习模型到资源受限的平台和小型单板计算机上，如Raspberry Pi、Arduino和micro:bit。\n1. 
[微软 - ONNX Runtime](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Fonnxruntime) - 是一个跨平台、高性能的ML推理和训练加速器。\n1. [英伟达 - TensorRT](https:\u002F\u002Fgithub.com\u002FNVIDIA\u002FTensorRT) - 是一个C++库，用于在NVIDIA GPU和深度学习加速器上实现高性能推理。\n1. [OAID - Tengine](https:\u002F\u002Fgithub.com\u002FOAID\u002FTengine) - 是一个轻量级、高性能、模块化的嵌入式设备推理引擎。\n1. [高通 - AI神经处理SDK](https:\u002F\u002Fdeveloper.qualcomm.com\u002Fsoftware\u002Fqualcomm-neural-processing-sdk) - 提供给开发者的库，用于在骁龙移动平台上运行NN模型，充分利用CPU、GPU和\u002F或DSP。\n1. [腾讯 - ncnn](https:\u002F\u002Fgithub.com\u002FTencent\u002Fncnn) - 是一个为移动平台优化的高性能神经网络推理框架。\n1. [uTensor](https:\u002F\u002Fgithub.com\u002FuTensor\u002FuTensor) - 基于mbed（一种针对ARM芯片组的RTOS）和TensorFlow的AI推理库。\n1. [小米 - Mace](https:\u002F\u002Fgithub.com\u002FXiaoMi\u002Fmace) - 是一个为移动异构计算平台优化的深度学习推理框架。\n1. [xmartlabs - Bender](https:\u002F\u002Fgithub.com\u002Fxmartlabs\u002FBender) - 轻松在iOS上构建快速的神经网络！使用TensorFlow模型。底层采用Metal技术。\n\n### 优化工具\n\n1. [Neural Network Distiller](https:\u002F\u002Fgithub.com\u002FNervanaSystems\u002Fdistiller) - 用于神经网络压缩研究的Python软件包。\n1. [PocketFlow](https:\u002F\u002Fgithub.com\u002FTencent\u002FPocketFlow) - 一个自动模型压缩（AutoMC）框架，用于开发更小、更快的AI应用。\n\n### 研究演示\n\n1. [RSTensorFlow](https:\u002F\u002Fnesl.github.io\u002FRSTensorFlow) - 面向普通安卓设备的GPU加速TensorFlow。\n\n### Web\n\n1. [mil-tokyo\u002Fwebdnn](https:\u002F\u002Fgithub.com\u002Fmil-tokyo\u002Fwebdnn) - 在Web浏览器上最快的DNN执行框架。\n\n## 通用\n\n1. [Caffe2 AICamera](https:\u002F\u002Fgithub.com\u002Fbwasti\u002FAICamera)\n1. [TensorFlow Android相机演示](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Ftensorflow\u002Ftree\u002Fmaster\u002Ftensorflow\u002Fexamples\u002Fandroid)\n1. [TensorFlow iOS示例](https:\u002F\u002Fgithub.com\u002Fhollance\u002FTensorFlow-iOS-Example)\n1. [TensorFlow OpenMV相机模块](https:\u002F\u002Fgithub.com\u002Fopenmv\u002Fopenmv)\n\n\n### 边缘 \u002F Tiny MLOps\n\n1. 
[Tiny-MLOps：一个用于在物联网系统远端编排ML应用的框架](https:\u002F\u002Fieeexplore.ieee.org\u002Fabstract\u002Fdocument\u002F9787703\u002Fauthors#authors) [EAIS '22]\n1. [TinyML的MLOps：大规模部署TinyML的挑战与方向](https:\u002F\u002Fcms.tinyml.org\u002Fwp-content\u002Fuploads\u002Ftalks2022\u002FtinyML_Talks_Vijay_Janapa_Reddi_220524.pdf) [TinyML Talks '22]\n1. [TinyMLOps：广泛采用边缘AI面临的运营挑战](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2203.10923.pdf) [arXiv '22]\n1. [用于物联网中机器学习的TinyMLaaS生态系统：概述与研究挑战](https:\u002F\u002Fieeexplore.ieee.org\u002Fdocument\u002F9427352) [VLSI-DAT '21]\n1. [SOLIS：从数据采集到可操作洞察的MLOps之旅](https:\u002F\u002Farxiv.org\u002Fabs\u002F2112.11925) [arXiv '21]\n1. [边缘MLOps：面向AIoT应用的自动化框架](https:\u002F\u002Fwww.computer.org\u002Fcsdl\u002Fproceedings-article\u002Fic2e\u002F2021\u002F497000a191\u002F1yJZ8cHPTkQ) [IC2E '21]\n1. [SensiX++：将MLOPs和多租户模型服务引入感官边缘设备](https:\u002F\u002Farxiv.org\u002Fabs\u002F2109.03947) [arXiv '21，诺基亚]\n\n### Vulkan\n\n1. [Vulkan API示例和演示](https:\u002F\u002Fgithub.com\u002FSaschaWillems\u002FVulkan)\n1. [Android上的神经机器翻译](https:\u002F\u002Fgithub.com\u002Fharvardnlp\u002Fnmt-android)\n\n### OpenCL\n\n1. [DeepMon](https:\u002F\u002Fgithub.com\u002FJC1DA\u002FDeepMon)\n\n### RenderScript\n\n1. [Mobile_ConvNet：Android上的RenderScript CNN](https:\u002F\u002Fgithub.com\u002Fmtmd\u002FMobile_ConvNet)\n\n## 教程\n\n### 通用\n\n1. [将深度学习塞进手机](https:\u002F\u002Fwww.slideshare.net\u002Fanirudhkoul\u002Fsqueezing-deep-learning-into-mobile-phones)\n1. [深度学习——教程与最新趋势](https:\u002F\u002Fwww.dropbox.com\u002Fs\u002Fp7lvelt0aihrwtl\u002FFPGA%2717%20tutorial%20Song%20Han.pdf?dl=0)\n1. [深度神经网络硬件架构教程](http:\u002F\u002Feyeriss.mit.edu\u002Ftutorial.html)\n1. [在移动GPU上高效进行卷积神经网络推理](https:\u002F\u002Fwww.slideshare.net\u002Fembeddedvision\u002Fefficient-convolutional-neural-network-inference-on-mobile-gpus-a-presentation-from-imagination-technologies)\n\n### NEON\n\n1. 
[NEON™程序员指南](https:\u002F\u002Fdeveloper.arm.com\u002Fdocs\u002Fden0018\u002Flatest\u002Fneontm-version-10-programmers-guide)\n\n### OpenCL\n\n1. [ARM® Mali™ GPU OpenCL 开发者指南](http:\u002F\u002Finfocenter.arm.com\u002Fhelp\u002Findex.jsp?topic=\u002Fcom.arm.doc.100614_0303_00_en\u002Fada1432742770595.html), [pdf](http:\u002F\u002Finfocenter.arm.com\u002Fhelp\u002Ftopic\u002Fcom.arm.doc.100614_0303_00_en\u002Farm_mali_gpu_opencl_developer_guide_100614_0303_00_en.pdf)\n1. [ARM Mali™ GPU 上的最优计算](http:\u002F\u002Fwww.cs.bris.ac.uk\u002Fhome\u002Fsimonm\u002Fmontblanc\u002FOpenCL_on_Mali.pdf)\n1. [移动设备上的 GPU 计算](http:\u002F\u002Fwww.iwocl.org\u002Fwp-content\u002Fuploads\u002Fiwocl-2014-workshop-Tim-Hartley.pdf)\n1. [面向性能的移动设备计算](http:\u002F\u002Fkesen.realtimerendering.com\u002FCompute_for_Mobile_Devices5.pdf)\n1. [动手学 OpenCL](https:\u002F\u002Fhandsonopencl.github.io\u002F)\n1. [Adreno OpenCL 编程指南](https:\u002F\u002Fdeveloper.qualcomm.com\u002Fdownload\u002Fadrenosdk\u002Fadreno-opencl-programming-guide.pdf)\n1. [高通 Adreno GPU 上更好的 OpenCL 性能](https:\u002F\u002Fdeveloper.qualcomm.com\u002Fblog\u002Fbetter-opencl-performance-qualcomm-adreno-gpu-memory-optimization)\n\n## 课程\n\n1. [华盛顿大学深度学习 **系统**](http:\u002F\u002Fdlsys.cs.washington.edu\u002Fschedule)\n1. [伯克利机器学习系统](https:\u002F\u002Fucbrise.github.io\u002Fcs294-ai-sys-fa19\u002F)\n\n\n## 工具\n\n### GPU\n\n1. [Bifrost GPU 架构与 ARM Mali-G71 GPU](https:\u002F\u002Fwww.hotchips.org\u002Fwp-content\u002Fuploads\u002Fhc_archives\u002Fhc28\u002FHC28.22-Monday-Epub\u002FHC28.22.10-GPU-HPC-Epub\u002FHC28.22.110-Bifrost-JemDavies-ARM-v04-9.pdf)\n1. 
[Midgard GPU 架构](http:\u002F\u002Fmalideveloper.arm.com\u002Fdownloads\u002FARM_Game_Developer_Days\u002FPDFs\u002F2-Mali-GPU-architecture-overview-and-tile-local-storage.pdf), [ARM Mali-T880 GPU](https:\u002F\u002Fwww.hotchips.org\u002Fwp-content\u002Fuploads\u002Fhc_archives\u002Fhc27\u002FHC27.25-Tuesday-Epub\u002FHC27.25.50-GPU-Epub\u002FHC27.25.531-Mali-T880-Bratt-ARM-2015_08_23.pdf)\n1. [移动 GPU 市场份额](https:\u002F\u002Fhwstats.unity3d.com\u002Fmobile\u002Fgpu.html)\n\n### 驱动程序\n\n1. [Adreno] [csarron\u002Fqcom_vendor_binaries: 常用高通专有二进制文件](https:\u002F\u002Fgithub.com\u002Fcsarron\u002Fqcom_vendor_binaries)\n1. [Mali] [Fevax\u002Fvendor_samsung_hero2ltexx: 来自 s7 Edge G935F 的固件 blob](https:\u002F\u002Fgithub.com\u002FFevax\u002Fvendor_samsung_hero2ltexx)\n\n\n## 相关仓库\n+ [EfficientDNNs](https:\u002F\u002Fgithub.com\u002FMingSun-Tse\u002FEfficientDNNs) 由 @MingSun-Tse 维护 ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FMingSun-Tse\u002FEfficientDNNs?style=social) ![GitHub 最后一次提交](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Flast-commit\u002FMingSun-Tse\u002FEfficientDNNs.svg)\n+ [Awesome ML 模型压缩](https:\u002F\u002Fgithub.com\u002Fcedrickchee\u002Fawesome-ml-model-compression) 由 @cedrickchee 维护 ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fcedrickchee\u002Fawesome-ml-model-compression?style=social) ![GitHub 最后一次提交](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Flast-commit\u002Fcedrickchee\u002Fawesome-ml-model-compression.svg)\n+ [Awesome Pruning](https:\u002F\u002Fgithub.com\u002Fhe-y\u002FAwesome-Pruning) 由 @he-y 维护 ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fhe-y\u002FAwesome-Pruning?style=social) ![GitHub 最后一次提交](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Flast-commit\u002Fhe-y\u002FAwesome-Pruning.svg)\n+ [模型压缩](https:\u002F\u002Fgithub.com\u002Fj-marple-dev\u002Fmodel_compression) 由 @j-marple-dev 维护 ![GitHub 
星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fj-marple-dev\u002Fmodel_compression?style=social) ![GitHub 最后一次提交](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Flast-commit\u002Fj-marple-dev\u002Fmodel_compression.svg)\n+ [awesome-AutoML-and-Lightweight-Models](https:\u002F\u002Fgithub.com\u002Fguan-yuan\u002Fawesome-AutoML-and-Lightweight-Models) 由 @guan-yuan 维护 ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fguan-yuan\u002Fawesome-AutoML-and-Lightweight-Models?style=social) ![GitHub 最后一次提交](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Flast-commit\u002Fguan-yuan\u002Fawesome-AutoML-and-Lightweight-Models.svg)\n+ [knowledge-distillation-papers](https:\u002F\u002Fgithub.com\u002Flhyfst\u002Fknowledge-distillation-papers) 由 @lhyfst 维护 ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Flhyfst\u002Fknowledge-distillation-papers?style=social) ![GitHub 最后一次提交](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Flast-commit\u002Flhyfst\u002Fknowledge-distillation-papers.svg)\n+ [Awesome-model-compression-and-acceleration](https:\u002F\u002Fgithub.com\u002Fmemoiry\u002FAwesome-model-compression-and-acceleration) 由 @memoiry 维护 ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002Fmemoiry\u002FAwesome-model-compression-and-acceleration?style=social) ![GitHub 最后一次提交](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Flast-commit\u002Fmemoiry\u002FAwesome-model-compression-and-acceleration.svg)\n+ [嵌入式神经网络](https:\u002F\u002Fgithub.com\u002FZhishengWang\u002FEmbedded-Neural-Network) 由 @ZhishengWang 维护 ![GitHub 星标](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fstars\u002FZhishengWang\u002FEmbedded-Neural-Network?style=social) ![GitHub 最后一次提交](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Flast-commit\u002FZhishengWang\u002FEmbedded-Neural-Network.svg)","# awesome-emdl 快速上手指南\n\n`awesome-emdl` 并非一个可直接安装运行的软件库或框架，而是一个**嵌入式与移动端深度学习（Embedded & Mobile Deep 
Learning）的研究资源清单**。它汇集了该领域最新的综述论文、高效模型架构、系统优化方案、量化剪枝技术以及主流推理框架。\n\n本指南旨在帮助中国开发者利用该清单快速构建知识体系并选择合适的工具链进行开发。\n\n## 环境准备\n\n由于本项目是资源索引，无需特定的运行时环境。但为了复现清单中提到的模型或使用相关框架，建议准备以下基础环境：\n\n*   **操作系统**：Linux (Ubuntu 18.04\u002F20.04 推荐), macOS, 或 Windows (WSL2)\n*   **编程语言**：Python 3.6+\n*   **核心依赖**：\n    *   PyTorch 或 TensorFlow (用于模型训练与转换)\n    *   Git (用于克隆仓库)\n*   **硬件目标（可选）**：\n    *   移动端：Android\u002FiOS 设备\n    *   嵌入式：树莓派 (Raspberry Pi), NVIDIA Jetson 系列，或 STM32 等微控制器\n*   **网络加速**：\n    *   访问 GitHub 源码时，建议使用国内镜像源（如 Gitee 搜索对应项目镜像）或配置代理。\n    *   下载论文数据集时，可使用 ArXiv 国内镜像。\n\n## 获取资源\n\n该项目没有传统的“安装”步骤，主要通过克隆仓库获取资源列表。\n\n```bash\n# 克隆仓库到本地\ngit clone https:\u002F\u002Fgithub.com\u002Fcsarron\u002Fawesome-emdl.git\n\n# 进入目录\ncd awesome-emdl\n\n# 查看资源列表\ncat README.md\n```\n\n> **提示**：如果 GitHub 连接缓慢，可尝试在 Gitee 上搜索 `awesome-emdl` 查找国内镜像仓库进行克隆。\n\n## 基本使用\n\n使用 `awesome-emdl` 的核心在于根据你的具体需求（如：模型压缩、特定硬件部署、低精度推理），从清单中定位对应的论文或开源框架。\n\n### 场景一：寻找适合移动端的轻量级模型\n如果你需要在手机或嵌入式设备上运行目标检测或分类任务，请参考 **Model** 章节：\n1.  查阅 **MobileNetV3**, **GhostNet**, 或 **MCUNet** 相关论文链接。\n2.  根据论文标题旁的 `[Repo]` 标识（如有）或论文名称，在 GitHub 搜索官方实现代码。\n3.  例如，搜索 `MobileNetV3 pytorch` 获取预训练模型。\n\n### 场景二：选择端侧推理引擎\n如果你已经训练好模型，需要将其部署到特定硬件，请参考 **Libraries -> Inference Framework** 章节：\n*   **Android\u002FLinux (ARM 架构)**: 推荐使用 **MNN** (阿里开源，国内文档完善) 或 **Arm NN**。\n    *   MNN 地址：`https:\u002F\u002Fgithub.com\u002Falibaba\u002FMNN`\n*   **iOS\u002FmacOS**: 直接使用 **CoreML**。\n*   **微控制器 (MCU)**: 参考 **Edge Impulse** 平台或 **TinyML** 相关论文。\n\n### 场景三：模型压缩与加速\n如果需要减小模型体积或提升推理速度，请参考 **Quantization** (量化) 和 **Pruning** (剪枝) 章节：\n1.  阅读 `Deep Compression` 或 `Quantizing deep convolutional networks` 等综述了解原理。\n2.  查找 **Awesome-Pruning** 仓库获取具体的剪枝算法实现代码。\n3.  利用 PyTorch Quantization 或 TensorFlow Lite 内置工具进行实验。\n\n### 示例工作流\n假设你要在树莓派上部署一个图像分类模型：\n1.  **选型**：在 **Model** 部分选择 `MobileNetV2` 或 `ShuffleNet`。\n2.  **训练**：使用 PyTorch 训练模型。\n3.  **转换**：在 **Libraries** 部分选择 `MNN` 或 `TFLite` (即清单中的 TensorFlow Lite)，将模型转换为 `.mnn` 或 `.tflite` 格式。\n4.  
**部署**：参考对应框架的 C++ 或 Python API 文档在树莓派上加载模型并推理。\n\n通过浏览 `awesome-emdl` 的分类目录，你可以快速找到上述每个环节的最优学术方案和工业界实现。","某嵌入式团队正致力于在低功耗微控制器上部署实时跌倒检测算法，以用于独居老人的智能监护手环。\n\n### 没有 awesome-emdl 时\n- **文献检索如大海捞针**：工程师需手动在 arXiv、IEEE 及各大会议网站分散搜索\"TinyML\"或“模型压缩”相关论文，耗时数周仍难以覆盖最新成果。\n- **选型缺乏硬件依据**：面对众多轻量级网络，无法快速找到针对特定微控制器（如 ARM Cortex-M）的基准测试数据，导致模型在内存占用或推理速度上不达标。\n- **复现成本极高**：缺少系统整理的开源代码库链接，团队需从零复现经典算法（如 MobileNet 变体），常因细节缺失而陷入调试困境。\n- **技术视野受限**：容易忽略跨领域的创新方案（如基于 FPGA 的加速框架或随机计算新方法），错失优化系统能效的最佳路径。\n\n### 使用 awesome-emdl 后\n- **一站式获取前沿资源**：直接通过分类清晰的列表，快速获取包括 NeurIPS、CVPR 在内的顶会最新综述与核心论文，将调研周期从数周缩短至两天。\n- **精准匹配硬件约束**：利用收录的基准测试报告（如 TinyML Platforms Benchmarking），迅速锁定适合手环芯片的极致轻量模型（如 EtinyNet 或 MCUNet）。\n- **加速落地验证**：通过列表中提供的官方 Repo 链接，直接复用经过验证的代码实现，大幅降低开发门槛并提升原型迭代速度。\n- **激发系统级创新**：接触到 BSC 块随机计算等前沿系统优化方案，成功在保持精度的同时将功耗进一步降低 30%，延长了设备续航。\n\nawesome-emdl 将碎片化的嵌入式深度学习研究资源整合为结构化知识图谱，帮助开发者在资源受限的边缘设备上高效实现从理论到落地的跨越。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fcsarron_awesome-emdl_bc774085.png","csarron","Qingqing Cao","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fcsarron_f16bd24e.png","AI Research @Apple . Prev Postdoc @UWNLP. ","Apple AIML","Seattle",null,"awk_ai","https:\u002F\u002Fawk.ai\u002F","https:\u002F\u002Fgithub.com\u002Fcsarron",767,167,"2026-04-16T14:08:58","MIT",1,"","未说明",{"notes":90,"python":88,"dependencies":91},"该仓库（awesome-emdl）并非一个可执行的软件工具或框架，而是一个嵌入式和移动深度学习（TinyML\u002FEMDL）领域的论文、项目和库的资源列表（Awesome List）。README 内容主要包含相关研究论文链接以及现有的推理框架（如 MNN, CoreML, Arm NN, Paddle Lite 等）的引用。因此，该仓库本身没有特定的操作系统、GPU、内存、Python 版本或依赖库要求。用户若需运行列表中提到的具体项目或使用列出的框架，需参考各自项目的文档以获取具体的环境需求。",[],[14],[94,95,96,97,98,99,100,101,102,103,104],"efficient-neural-networks","deep-neural-networks","deep-learning","mobile-ai","embedded-ai","mobile-inference","mobile-deep-learning","pruning","quantization","neural-network-compression","inference","2026-03-27T02:49:30.150509","2026-04-19T09:39:18.535238",[],[]]