[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-floodsung--Deep-Learning-Papers-Reading-Roadmap":3,"tool-floodsung--Deep-Learning-Papers-Reading-Roadmap":61},[4,18,26,36,44,52],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",141543,2,"2026-04-06T11:32:54",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",107888,"2026-04-06T11:32:50",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":10,"last_commit_at":50,"category_tags":51,"status":17},4487,"LLMs-from-scratch","rasbt\u002FLLMs-from-scratch","LLMs-from-scratch 是一个基于 PyTorch 的开源教育项目，旨在引导用户从零开始一步步构建一个类似 ChatGPT 的大型语言模型（LLM）。它不仅是同名技术著作的官方代码库，更提供了一套完整的实践方案，涵盖模型开发、预训练及微调的全过程。\n\n该项目主要解决了大模型领域“黑盒化”的学习痛点。许多开发者虽能调用现成模型，却难以深入理解其内部架构与训练机制。通过亲手编写每一行核心代码，用户能够透彻掌握 Transformer 
架构、注意力机制等关键原理，从而真正理解大模型是如何“思考”的。此外，项目还包含了加载大型预训练权重进行微调的代码，帮助用户将理论知识延伸至实际应用。\n\nLLMs-from-scratch 特别适合希望深入底层原理的 AI 开发者、研究人员以及计算机专业的学生。对于不满足于仅使用 API，而是渴望探究模型构建细节的技术人员而言，这是极佳的学习资源。其独特的技术亮点在于“循序渐进”的教学设计：将复杂的系统工程拆解为清晰的步骤，配合详细的图表与示例，让构建一个虽小但功能完备的大模型变得触手可及。无论你是想夯实理论基础，还是为未来研发更大规模的模型做准备",90106,"2026-04-06T11:19:32",[35,15,13,14],{"id":53,"name":54,"github_repo":55,"description_zh":56,"stars":57,"difficulty_score":10,"last_commit_at":58,"category_tags":59,"status":17},4292,"Deep-Live-Cam","hacksider\u002FDeep-Live-Cam","Deep-Live-Cam 是一款专注于实时换脸与视频生成的开源工具，用户仅需一张静态照片，即可通过“一键操作”实现摄像头画面的即时变脸或制作深度伪造视频。它有效解决了传统换脸技术流程繁琐、对硬件配置要求极高以及难以实时预览的痛点，让高质量的数字内容创作变得触手可及。\n\n这款工具不仅适合开发者和技术研究人员探索算法边界，更因其极简的操作逻辑（仅需三步：选脸、选摄像头、启动），广泛适用于普通用户、内容创作者、设计师及直播主播。无论是为了动画角色定制、服装展示模特替换，还是制作趣味短视频和直播互动，Deep-Live-Cam 都能提供流畅的支持。\n\n其核心技术亮点在于强大的实时处理能力，支持口型遮罩（Mouth Mask）以保留使用者原始的嘴部动作，确保表情自然精准；同时具备“人脸映射”功能，可同时对画面中的多个主体应用不同面孔。此外，项目内置了严格的内容安全过滤机制，自动拦截涉及裸露、暴力等不当素材，并倡导用户在获得授权及明确标注的前提下合规使用，体现了技术发展与伦理责任的平衡。",88924,"2026-04-06T03:28:53",[14,15,13,60],"视频",{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":66,"readme_en":67,"readme_zh":68,"quickstart_zh":69,"use_case_zh":70,"hero_image_url":71,"owner_login":72,"owner_name":73,"owner_avatar_url":74,"owner_bio":75,"owner_company":76,"owner_location":77,"owner_email":78,"owner_twitter":77,"owner_website":79,"owner_url":80,"languages":81,"stars":86,"forks":87,"last_commit_at":88,"license":77,"difficulty_score":89,"env_os":90,"env_gpu":91,"env_ram":91,"env_deps":92,"category_tags":95,"github_topics":96,"view_count":32,"oss_zip_url":77,"oss_zip_packed_at":77,"status":17,"created_at":98,"updated_at":99,"faqs":100,"releases":121},4604,"floodsung\u002FDeep-Learning-Papers-Reading-Roadmap","Deep-Learning-Papers-Reading-Roadmap","Deep Learning papers reading roadmap for anyone who are eager to learn this amazing tech!","Deep-Learning-Papers-Reading-Roadmap 是一份专为深度学习初学者和进阶者打造的论文阅读指南。面对浩如烟海的学术文献，许多人在入门时往往困惑于“该从哪篇读起”，这份路线图正是为了解决这一痛点而生。它并非简单的列表堆砌，而是依据“从轮廓到细节、从经典到前沿、从通用到专项”四大原则精心构建的学习路径。\n\n内容涵盖深度学习历史基石（如 Bengio 的《深度学习》圣经）、里程碑式突破（如 AlexNet、ResNet）以及语音识别等垂直领域的演进历程。其独特亮点在于不仅收录了奠定学科基础的经典之作，还持续更新极具价值的前沿论文，帮助读者在掌握核心原理的同时紧跟技术潮流。无论是希望系统建立知识体系的开发者、需要追踪最新进展的研究人员，还是对人工智能充满好奇的学习者，都能从中获得清晰的指引，高效开启深度学习之旅。","# Deep Learning Papers Reading Roadmap\n\n>If you are a newcomer to the Deep Learning area, the first question you may have is \"Which paper should I start reading from?\"\n\n>Here is a reading roadmap of Deep Learning papers!\n\nThe roadmap is constructed in accordance with the following four guidelines:\n\n- From outline to detail\n- From old to state-of-the-art\n- from generic to specific areas\n- focus on state-of-the-art\n\nYou will find many papers that are quite new but really worth reading.\n\nI would continue adding papers to this roadmap.\n\n\n---------------------------------------\n\n# 1 Deep Learning History and Basics\n\n## 1.0 Book\n\n**[0]** Bengio, Yoshua, Ian J. Goodfellow, and Aaron Courville. \"**Deep learning**.\" An MIT Press book. (2015). [[html]](http:\u002F\u002Fwww.deeplearningbook.org\u002F) **(Deep Learning Bible, you can read this book while reading following papers.)** :star::star::star::star::star:\n\n## 1.1 Survey\n\n**[1]** LeCun, Yann, Yoshua Bengio, and Geoffrey Hinton. \"**Deep learning**.\" Nature 521.7553 (2015): 436-444. 
[[pdf]](http:\u002F\u002Fwww.cs.toronto.edu\u002F~hinton\u002Fabsps\u002FNatureDeepReview.pdf) **(Three Giants' Survey)** :star::star::star::star::star:\n\n## 1.2 Deep Belief Network(DBN)(Milestone of Deep Learning Eve)\n\n**[2]** Hinton, Geoffrey E., Simon Osindero, and Yee-Whye Teh. \"**A fast learning algorithm for deep belief nets**.\" Neural computation 18.7 (2006): 1527-1554. [[pdf]](http:\u002F\u002Fwww.cs.toronto.edu\u002F~hinton\u002Fabsps\u002Fncfast.pdf)**(Deep Learning Eve)** :star::star::star:\n\n**[3]** Hinton, Geoffrey E., and Ruslan R. Salakhutdinov. \"**Reducing the dimensionality of data with neural networks**.\" Science 313.5786 (2006): 504-507. [[pdf]](http:\u002F\u002Fwww.cs.toronto.edu\u002F~hinton\u002Fscience.pdf) **(Milestone, Show the promise of deep learning)** :star::star::star:\n\n## 1.3 ImageNet Evolution（Deep Learning broke out from here）\n\n**[4]** Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton. \"**Imagenet classification with deep convolutional neural networks**.\" Advances in neural information processing systems. 2012. [[pdf]](http:\u002F\u002Fpapers.nips.cc\u002Fpaper\u002F4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf) **(AlexNet, Deep Learning Breakthrough)** :star::star::star::star::star:\n\n**[5]** Simonyan, Karen, and Andrew Zisserman. \"**Very deep convolutional networks for large-scale image recognition**.\" arXiv preprint arXiv:1409.1556 (2014). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1409.1556.pdf) **(VGGNet,Neural Networks become very deep!)** :star::star::star:\n\n**[6]** Szegedy, Christian, et al. \"**Going deeper with convolutions**.\" Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2015. [[pdf]](http:\u002F\u002Fwww.cv-foundation.org\u002Fopenaccess\u002Fcontent_cvpr_2015\u002Fpapers\u002FSzegedy_Going_Deeper_With_2015_CVPR_paper.pdf) **(GoogLeNet)** :star::star::star:\n\n**[7]** He, Kaiming, et al. \"**Deep residual learning for image recognition**.\" arXiv preprint arXiv:1512.03385 (2015). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1512.03385.pdf) **(ResNet,Very very deep networks, CVPR best paper)** :star::star::star::star::star:\n\n## 1.4 Speech Recognition Evolution\n\n**[8]** Hinton, Geoffrey, et al. \"**Deep neural networks for acoustic modeling in speech recognition: The shared views of four research groups**.\" IEEE Signal Processing Magazine 29.6 (2012): 82-97. [[pdf]](http:\u002F\u002Fcs224d.stanford.edu\u002Fpapers\u002Fmaas_paper.pdf) **(Breakthrough in speech recognition)**:star::star::star::star:\n\n**[9]** Graves, Alex, Abdel-rahman Mohamed, and Geoffrey Hinton. \"**Speech recognition with deep recurrent neural networks**.\" 2013 IEEE international conference on acoustics, speech and signal processing. IEEE, 2013. [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1303.5778.pdf) **(RNN)**:star::star::star:\n\n**[10]** Graves, Alex, and Navdeep Jaitly. \"**Towards End-To-End Speech Recognition with Recurrent Neural Networks**.\" ICML. Vol. 14. 2014. [[pdf]](http:\u002F\u002Fwww.jmlr.org\u002Fproceedings\u002Fpapers\u002Fv32\u002Fgraves14.pdf):star::star::star:\n\n**[11]** Sak, Haşim, et al. \"**Fast and accurate recurrent neural network acoustic models for speech recognition**.\" arXiv preprint arXiv:1507.06947 (2015). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1507.06947) **(Google Speech Recognition System)** :star::star::star:\n\n**[12]** Amodei, Dario, et al. 
\"**Deep speech 2: End-to-end speech recognition in english and mandarin**.\" arXiv preprint arXiv:1512.02595 (2015). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1512.02595.pdf) **(Baidu Speech Recognition System)** :star::star::star::star:\n\n**[13]** W. Xiong, J. Droppo, X. Huang, F. Seide, M. Seltzer, A. Stolcke, D. Yu, G. Zweig \"**Achieving Human Parity in Conversational Speech Recognition**.\" arXiv preprint arXiv:1610.05256 (2016). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1610.05256v1) **(State-of-the-art in speech recognition, Microsoft)** :star::star::star::star:\n\n>After reading above papers, you will have a basic understanding of the Deep Learning history, the basic architectures of Deep Learning model(including CNN, RNN, LSTM) and how deep learning can be applied to image and speech recognition issues. The following papers will take you in-depth understanding of the Deep Learning method, Deep Learning in different areas of application and the frontiers. I suggest that you can choose the following papers based on your interests and research direction.\n\n#2 Deep Learning Method\n\n## 2.1 Model\n\n**[14]** Hinton, Geoffrey E., et al. \"**Improving neural networks by preventing co-adaptation of feature detectors**.\" arXiv preprint arXiv:1207.0580 (2012). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1207.0580.pdf) **(Dropout)** :star::star::star:\n\n**[15]** Srivastava, Nitish, et al. \"**Dropout: a simple way to prevent neural networks from overfitting**.\" Journal of Machine Learning Research 15.1 (2014): 1929-1958. [[pdf]](https:\u002F\u002Fwww.cs.toronto.edu\u002F~hinton\u002Fabsps\u002FJMLRdropout.pdf) :star::star::star:\n\n**[16]** Ioffe, Sergey, and Christian Szegedy. \"**Batch normalization: Accelerating deep network training by reducing internal covariate shift**.\" arXiv preprint arXiv:1502.03167 (2015). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1502.03167) **(An outstanding Work in 2015)** :star::star::star::star:\n\n**[17]** Ba, Jimmy Lei, Jamie Ryan Kiros, and Geoffrey E. Hinton. \"**Layer normalization**.\" arXiv preprint arXiv:1607.06450 (2016). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1607.06450.pdf?utm_source=sciontist.com&utm_medium=refer&utm_campaign=promote) **(Update of Batch Normalization)** :star::star::star::star:\n\n**[18]** Courbariaux, Matthieu, et al. \"**Binarized Neural Networks: Training Neural Networks with Weights and Activations Constrained to+ 1 or−1**.\" [[pdf]](https:\u002F\u002Fpdfs.semanticscholar.org\u002Ff832\u002Fb16cb367802609d91d400085eb87d630212a.pdf) **(New Model,Fast)**  :star::star::star:\n\n**[19]** Jaderberg, Max, et al. \"**Decoupled neural interfaces using synthetic gradients**.\" arXiv preprint arXiv:1608.05343 (2016). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1608.05343) **(Innovation of Training Method,Amazing Work)** :star::star::star::star::star:\n\n**[20]** Chen, Tianqi, Ian Goodfellow, and Jonathon Shlens. \"Net2net: Accelerating learning via knowledge transfer.\" arXiv preprint arXiv:1511.05641 (2015). [[pdf]](https:\u002F\u002Farxiv.org\u002Fabs\u002F1511.05641) **(Modify previously trained network to reduce training epochs)** :star::star::star:\n\n**[21]** Wei, Tao, et al. \"Network Morphism.\" arXiv preprint arXiv:1603.01670 (2016). [[pdf]](https:\u002F\u002Farxiv.org\u002Fabs\u002F1603.01670) **(Modify previously trained network to reduce training epochs)** :star::star::star:\n\n## 2.2 Optimization\n\n**[22]** Sutskever, Ilya, et al. 
\"**On the importance of initialization and momentum in deep learning**.\" ICML (3) 28 (2013): 1139-1147. [[pdf]](http:\u002F\u002Fwww.jmlr.org\u002Fproceedings\u002Fpapers\u002Fv28\u002Fsutskever13.pdf) **(Momentum optimizer)** :star::star:\n\n**[23]** Kingma, Diederik, and Jimmy Ba. \"**Adam: A method for stochastic optimization**.\" arXiv preprint arXiv:1412.6980 (2014). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1412.6980) **(Maybe used most often currently)** :star::star::star:\n\n**[24]** Andrychowicz, Marcin, et al. \"**Learning to learn by gradient descent by gradient descent**.\" arXiv preprint arXiv:1606.04474 (2016). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1606.04474) **(Neural Optimizer,Amazing Work)** :star::star::star::star::star:\n\n**[25]** Han, Song, Huizi Mao, and William J. Dally. \"**Deep compression: Compressing deep neural network with pruning, trained quantization and huffman coding**.\" CoRR, abs\u002F1510.00149 2 (2015). [[pdf]](https:\u002F\u002Fpdfs.semanticscholar.org\u002F5b6c\u002F9dda1d88095fa4aac1507348e498a1f2e863.pdf) **(ICLR best paper, new direction to make NN running fast,DeePhi Tech Startup)** :star::star::star::star::star:\n\n**[26]** Iandola, Forrest N., et al. \"**SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and\u003C 1MB model size**.\" arXiv preprint arXiv:1602.07360 (2016). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1602.07360) **(Also a new direction to optimize NN,DeePhi Tech Startup)** :star::star::star::star:\n\n**[27]** Glorat Xavier, Bengio Yoshua, et al. \"**Understanding the difficulty of training deep forward neural networks**.\" Proceedings of the thirteenth International Conference on Artificial Intelligence and Statistics, PMLR 9:249-256,2010. [[pdf]](http:\u002F\u002Fproceedings.mlr.press\u002Fv9\u002Fglorot10a\u002Fglorot10a.pdf) :star::star::star::star:\n\n## 2.3 Unsupervised Learning \u002F Deep Generative Model\n\n**[28]** Le, Quoc V. \"**Building high-level features using large scale unsupervised learning**.\" 2013 IEEE international conference on acoustics, speech and signal processing. IEEE, 2013. [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1112.6209.pdf&embed) **(Milestone, Andrew Ng, Google Brain Project, Cat)** :star::star::star::star:\n\n\n**[29]** Kingma, Diederik P., and Max Welling. \"**Auto-encoding variational bayes**.\" arXiv preprint arXiv:1312.6114 (2013). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1312.6114) **(VAE)** :star::star::star::star:\n\n**[30]** Goodfellow, Ian, et al. \"**Generative adversarial nets**.\" Advances in Neural Information Processing Systems. 2014. [[pdf]](http:\u002F\u002Fpapers.nips.cc\u002Fpaper\u002F5423-generative-adversarial-nets.pdf) **(GAN,super cool idea)** :star::star::star::star::star:\n\n**[31]** Radford, Alec, Luke Metz, and Soumith Chintala. \"**Unsupervised representation learning with deep convolutional generative adversarial networks**.\" arXiv preprint arXiv:1511.06434 (2015). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.06434) **(DCGAN)** :star::star::star::star:\n\n**[32]** Gregor, Karol, et al. \"**DRAW: A recurrent neural network for image generation**.\" arXiv preprint arXiv:1502.04623 (2015). [[pdf]](http:\u002F\u002Fjmlr.org\u002Fproceedings\u002Fpapers\u002Fv37\u002Fgregor15.pdf) **(VAE with attention, outstanding work)** :star::star::star::star::star:\n\n**[33]** Oord, Aaron van den, Nal Kalchbrenner, and Koray Kavukcuoglu. \"**Pixel recurrent neural networks**.\" arXiv preprint arXiv:1601.06759 (2016). 
[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1601.06759) **(PixelRNN)** :star::star::star::star:\n\n**[34]** Oord, Aaron van den, et al. \"Conditional image generation with PixelCNN decoders.\" arXiv preprint arXiv:1606.05328 (2016). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1606.05328) **(PixelCNN)** :star::star::star::star:\n\n**[34]** S. Mehri et al., \"**SampleRNN: An Unconditional End-to-End Neural Audio Generation Model**.\" arXiv preprint \tarXiv:1612.07837 (2016). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1612.07837.pdf) :star::star::star::star::star:\n\n## 2.4 RNN \u002F Sequence-to-Sequence Model\n\n**[35]** Graves, Alex. \"**Generating sequences with recurrent neural networks**.\" arXiv preprint arXiv:1308.0850 (2013). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1308.0850) **(LSTM, very nice generating result, show the power of RNN)** :star::star::star::star:\n\n**[36]** Cho, Kyunghyun, et al. \"**Learning phrase representations using RNN encoder-decoder for statistical machine translation**.\" arXiv preprint arXiv:1406.1078 (2014). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1406.1078) **(First Seq-to-Seq Paper)** :star::star::star::star:\n\n**[37]** Sutskever, Ilya, Oriol Vinyals, and Quoc V. Le. \"**Sequence to sequence learning with neural networks**.\" Advances in neural information processing systems. 2014. [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1409.3215.pdf) **(Outstanding Work)** :star::star::star::star::star:\n\n**[38]** Bahdanau, Dzmitry, KyungHyun Cho, and Yoshua Bengio. \"**Neural Machine Translation by Jointly Learning to Align and Translate**.\" arXiv preprint arXiv:1409.0473 (2014). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1409.0473v7.pdf) :star::star::star::star:\n\n**[39]** Vinyals, Oriol, and Quoc Le. \"**A neural conversational model**.\" arXiv preprint arXiv:1506.05869 (2015). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.05869.pdf%20(http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.05869.pdf)) **(Seq-to-Seq on Chatbot)** :star::star::star:\n\n## 2.5 Neural Turing Machine\n\n**[40]** Graves, Alex, Greg Wayne, and Ivo Danihelka. \"**Neural turing machines**.\" arXiv preprint arXiv:1410.5401 (2014). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1410.5401.pdf) **(Basic Prototype of Future Computer)** :star::star::star::star::star:\n\n**[41]** Zaremba, Wojciech, and Ilya Sutskever. \"**Reinforcement learning neural Turing machines**.\" arXiv preprint arXiv:1505.00521 362 (2015). [[pdf]](https:\u002F\u002Fpdfs.semanticscholar.org\u002Ff10e\u002F071292d593fef939e6ef4a59baf0bb3a6c2b.pdf) :star::star::star:\n\n**[42]** Weston, Jason, Sumit Chopra, and Antoine Bordes. \"**Memory networks**.\" arXiv preprint arXiv:1410.3916 (2014). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1410.3916) :star::star::star:\n\n\n**[43]** Sukhbaatar, Sainbayar, Jason Weston, and Rob Fergus. \"**End-to-end memory networks**.\" Advances in neural information processing systems. 2015. [[pdf]](http:\u002F\u002Fpapers.nips.cc\u002Fpaper\u002F5846-end-to-end-memory-networks.pdf) :star::star::star::star:\n\n**[44]** Vinyals, Oriol, Meire Fortunato, and Navdeep Jaitly. \"**Pointer networks**.\" Advances in Neural Information Processing Systems. 2015. [[pdf]](http:\u002F\u002Fpapers.nips.cc\u002Fpaper\u002F5866-pointer-networks.pdf) :star::star::star::star:\n\n**[45]** Graves, Alex, et al. \"**Hybrid computing using a neural network with dynamic external memory**.\" Nature (2016). 
[[pdf]](https:\u002F\u002Fwww.dropbox.com\u002Fs\u002F0a40xi702grx3dq\u002F2016-graves.pdf) **(Milestone,combine above papers' ideas)** :star::star::star::star::star:\n\n## 2.6 Deep Reinforcement Learning\n\n**[46]** Mnih, Volodymyr, et al. \"**Playing atari with deep reinforcement learning**.\" arXiv preprint arXiv:1312.5602 (2013). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1312.5602.pdf)) **(First Paper named deep reinforcement learning)** :star::star::star::star:\n\n**[47]** Mnih, Volodymyr, et al. \"**Human-level control through deep reinforcement learning**.\" Nature 518.7540 (2015): 529-533. [[pdf]](https:\u002F\u002Fstorage.googleapis.com\u002Fdeepmind-data\u002Fassets\u002Fpapers\u002FDeepMindNature14236Paper.pdf) **(Milestone)** :star::star::star::star::star:\n\n**[48]** Wang, Ziyu, Nando de Freitas, and Marc Lanctot. \"**Dueling network architectures for deep reinforcement learning**.\" arXiv preprint arXiv:1511.06581 (2015). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.06581) **(ICLR best paper,great idea)**  :star::star::star::star:\n\n**[49]** Mnih, Volodymyr, et al. \"**Asynchronous methods for deep reinforcement learning**.\" arXiv preprint arXiv:1602.01783 (2016). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1602.01783) **(State-of-the-art method)** :star::star::star::star::star:\n\n**[50]** Lillicrap, Timothy P., et al. \"**Continuous control with deep reinforcement learning**.\" arXiv preprint arXiv:1509.02971 (2015). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1509.02971) **(DDPG)** :star::star::star::star:\n\n**[51]** Gu, Shixiang, et al. \"**Continuous Deep Q-Learning with Model-based Acceleration**.\" arXiv preprint arXiv:1603.00748 (2016). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1603.00748) **(NAF)** :star::star::star::star:\n\n**[52]** Schulman, John, et al. \"**Trust region policy optimization**.\" CoRR, abs\u002F1502.05477 (2015). [[pdf]](http:\u002F\u002Fwww.jmlr.org\u002Fproceedings\u002Fpapers\u002Fv37\u002Fschulman15.pdf) **(TRPO)** :star::star::star::star:\n\n**[53]** Silver, David, et al. \"**Mastering the game of Go with deep neural networks and tree search**.\" Nature 529.7587 (2016): 484-489. [[pdf]](http:\u002F\u002Fwillamette.edu\u002F~levenick\u002Fcs448\u002FgoNature.pdf) **(AlphaGo)** :star::star::star::star::star:\n\n## 2.7 Deep Transfer Learning \u002F Lifelong Learning \u002F especially for RL\n\n**[54]** Bengio, Yoshua. \"**Deep Learning of Representations for Unsupervised and Transfer Learning**.\" ICML Unsupervised and Transfer Learning 27 (2012): 17-36. [[pdf]](http:\u002F\u002Fwww.jmlr.org\u002Fproceedings\u002Fpapers\u002Fv27\u002Fbengio12a\u002Fbengio12a.pdf) **(A Tutorial)** :star::star::star:\n\n**[55]** Silver, Daniel L., Qiang Yang, and Lianghao Li. \"**Lifelong Machine Learning Systems: Beyond Learning Algorithms**.\" AAAI Spring Symposium: Lifelong Machine Learning. 2013. [[pdf]](http:\u002F\u002Fciteseerx.ist.psu.edu\u002Fviewdoc\u002Fdownload?doi=10.1.1.696.7800&rep=rep1&type=pdf) **(A brief discussion about lifelong learning)**  :star::star::star:\n\n**[56]** Hinton, Geoffrey, Oriol Vinyals, and Jeff Dean. \"**Distilling the knowledge in a neural network**.\" arXiv preprint arXiv:1503.02531 (2015). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1503.02531) **(Godfather's Work)** :star::star::star::star:\n\n**[57]** Rusu, Andrei A., et al. \"**Policy distillation**.\" arXiv preprint arXiv:1511.06295 (2015). 
[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.06295) **(RL domain)** :star::star::star:\n\n**[58]** Parisotto, Emilio, Jimmy Lei Ba, and Ruslan Salakhutdinov. \"**Actor-mimic: Deep multitask and transfer reinforcement learning**.\" arXiv preprint arXiv:1511.06342 (2015). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.06342) **(RL domain)** :star::star::star:\n\n**[59]** Rusu, Andrei A., et al. \"**Progressive neural networks**.\" arXiv preprint arXiv:1606.04671 (2016). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1606.04671) **(Outstanding Work, A novel idea)** :star::star::star::star::star:\n\n\n## 2.8 One Shot Deep Learning\n\n**[60]** Lake, Brenden M., Ruslan Salakhutdinov, and Joshua B. Tenenbaum. \"**Human-level concept learning through probabilistic program induction**.\" Science 350.6266 (2015): 1332-1338. [[pdf]](http:\u002F\u002Fclm.utexas.edu\u002Fcompjclub\u002Fwp-content\u002Fuploads\u002F2016\u002F02\u002Flake2015.pdf) **(No Deep Learning,but worth reading)** :star::star::star::star::star:\n\n**[61]** Koch, Gregory, Richard Zemel, and Ruslan Salakhutdinov. \"**Siamese Neural Networks for One-shot Image Recognition**.\"(2015) [[pdf]](http:\u002F\u002Fwww.cs.utoronto.ca\u002F~gkoch\u002Ffiles\u002Fmsc-thesis.pdf) :star::star::star:\n\n**[62]** Santoro, Adam, et al. \"**One-shot Learning with Memory-Augmented Neural Networks**.\" arXiv preprint arXiv:1605.06065 (2016). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1605.06065) **(A basic step to one shot learning)** :star::star::star::star:\n\n**[63]** Vinyals, Oriol, et al. \"**Matching Networks for One Shot Learning**.\" arXiv preprint arXiv:1606.04080 (2016). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1606.04080) :star::star::star:\n\n**[64]** Hariharan, Bharath, and Ross Girshick. \"**Low-shot visual object recognition**.\" arXiv preprint arXiv:1606.02819 (2016). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1606.02819) **(A step to large data)** :star::star::star::star:\n\n\n# 3 Applications\n\n## 3.1 NLP(Natural Language Processing)\n\n**[1]** Antoine Bordes, et al. \"**Joint Learning of Words and Meaning Representations for Open-Text Semantic Parsing**.\" AISTATS(2012) [[pdf]](https:\u002F\u002Fwww.hds.utc.fr\u002F~bordesan\u002Fdokuwiki\u002Flib\u002Fexe\u002Ffetch.php?id=en%3Apubli&cache=cache&media=en:bordes12aistats.pdf) :star::star::star::star:\n\n**[2]** Mikolov, et al. \"**Distributed representations of words and phrases and their compositionality**.\" ANIPS(2013): 3111-3119 [[pdf]](http:\u002F\u002Fpapers.nips.cc\u002Fpaper\u002F5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf) **(word2vec)** :star::star::star:\n\n**[3]** Sutskever, et al. \"**“Sequence to sequence learning with neural networks**.\" ANIPS(2014) [[pdf]](http:\u002F\u002Fpapers.nips.cc\u002Fpaper\u002F5346-sequence-to-sequence-learning-with-neural-networks.pdf) :star::star::star:\n\n**[4]** Ankit Kumar, et al. \"**“Ask Me Anything: Dynamic Memory Networks for Natural Language Processing**.\" arXiv preprint arXiv:1506.07285(2015) [[pdf]](https:\u002F\u002Farxiv.org\u002Fabs\u002F1506.07285) :star::star::star::star:\n\n**[5]** Yoon Kim, et al. \"**Character-Aware Neural Language Models**.\" NIPS(2015) arXiv preprint arXiv:1508.06615(2015) [[pdf]](https:\u002F\u002Farxiv.org\u002Fabs\u002F1508.06615) :star::star::star::star:\n\n**[6]** Jason Weston, et al. 
\"**Towards AI-Complete Question Answering: A Set of Prerequisite Toy Tasks**.\" arXiv preprint arXiv:1502.05698(2015) [[pdf]](https:\u002F\u002Farxiv.org\u002Fabs\u002F1502.05698) **(bAbI tasks)** :star::star::star:\n\n**[7]** Karl Moritz Hermann, et al. \"**Teaching Machines to Read and Comprehend**.\" arXiv preprint arXiv:1506.03340(2015) [[pdf]](https:\u002F\u002Farxiv.org\u002Fabs\u002F1506.03340) **(CNN\u002FDailyMail cloze style questions)** :star::star:\n\n**[8]** Alexis Conneau, et al. \"**Very Deep Convolutional Networks for Natural Language Processing**.\" arXiv preprint arXiv:1606.01781(2016) [[pdf]](https:\u002F\u002Farxiv.org\u002Fabs\u002F1606.01781) **(state-of-the-art in text classification)** :star::star::star:\n\n**[9]** Armand Joulin, et al. \"**Bag of Tricks for Efficient Text Classification**.\" arXiv preprint arXiv:1607.01759(2016) [[pdf]](https:\u002F\u002Farxiv.org\u002Fabs\u002F1607.01759) **(slightly worse than state-of-the-art, but a lot faster)** :star::star::star:\n\n## 3.2 Object Detection\n\n**[1]** Szegedy, Christian, Alexander Toshev, and Dumitru Erhan. \"**Deep neural networks for object detection**.\" Advances in Neural Information Processing Systems. 2013. [[pdf]](http:\u002F\u002Fpapers.nips.cc\u002Fpaper\u002F5207-deep-neural-networks-for-object-detection.pdf) :star::star::star:\n\n**[2]** Girshick, Ross, et al. \"**Rich feature hierarchies for accurate object detection and semantic segmentation**.\" Proceedings of the IEEE conference on computer vision and pattern recognition. 2014. [[pdf]](http:\u002F\u002Fwww.cv-foundation.org\u002Fopenaccess\u002Fcontent_cvpr_2014\u002Fpapers\u002FGirshick_Rich_Feature_Hierarchies_2014_CVPR_paper.pdf) **(RCNN)** :star::star::star::star::star:\n\n**[3]** He, Kaiming, et al. \"**Spatial pyramid pooling in deep convolutional networks for visual recognition**.\" European Conference on Computer Vision. Springer International Publishing, 2014. [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1406.4729) **(SPPNet)** :star::star::star::star:\n\n**[4]** Girshick, Ross. \"**Fast r-cnn**.\" Proceedings of the IEEE International Conference on Computer Vision. 2015. [[pdf]](https:\u002F\u002Fpdfs.semanticscholar.org\u002F8f67\u002F64a59f0d17081f2a2a9d06f4ed1cdea1a0ad.pdf) :star::star::star::star:\n\n**[5]** Ren, Shaoqing, et al. \"**Faster R-CNN: Towards real-time object detection with region proposal networks**.\" Advances in neural information processing systems. 2015. [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.01497.pdf) :star::star::star::star:\n\n**[6]** Redmon, Joseph, et al. \"**You only look once: Unified, real-time object detection**.\" arXiv preprint arXiv:1506.02640 (2015). [[pdf]](http:\u002F\u002Fhomes.cs.washington.edu\u002F~ali\u002Fpapers\u002FYOLO.pdf) **(YOLO,Oustanding Work, really practical)** :star::star::star::star::star:\n\n**[7]** Liu, Wei, et al. \"**SSD: Single Shot MultiBox Detector**.\" arXiv preprint arXiv:1512.02325 (2015). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1512.02325) :star::star::star:\n\n**[8]** Dai, Jifeng, et al. \"**R-FCN: Object Detection via\nRegion-based Fully Convolutional Networks**.\" arXiv preprint arXiv:1605.06409 (2016). [[pdf]](https:\u002F\u002Farxiv.org\u002Fabs\u002F1605.06409) :star::star::star::star:\n\n**[9]** He, Gkioxari, et al. \"**Mask R-CNN**\" arXiv preprint arXiv:1703.06870 (2017). [[pdf]](https:\u002F\u002Farxiv.org\u002Fabs\u002F1703.06870) :star::star::star::star:\n\n**[10]** Bochkovskiy, Alexey, et al. 
\"**YOLOv4: Optimal Speed and Accuracy of Object Detection.**\"  arXiv preprint arXiv:2004.10934 (2020). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2004.10934) :star::star::star::star:\n\n\n**[11]** Tan, Mingxing, et al. “**EfficientDet: Scalable and Efficient Object Detection.**\" arXiv preprint arXiv:1911.09070 (2019). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1911.09070) :star::star::star::star::star:\n\n\n## 3.3 Visual Tracking\n\n**[1]** Wang, Naiyan, and Dit-Yan Yeung. \"**Learning a deep compact image representation for visual tracking**.\" Advances in neural information processing systems. 2013. [[pdf]](http:\u002F\u002Fpapers.nips.cc\u002Fpaper\u002F5192-learning-a-deep-compact-image-representation-for-visual-tracking.pdf) **(First Paper to do visual tracking using Deep Learning,DLT Tracker)** :star::star::star:\n\n**[2]** Wang, Naiyan, et al. \"**Transferring rich feature hierarchies for robust visual tracking**.\" arXiv preprint arXiv:1501.04587 (2015). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1501.04587) **(SO-DLT)** :star::star::star::star:\n\n**[3]** Wang, Lijun, et al. \"**Visual tracking with fully convolutional networks**.\" Proceedings of the IEEE International Conference on Computer Vision. 2015. [[pdf]](http:\u002F\u002Fwww.cv-foundation.org\u002Fopenaccess\u002Fcontent_iccv_2015\u002Fpapers\u002FWang_Visual_Tracking_With_ICCV_2015_paper.pdf) **(FCNT)** :star::star::star::star:\n\n**[4]** Held, David, Sebastian Thrun, and Silvio Savarese. \"**Learning to Track at 100 FPS with Deep Regression Networks**.\" arXiv preprint arXiv:1604.01802 (2016). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1604.01802) **(GOTURN,Really fast as a deep learning method,but still far behind un-deep-learning methods)** :star::star::star::star:\n\n**[5]** Bertinetto, Luca, et al. \"**Fully-Convolutional Siamese Networks for Object Tracking**.\" arXiv preprint arXiv:1606.09549 (2016). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1606.09549) **(SiameseFC,New state-of-the-art for real-time object tracking)** :star::star::star::star:\n\n**[6]** Martin Danelljan, Andreas Robinson, Fahad Khan, Michael Felsberg. \"**Beyond Correlation Filters: Learning Continuous Convolution Operators for Visual Tracking**.\" ECCV (2016) [[pdf]](http:\u002F\u002Fwww.cvl.isy.liu.se\u002Fresearch\u002Fobjrec\u002Fvisualtracking\u002Fconttrack\u002FC-COT_ECCV16.pdf) **(C-COT)** :star::star::star::star:\n\n**[7]** Nam, Hyeonseob, Mooyeol Baek, and Bohyung Han. \"**Modeling and Propagating CNNs in a Tree Structure for Visual Tracking**.\" arXiv preprint arXiv:1608.07242 (2016). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1608.07242) **(VOT2016 Winner,TCNN)** :star::star::star::star:\n\n## 3.4 Image Caption\n**[1]** Farhadi,Ali,etal. \"**Every picture tells a story: Generating sentences from images**\". In Computer VisionECCV 2010. Springer Berlin Heidelberg:15-29, 2010. [[pdf]](https:\u002F\u002Fwww.cs.cmu.edu\u002F~afarhadi\u002Fpapers\u002Fsentence.pdf) :star::star::star:\n\n**[2]** Kulkarni, Girish, et al. \"**Baby talk: Understanding and generating image descriptions**\". In Proceedings of the 24th CVPR, 2011. [[pdf]](http:\u002F\u002Ftamaraberg.com\u002Fpapers\u002Fgeneration_cvpr11.pdf):star::star::star::star:\n\n**[3]** Vinyals, Oriol, et al. \"**Show and tell: A neural image caption generator**\". In arXiv preprint arXiv:1411.4555, 2014. [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1411.4555.pdf):star::star::star:\n\n**[4]** Donahue, Jeff, et al. 
\"**Long-term recurrent convolutional networks for visual recognition and description**\". In arXiv preprint arXiv:1411.4389 ,2014. [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1411.4389.pdf)\n\n**[5]** Karpathy, Andrej, and Li Fei-Fei. \"**Deep visual-semantic alignments for generating image descriptions**\". In arXiv preprint arXiv:1412.2306, 2014. [[pdf]](https:\u002F\u002Fcs.stanford.edu\u002Fpeople\u002Fkarpathy\u002Fcvpr2015.pdf):star::star::star::star::star:\n\n**[6]** Karpathy, Andrej, Armand Joulin, and Fei Fei F. Li. \"**Deep fragment embeddings for bidirectional image sentence mapping**\". In Advances in neural information processing systems, 2014. [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1406.5679v1.pdf):star::star::star::star:\n\n**[7]** Fang, Hao, et al. \"**From captions to visual concepts and back**\". In arXiv preprint arXiv:1411.4952, 2014. [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1411.4952v3.pdf):star::star::star::star::star:\n\n**[8]** Chen, Xinlei, and C. Lawrence Zitnick. \"**Learning a recurrent visual representation for image caption generation**\". In arXiv preprint arXiv:1411.5654, 2014. [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1411.5654v1.pdf):star::star::star::star:\n\n**[9]** Mao, Junhua, et al. \"**Deep captioning with multimodal recurrent neural networks (m-rnn)**\". In arXiv preprint arXiv:1412.6632, 2014. [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1412.6632v5.pdf):star::star::star:\n\n**[10]** Xu, Kelvin, et al. \"**Show, attend and tell: Neural image caption generation with visual attention**\". In arXiv preprint arXiv:1502.03044, 2015. [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1502.03044v3.pdf):star::star::star::star::star:\n\n## 3.5 Machine Translation\n\n> Some milestone papers are listed in RNN \u002F Seq-to-Seq topic.\n\n**[1]** Luong, Minh-Thang, et al. \"**Addressing the rare word problem in neural machine translation**.\" arXiv preprint arXiv:1410.8206 (2014). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1410.8206) :star::star::star::star:\n\n\n**[2]** Sennrich, et al. \"**Neural Machine Translation of Rare Words with Subword Units**\". In arXiv preprint arXiv:1508.07909, 2015. [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1508.07909.pdf):star::star::star:\n\n**[3]** Luong, Minh-Thang, Hieu Pham, and Christopher D. Manning. \"**Effective approaches to attention-based neural machine translation**.\" arXiv preprint arXiv:1508.04025 (2015). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1508.04025) :star::star::star::star:\n\n**[4]** Chung, et al. \"**A Character-Level Decoder without Explicit Segmentation for Neural Machine Translation**\". In arXiv preprint arXiv:1603.06147, 2016. [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1603.06147.pdf):star::star:\n\n**[5]** Lee, et al. \"**Fully Character-Level Neural Machine Translation without Explicit Segmentation**\". In arXiv preprint arXiv:1610.03017, 2016. [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1610.03017.pdf):star::star::star::star::star:\n\n**[6]** Wu, Schuster, Chen, Le, et al. \"**Google's Neural Machine Translation System: Bridging the Gap between Human and Machine Translation**\". In arXiv preprint arXiv:1609.08144v2, 2016. [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1609.08144v2.pdf) **(Milestone)** :star::star::star::star:\n\n## 3.6 Robotics\n\n**[1]** Koutník, Jan, et al. 
\"**Evolving large-scale neural networks for vision-based reinforcement learning**.\" Proceedings of the 15th annual conference on Genetic and evolutionary computation. ACM, 2013. [[pdf]](http:\u002F\u002Frepository.supsi.ch\u002F4550\u002F1\u002Fkoutnik2013gecco.pdf) :star::star::star:\n\n**[2]** Levine, Sergey, et al. \"**End-to-end training of deep visuomotor policies**.\" Journal of Machine Learning Research 17.39 (2016): 1-40. [[pdf]](http:\u002F\u002Fwww.jmlr.org\u002Fpapers\u002Fvolume17\u002F15-522\u002F15-522.pdf) :star::star::star::star::star:\n\n**[3]** Pinto, Lerrel, and Abhinav Gupta. \"**Supersizing self-supervision: Learning to grasp from 50k tries and 700 robot hours**.\" arXiv preprint arXiv:1509.06825 (2015). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1509.06825) :star::star::star:\n\n**[4]** Levine, Sergey, et al. \"**Learning Hand-Eye Coordination for Robotic Grasping with Deep Learning and Large-Scale Data Collection**.\" arXiv preprint arXiv:1603.02199 (2016). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1603.02199) :star::star::star::star:\n\n**[5]** Zhu, Yuke, et al. \"**Target-driven Visual Navigation in Indoor Scenes using Deep Reinforcement Learning**.\" arXiv preprint arXiv:1609.05143 (2016). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1609.05143) :star::star::star::star:\n\n**[6]** Yahya, Ali, et al. \"**Collective Robot Reinforcement Learning with Distributed Asynchronous Guided Policy Search**.\" arXiv preprint arXiv:1610.00673 (2016). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1610.00673) :star::star::star::star:\n\n**[7]** Gu, Shixiang, et al. \"**Deep Reinforcement Learning for Robotic Manipulation**.\" arXiv preprint arXiv:1610.00633 (2016). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1610.00633) :star::star::star::star:\n\n**[8]** A Rusu, M Vecerik, Thomas Rothörl, N Heess, R Pascanu, R Hadsell.\"**Sim-to-Real Robot Learning from Pixels with Progressive Nets**.\" arXiv preprint arXiv:1610.04286 (2016). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1610.04286.pdf) :star::star::star::star:\n\n**[9]** Mirowski, Piotr, et al. \"**Learning to navigate in complex environments**.\" arXiv preprint arXiv:1611.03673 (2016). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1611.03673) :star::star::star::star:\n\n## 3.7 Art\n\n**[1]** Mordvintsev, Alexander; Olah, Christopher; Tyka, Mike (2015). \"**Inceptionism: Going Deeper into Neural Networks**\". Google Research. [[html]](https:\u002F\u002Fresearch.googleblog.com\u002F2015\u002F06\u002Finceptionism-going-deeper-into-neural.html) **(Deep Dream)**\n:star::star::star::star:\n\n**[2]** Gatys, Leon A., Alexander S. Ecker, and Matthias Bethge. \"**A neural algorithm of artistic style**.\" arXiv preprint arXiv:1508.06576 (2015). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1508.06576) **(Outstanding Work, most successful method currently)** :star::star::star::star::star:\n\n**[3]** Zhu, Jun-Yan, et al. \"**Generative Visual Manipulation on the Natural Image Manifold**.\" European Conference on Computer Vision. Springer International Publishing, 2016. [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1609.03552) **(iGAN)** :star::star::star::star:\n\n**[4]** Champandard, Alex J. \"**Semantic Style Transfer and Turning Two-Bit Doodles into Fine Artworks**.\" arXiv preprint arXiv:1603.01768 (2016). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1603.01768) **(Neural Doodle)** :star::star::star::star:\n\n**[5]** Zhang, Richard, Phillip Isola, and Alexei A. Efros. 
\"**Colorful Image Colorization**.\" arXiv preprint arXiv:1603.08511 (2016). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1603.08511) :star::star::star::star:\n\n**[6]** Johnson, Justin, Alexandre Alahi, and Li Fei-Fei. \"**Perceptual losses for real-time style transfer and super-resolution**.\" arXiv preprint arXiv:1603.08155 (2016). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1603.08155.pdf) :star::star::star::star:\n\n**[7]** Vincent Dumoulin, Jonathon Shlens and Manjunath Kudlur. \"**A learned representation for artistic style**.\" arXiv preprint arXiv:1610.07629 (2016). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1610.07629v1.pdf) :star::star::star::star:\n\n**[8]** Gatys, Leon and Ecker, et al.\"**Controlling Perceptual Factors in Neural Style Transfer**.\" arXiv preprint arXiv:1611.07865 (2016). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1611.07865.pdf) **(control style transfer over spatial location,colour information and across spatial scale)**:star::star::star::star:\n\n**[9]** Ulyanov, Dmitry and Lebedev, Vadim, et al. \"**Texture Networks: Feed-forward Synthesis of Textures and Stylized Images**.\" arXiv preprint arXiv:1603.03417(2016). [[pdf]](http:\u002F\u002Farxiv.org\u002Fabs\u002F1603.03417) **(texture generation and style transfer)** :star::star::star::star:\n\n**[10]** Yijun Li, Ming-Yu Liu ,Xueting Li, Ming-Hsuan Yang,Jan Kautz (NVIDIA). \"**A Closed-form Solution to Photorealistic Image Stylization**.\" arXiv preprint arXiv:1802.06474(2018). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1802.06474.pdf) **(Very fast and ultra realistic style transfer)** :star::star::star::star:\n\n## 3.8 Object Segmentation\n\n**[1]** J. Long, E. Shelhamer, and T. Darrell, “**Fully convolutional networks for semantic segmentation**.” in CVPR, 2015. [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1411.4038v2.pdf) :star::star::star::star::star:\n\n**[2]** L.-C. Chen, G. Papandreou, I. Kokkinos, K. Murphy, and A. L. Yuille. \"**Semantic image segmentation with deep convolutional nets and fully connected crfs**.\" In ICLR, 2015. [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1606.00915v1.pdf) :star::star::star::star::star:\n\n**[3]** Pinheiro, P.O., Collobert, R., Dollar, P. \"**Learning to segment object candidates.**\" In: NIPS. 2015. [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.06204v2.pdf) :star::star::star::star:\n\n**[4]** Dai, J., He, K., Sun, J. \"**Instance-aware semantic segmentation via multi-task network cascades**.\" in CVPR. 2016 [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1512.04412v1.pdf) :star::star::star:\n\n**[5]** Dai, J., He, K., Sun, J. \"**Instance-sensitive Fully Convolutional Networks**.\" arXiv preprint arXiv:1603.08678 (2016). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1603.08678v1.pdf) :star::star::star:\n\n\n","# 深度学习论文阅读路线图\n\n>如果你是深度学习领域的初学者，你可能会问的第一个问题是：“我应该从哪篇论文开始读起呢？”\n\n>这里有一份深度学习论文的阅读路线图！\n\n该路线图按照以下四条原则构建：\n\n- 由概览到细节\n- 由经典到最前沿\n- 由通用领域到特定方向\n- 聚焦最前沿\n\n你会发现许多非常新的论文，但都非常值得一读。\n\n我会继续向这份路线图中添加更多论文。\n\n\n---------------------------------------\n\n# 1 深度学习历史与基础\n\n## 1.0 书籍\n\n**[0]** Bengio, Yoshua, Ian J. Goodfellow, and Aaron Courville. \"**Deep learning**.\" 麻省理工学院出版社出版。（2015年）[[html]](http:\u002F\u002Fwww.deeplearningbook.org\u002F) **（深度学习圣经，你可以在阅读后续论文的同时阅读此书。）** :star::star::star::star::star:\n\n## 1.1 综述\n\n**[1]** LeCun, Yann, Yoshua Bengio, and Geoffrey Hinton. \"**Deep learning**.\" Nature 521.7553 (2015): 436-444. 
[[pdf]](http:\u002F\u002Fwww.cs.toronto.edu\u002F~hinton\u002Fabsps\u002FNatureDeepReview.pdf) **（三位巨头的综述）** :star::star::star::star::star:\n\n## 1.2 深度信念网络(DBN)（深度学习黎明期的里程碑）\n\n**[2]** Hinton, Geoffrey E., Simon Osindero, and Yee-Whye Teh. \"**A fast learning algorithm for deep belief nets**.\" Neural computation 18.7 (2006): 1527-1554. [[pdf]](http:\u002F\u002Fwww.cs.toronto.edu\u002F~hinton\u002Fabsps\u002Fncfast.pdf)**（深度学习黎明期）** :star::star::star:\n\n**[3]** Hinton, Geoffrey E., and Ruslan R. Salakhutdinov. \"**Reducing the dimensionality of data with neural networks**.\" Science 313.5786 (2006): 504-507. [[pdf]](http:\u002F\u002Fwww.cs.toronto.edu\u002F~hinton\u002Fscience.pdf) **（里程碑，展示了深度学习的潜力）** :star::star::star:\n\n## 1.3 ImageNet发展史（深度学习由此爆发）\n\n**[4]** Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton. \"**Imagenet classification with deep convolutional neural networks**.\" 神经信息处理系统进展。2012年。[[pdf]](http:\u002F\u002Fpapers.nips.cc\u002Fpaper\u002F4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf) **（AlexNet，深度学习的突破）** :star::star::star::star::star:\n\n**[5]** Simonyan, Karen, and Andrew Zisserman. \"**Very deep convolutional networks for large-scale image recognition**.\" arXiv预印本 arXiv:1409.1556 (2014). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1409.1556.pdf) **（VGGNet，神经网络变得非常深！）** :star::star::star:\n\n**[6]** Szegedy, Christian, et al. \"**Going deeper with convolutions**.\" IEEE计算机视觉与模式识别会议论文集。2015年。[[pdf]](http:\u002F\u002Fwww.cv-foundation.org\u002Fopenaccess\u002Fcontent_cvpr_2015\u002Fpapers\u002FSzegedy_Going_Deeper_With_2015_CVPR_paper.pdf) **（GoogLeNet）** :star::star::star:\n\n**[7]** He, Kaiming, et al. \"**Deep residual learning for image recognition**.\" arXiv预印本 arXiv:1512.03385 (2015). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1512.03385.pdf) **（ResNet，非常非常深的网络，CVPR最佳论文）** :star::star::star::star::star:\n\n## 1.4 语音识别发展史\n\n**[8]** Hinton, Geoffrey, et al. \"**Deep neural networks for acoustic modeling in speech recognition: The shared views of four research groups**.\" IEEE信号处理杂志 29.6 (2012): 82-97. [[pdf]](http:\u002F\u002Fcs224d.stanford.edu\u002Fpapers\u002Fmaas_paper.pdf) **（语音识别领域的突破）** :star::star::star::star:\n\n**[9]** Graves, Alex, Abdel-rahman Mohamed, and Geoffrey Hinton. \"**Speech recognition with deep recurrent neural networks**.\" 2013 IEEE国际声学、语音和信号处理会议。IEEE，2013年。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1303.5778.pdf) **（RNN）** :star::star::star:\n\n**[10]** Graves, Alex, and Navdeep Jaitly. \"**Towards End-To-End Speech Recognition with Recurrent Neural Networks**.\" ICML. 第14卷。2014年。[[pdf]](http:\u002F\u002Fwww.jmlr.org\u002Fproceedings\u002Fpapers\u002Fv32\u002Fgraves14.pdf) :star::star::star:\n\n**[11]** Sak, Haşim, et al. \"**Fast and accurate recurrent neural network acoustic models for speech recognition**.\" arXiv预印本 arXiv:1507.06947 (2015). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1507.06947) **（谷歌语音识别系统）** :star::star::star:\n\n**[12]** Amodei, Dario, et al. \"**Deep speech 2: End-to-end speech recognition in english and mandarin**.\" arXiv预印本 arXiv:1512.02595 (2015). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1512.02595.pdf) **（百度语音识别系统）** :star::star::star::star:\n\n**[13]** W. Xiong, J. Droppo, X. Huang, F. Seide, M. Seltzer, A. Stolcke, D. Yu, G. Zweig \"**Achieving Human Parity in Conversational Speech Recognition**.\" arXiv预印本 arXiv:1610.05256 (2016). 
[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1610.05256v1) **（语音识别领域的最前沿，微软）** :star::star::star::star:\n\n>阅读完以上论文后，你将对深度学习的历史、深度学习模型的基本架构（包括CNN、RNN、LSTM）以及深度学习如何应用于图像和语音识别问题有一个基本的了解。接下来的几篇论文将带你深入理解深度学习的方法、深度学习在不同应用领域的实践以及该领域的前沿进展。建议你可以根据自己的兴趣和研究方向选择阅读以下论文。\n\n#2 深度学习方法\n\n## 2.1 模型\n\n**[14]** Hinton, Geoffrey E., 等。\"通过防止特征检测器的协同适应来改进神经网络\"。arXiv 预印本 arXiv:1207.0580 (2012)。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1207.0580.pdf) **(Dropout)** :star::star::star:\n\n**[15]** Srivastava, Nitish, 等。\"Dropout：一种简单的方法来防止神经网络过拟合\"。机器学习研究杂志 15.1 (2014)：1929-1958。[[pdf]](https:\u002F\u002Fwww.cs.toronto.edu\u002F~hinton\u002Fabsps\u002FJMLRdropout.pdf) :star::star::star:\n\n**[16]** Ioffe, Sergey, 和 Christian Szegedy。\"批归一化：通过减少内部协变量偏移加速深度网络训练\"。arXiv 预印本 arXiv:1502.03167 (2015)。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1502.03167) **(2015 年杰出工作)** :star::star::star::star:\n\n**[17]** Ba, Jimmy Lei, Jamie Ryan Kiros, 和 Geoffrey E. Hinton。\"层归一化\"。arXiv 预印本 arXiv:1607.06450 (2016)。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1607.06450.pdf?utm_source=sciontist.com&utm_medium=refer&utm_campaign=promote) **(批归一化的升级版)** :star::star::star::star:\n\n**[18]** Courbariaux, Matthieu, 等。\"二值化神经网络：将权重和激活限制为 +1 或 −1 的神经网络训练\"。[[pdf]](https:\u002F\u002Fpdfs.semanticscholar.org\u002Ff832\u002Fb16cb367802609d91d400085eb87d630212a.pdf) **(新模型，速度快)** :star::star::star:\n\n**[19]** Jaderberg, Max, 等。\"使用合成梯度解耦的神经接口\"。arXiv 预印本 arXiv:1608.05343 (2016)。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1608.05343) **(训练方法的创新，令人惊叹的工作)** :star::star::star::star::star:\n\n**[20]** Chen, Tianqi, Ian Goodfellow, 和 Jonathon Shlens。\"Net2net：通过知识迁移加速学习\"。arXiv 预印本 arXiv:1511.05641 (2015)。[[pdf]](https:\u002F\u002Farxiv.org\u002Fabs\u002F1511.05641) **(修改已训练网络以减少训练轮次)** :star::star::star:\n\n**[21]** Wei, Tao, 等。\"网络形态学\"。arXiv 预印本 arXiv:1603.01670 (2016)。[[pdf]](https:\u002F\u002Farxiv.org\u002Fabs\u002F1603.01670) **(修改已训练网络以减少训练轮次)** :star::star::star:\n\n## 2.2 优化\n\n**[22]** Sutskever, Ilya, 等。\"关于深度学习中初始化和动量的重要性\"。ICML (3) 28 (2013)：1139-1147。[[pdf]](http:\u002F\u002Fwww.jmlr.org\u002Fproceedings\u002Fpapers\u002Fv28\u002Fsutskever13.pdf) **(动量优化器)** :star::star:\n\n**[23]** Kingma, Diederik, 和 Jimmy Ba。\"Adam：一种随机优化方法\"。arXiv 预印本 arXiv:1412.6980 (2014)。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1412.6980) **(目前可能使用最广泛)** :star::star::star:\n\n**[24]** Andrychowicz, Marcin, 等。\"通过梯度下降学习梯度下降\"。arXiv 预印本 arXiv:1606.04474 (2016)。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1606.04474) **(神经优化器，令人惊叹的工作)** :star::star::star::star::star:\n\n**[25]** Han, Song, Huizi Mao, 和 William J. 
Dally。\"深度压缩：通过剪枝、量化训练和霍夫曼编码压缩深度神经网络\"。CoRR, abs\u002F1510.00149 2 (2015)。[[pdf]](https:\u002F\u002Fpdfs.semanticscholar.org\u002F5b6c\u002F9dda1d88095fa4aac1507348e498a1f2e863.pdf) **(ICLR 最佳论文，使神经网络运行更快的新方向，DeePhi 科技初创公司)** :star::star::star::star::star:\n\n**[26]** Iandola, Forrest N., 等。\"SqueezeNet：参数减少 50 倍、模型大小小于 1MB 却达到 AlexNet 级别准确率\"。arXiv 预印本 arXiv:1602.07360 (2016)。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1602.07360) **(也是优化神经网络的新方向，DeePhi 科技初创公司)** :star::star::star::star:\n\n**[27]** Glorat Xavier, Bengio Yoshua, 等。\"理解训练深层前馈神经网络的困难\"。第十三次国际人工智能与统计会议论文集，PMLR 9：249-256，2010。[[pdf]](http:\u002F\u002Fproceedings.mlr.press\u002Fv9\u002Fglorot10a\u002Fglorot10a.pdf) :star::star::star::star:\n\n## 2.3 无监督学习 \u002F 深度生成模型\n\n**[28]** Le, Quoc V。\"利用大规模无监督学习构建高层特征\"。2013 年 IEEE 国际声学、语音和信号处理会议。IEEE，2013。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1112.6209.pdf&embed) **(里程碑，Andrew Ng，Google Brain 项目，猫)** :star::star::star::star:\n\n\n**[29]** Kingma, Diederik P., 和 Max Welling。\"变分自编码器\"。arXiv 预印本 arXiv:1312.6114 (2013)。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1312.6114) **(VAE)** :star::star::star::star:\n\n**[30]** Goodfellow, Ian, 等。\"生成对抗网络\"。神经信息处理系统进展。2014。[[pdf]](http:\u002F\u002Fpapers.nips.cc\u002Fpaper\u002F5423-generative-adversarial-nets.pdf) **(GAN，超酷的想法)** :star::star::star::star::star:\n\n**[31]** Radford, Alec, Luke Metz, 和 Soumith Chintala。\"使用深度卷积生成对抗网络进行无监督表征学习\"。arXiv 预印本 arXiv:1511.06434 (2015)。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.06434) **(DCGAN)** :star::star::star::star:\n\n**[32]** Gregor, Karol, 等。\"DRAW：用于图像生成的循环神经网络\"。arXiv 预印本 arXiv:1502.04623 (2015)。[[pdf]](http:\u002F\u002Fjmlr.org\u002Fproceedings\u002Fpapers\u002Fv37\u002Fgregor15.pdf) **(带有注意力机制的 VAE，杰出工作)** :star::star::star::star::star:\n\n**[33]** Oord, Aaron van den, Nal Kalchbrenner, 和 Koray Kavukcuoglu。\"像素递归神经网络\"。arXiv 预印本 arXiv:1601.06759 (2016)。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1601.06759) **(PixelRNN)** :star::star::star::star:\n\n**[34]** Oord, Aaron van den, 等。\"使用 PixelCNN 解码器进行条件图像生成\"。arXiv 预印本 arXiv:1606.05328 (2016)。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1606.05328) **(PixelCNN)** :star::star::star::star:\n\n**[34]** S. 
Mehri 等，\"SampleRNN：一种无条件端到端神经音频生成模型\"。arXiv 预印本 arXiv:1612.07837 (2016)。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1612.07837.pdf) :star::star::star::star::star:\n\n## 2.4 RNN \u002F 序列到序列模型\n\n**[35]** 格雷夫斯，亚历克斯。“**利用循环神经网络生成序列**”。arXiv预印本 arXiv:1308.0850（2013）。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1308.0850) **(LSTM，生成效果非常好，展现了RNN的强大能力)** :star::star::star::star:\n\n**[36]** 曹庆贤等。“**使用RNN编码器-解码器学习短语表示以进行统计机器翻译**”。arXiv预印本 arXiv:1406.1078（2014）。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1406.1078) **(第一篇序列到序列论文)** :star::star::star::star:\n\n**[37]** 苏茨克维尔，伊利亚、奥里奥尔·维尼亚尔斯和阮国。 “**基于神经网络的序列到序列学习**”。神经信息处理系统进展。2014年。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1409.3215.pdf) **(杰出工作)** :star::star::star::star::star:\n\n**[38]** 巴达诺、曹庆贤和约书亚·本吉奥。“**通过联合学习对齐与翻译实现神经机器翻译**”。arXiv预印本 arXiv:1409.0473（2014）。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1409.0473v7.pdf) :star::star::star::star:\n\n**[39]** 维尼亚尔斯和阮国。“**神经对话模型**”。arXiv预印本 arXiv:1506.05869（2015）。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.05869.pdf%20(http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.05869.pdf)) **(用于聊天机器人的序列到序列模型)** :star::star::star:\n\n## 2.5 神经图灵机\n\n**[40]** 格雷夫斯，亚历克斯、格雷格·韦恩和伊沃·丹尼赫尔卡。“**神经图灵机**”。arXiv预印本 arXiv:1410.5401（2014）。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1410.5401.pdf) **(未来计算机的基本原型)** :star::star::star::star::star:\n\n**[41]** 扎伦巴，沃伊切赫和苏茨克维尔，伊利亚。“**强化学习神经图灵机**”。arXiv预印本 arXiv:1505.00521 362（2015）。[[pdf]](https:\u002F\u002Fpdfs.semanticscholar.org\u002Ff10e\u002F071292d593fef939e6ef4a59baf0bb3a6c2b.pdf) :star::star::star:\n\n**[42]** 韦斯顿，杰森、苏米特·乔普拉和安托万·博德斯。“**记忆网络**”。arXiv预印本 arXiv:1410.3916（2014）。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1410.3916) :star::star::star:\n\n\n**[43]** 苏赫巴塔尔，赛恩巴亚尔、杰森·韦斯顿和罗布·弗格斯。“**端到端记忆网络**”。神经信息处理系统进展。2015年。[[pdf]](http:\u002F\u002Fpapers.nips.cc\u002Fpaper\u002F5846-end-to-end-memory-networks.pdf) :star::star::star::star:\n\n**[44]** 维尼亚尔斯，奥里奥尔、梅雷·福图纳托和纳夫迪普·贾特利。“**指针网络**”。神经信息处理系统进展。2015年。[[pdf]](http:\u002F\u002Fpapers.nips.cc\u002Fpaper\u002F5866-pointer-networks.pdf) :star::star::star::star:\n\n**[45]** 格雷夫斯，亚历克斯等。“**使用具有动态外部内存的神经网络进行混合计算**”。《自然》杂志（2016）。[[pdf]](https:\u002F\u002Fwww.dropbox.com\u002Fs\u002F0a40xi702grx3dq\u002F2016-graves.pdf) **(里程碑，结合了上述论文的思想)** :star::star::star::star::star:\n\n## 2.6 深度强化学习\n\n**[46]** 米尼，沃洛迪米尔等。“**使用深度强化学习玩雅达利游戏**”。arXiv预印本 arXiv:1312.5602（2013）。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1312.5602.pdf)) **(第一篇名为深度强化学习的论文)** :star::star::star::star:\n\n**[47]** 米尼，沃洛迪米尔等。“**通过深度强化学习达到人类水平的控制**”。《自然》杂志518卷7540期（2015）：529–533页。[[pdf]](https:\u002F\u002Fstorage.googleapis.com\u002Fdeepmind-data\u002Fassets\u002Fpapers\u002FDeepMindNature14236Paper.pdf) **(里程碑)** :star::star::star::star::star:\n\n**[48]** 王子宇、南多·德·弗雷塔斯和马克·兰克托。“**用于深度强化学习的决斗网络架构**”。arXiv预印本 arXiv:1511.06581（2015）。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.06581) **(ICLR最佳论文，极具创意)** :star::star::star::star:\n\n**[49]** 米尼，沃洛迪米尔等。“**深度强化学习的异步方法**”。arXiv预印本 arXiv:1602.01783（2016）。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1602.01783) **(最先进的方法)** :star::star::star::star::star:\n\n**[50]** 利利克拉普，蒂莫西·P.等。“**深度强化学习中的连续控制**”。arXiv预印本 arXiv:1509.02971（2015）。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1509.02971) **(DDPG)** :star::star::star::star:\n\n**[51]** 古世翔等。“**基于模型加速的连续深度Q学习**”。arXiv预印本 arXiv:1603.00748（2016）。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1603.00748) **(NAF)** :star::star::star::star:\n\n**[52]** 舒尔曼，约翰等。“**信任域策略优化**”。CoRR, 
abs\u002F1502.05477（2015）。[[pdf]](http:\u002F\u002Fwww.jmlr.org\u002Fproceedings\u002Fpapers\u002Fv37\u002Fschulman15.pdf) **(TRPO)** :star::star::star::star:\n\n**[53]** 西尔弗，戴维等。“**利用深度神经网络和树搜索掌握围棋**”。《自然》杂志529卷7587期（2016）：484–489页。[[pdf]](http:\u002F\u002Fwillamette.edu\u002F~levenick\u002Fcs448\u002FgoNature.pdf) **(AlphaGo)** :star::star::star::star::star:\n\n## 2.7 深度迁移学习 \u002F 终身学习 \u002F 尤其是针对RL\n\n**[54]** 本吉奥，约书亚。“**无监督与迁移学习的深度学习表示**”。ICML 无监督与迁移学习研讨会论文集 27（2012）：17–36页。[[pdf]](http:\u002F\u002Fwww.jmlr.org\u002Fproceedings\u002Fpapers\u002Fv27\u002Fbengio12a\u002Fbengio12a.pdf) **(教程)** :star::star::star:\n\n**[55]** 西尔弗，丹尼尔·L.、杨强和李良浩。“**终身机器学习系统：超越学习算法**”。AAAI春季研讨会：终身机器学习。2013年。[[pdf]](http:\u002F\u002Fciteseerx.ist.psu.edu\u002Fviewdoc\u002Fdownload?doi=10.1.1.696.7800&rep=rep1&type=pdf) **(关于终身学习的简要讨论)** :star::star::star:\n\n**[56]** 辛顿，杰弗里、奥里奥尔·维尼亚尔斯和杰夫·迪恩。“**蒸馏神经网络中的知识**”。arXiv预印本 arXiv:1503.02531（2015）。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1503.02531) **(奠基之作)** :star::star::star::star:\n\n**[57]** 鲁苏，安德烈·A.等。“**策略蒸馏**”。arXiv预印本 arXiv:1511.06295（2015）。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.06295) **(强化学习领域)** :star::star::star:\n\n**[58]** 帕里索托，埃米利奥、吉米·雷·巴和鲁斯兰·萨拉胡丁诺夫。“**演员模仿：深度多任务与迁移强化学习**”。arXiv预印本 arXiv:1511.06342（2015）。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.06342) **(强化学习领域)** :star::star::star:\n\n**[59]** 鲁苏，安德烈·A.等。“**渐进式神经网络**”。arXiv预印本 arXiv:1606.04671（2016）。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1606.04671) **(杰出工作，新颖的想法)** :star::star::star::star::star:\n\n## 2.8 单样本（One-Shot）深度学习\n\n**[60]** Lake, Brenden M., Ruslan Salakhutdinov, and Joshua B. Tenenbaum. \"**通过概率程序归纳实现人类水平的概念学习**.\" Science 350.6266 (2015): 1332-1338. [[pdf]](http:\u002F\u002Fclm.utexas.edu\u002Fcompjclub\u002Fwp-content\u002Fuploads\u002F2016\u002F02\u002Flake2015.pdf) **（无深度学习，但值得一读）** :star::star::star::star::star:\n\n**[61]** Koch, Gregory, Richard Zemel, and Ruslan Salakhutdinov. \"**用于单样本图像识别的暹罗神经网络**.\"（2015） [[pdf]](http:\u002F\u002Fwww.cs.utoronto.ca\u002F~gkoch\u002Ffiles\u002Fmsc-thesis.pdf) :star::star::star:\n\n**[62]** Santoro, Adam, et al. \"**基于记忆增强神经网络的单样本学习**.\" arXiv预印本 arXiv:1605.06065 (2016). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1605.06065) **（单样本学习的基础步骤）** :star::star::star::star:\n\n**[63]** Vinyals, Oriol, et al. \"**用于单样本学习的匹配网络**.\" arXiv预印本 arXiv:1606.04080 (2016). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1606.04080) :star::star::star:\n\n**[64]** Hariharan, Bharath, and Ross Girshick. \"**低样本量下的视觉目标识别**.\" arXiv预印本 arXiv:1606.02819 (2016). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1606.02819) **（向大规模数据迈进的一步）** :star::star::star::star:\n\n\n# 3 应用\n\n## 3.1 自然语言处理\n\n**[1]** Antoine Bordes, et al. \"**开放文本语义解析中词与语义表示的联合学习**.\" AISTATS(2012) [[pdf]](https:\u002F\u002Fwww.hds.utc.fr\u002F~bordesan\u002Fdokuwiki\u002Flib\u002Fexe\u002Ffetch.php?id=en%3Apubli&cache=cache&media=en:bordes12aistats.pdf) :star::star::star::star:\n\n**[2]** Mikolov, et al. \"**词与短语的分布式表示及其组合性**.\" ANIPS(2013): 3111-3119 [[pdf]](http:\u002F\u002Fpapers.nips.cc\u002Fpaper\u002F5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf) **（word2vec）** :star::star::star:\n\n**[3]** Sutskever, et al. \"**基于神经网络的序列到序列学习**.\" ANIPS(2014) [[pdf]](http:\u002F\u002Fpapers.nips.cc\u002Fpaper\u002F5346-sequence-to-sequence-learning-with-neural-networks.pdf) :star::star::star:\n\n**[4]** Ankit Kumar, et al. 
\"**“问我任何问题：用于自然语言处理的动态记忆网络**.\" arXiv预印本 arXiv:1506.07285(2015) [[pdf]](https:\u002F\u002Farxiv.org\u002Fabs\u002F1506.07285) :star::star::star::star:\n\n**[5]** Yoon Kim, et al. \"**字符感知型神经语言模型**.\" NIPS(2015) arXiv预印本 arXiv:1508.06615(2015) [[pdf]](https:\u002F\u002Farxiv.org\u002Fabs\u002F1508.06615) :star::star::star::star:\n\n**[6]** Jason Weston, et al. \"**迈向人工智能完备问答：一组先决玩具任务**.\" arXiv预印本 arXiv:1502.05698(2015) [[pdf]](https:\u002F\u002Farxiv.org\u002Fabs\u002F1502.05698) **（bAbI任务）** :star::star::star:\n\n**[7]** Karl Moritz Hermann, et al. \"**教机器阅读和理解**.\" arXiv预印本 arXiv:1506.03340(2015) [[pdf]](https:\u002F\u002Farxiv.org\u002Fabs\u002F1506.03340) **（CNN\u002F每日邮报完形填空式问题）** :star::star:\n\n**[8]** Alexis Conneau, et al. \"**用于自然语言处理的超深卷积网络**.\" arXiv预印本 arXiv:1606.01781(2016) [[pdf]](https:\u002F\u002Farxiv.org\u002Fabs\u002F1606.01781) **（文本分类领域的最新技术）** :star::star::star:\n\n**[9]** Armand Joulin, et al. \"**高效文本分类的技巧集**.\" arXiv预印本 arXiv:1607.01759(2016) [[pdf]](https:\u002F\u002Farxiv.org\u002Fabs\u002F1607.01759) **（略逊于最先进技术，但速度更快）** :star::star::star:\n\n## 3.2 目标检测\n\n**[1]** Szegedy, Christian, Alexander Toshev, and Dumitru Erhan. \"**用于目标检测的深度神经网络**.\" 神经信息处理系统进展. 2013. [[pdf]](http:\u002F\u002Fpapers.nips.cc\u002Fpaper\u002F5207-deep-neural-networks-for-object-detection.pdf) :star::star::star:\n\n**[2]** Girshick, Ross, et al. \"**用于精确目标检测和语义分割的丰富特征层次结构**.\" IEEE计算机视觉与模式识别会议论文集. 2014. [[pdf]](http:\u002F\u002Fwww.cv-foundation.org\u002Fopenaccess\u002Fcontent_cvpr_2014\u002Fpapers\u002FGirshick_Rich_Feature_Hierarchies_2014_CVPR_paper.pdf) **（RCNN）** :star::star::star::star::star:\n\n**[3]** He, Kaiming, et al. \"**用于视觉识别的深度卷积网络中的空间金字塔池化**.\" 欧洲计算机视觉大会. Springer International Publishing, 2014. [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1406.4729) **（SPPNet）** :star::star::star::star:\n\n**[4]** Girshick, Ross. \"**Fast R-CNN**.\" IEEE国际计算机视觉会议论文集. 2015. [[pdf]](https:\u002F\u002Fpdfs.semanticscholar.org\u002F8f67\u002F64a59f0d17081f2a2a9d06f4ed1cdea1a0ad.pdf) :star::star::star::star:\n\n**[5]** Ren, Shaoqing, et al. \"**Faster R-CNN：通过区域建议网络实现实时目标检测**.\" 神经信息处理系统进展. 2015. [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.01497.pdf) :star::star::star::star:\n\n**[6]** Redmon, Joseph, et al. \"**你只需看一眼：统一、实时的目标检测**.\" arXiv预印本 arXiv:1506.02640 (2015). [[pdf]](http:\u002F\u002Fhomes.cs.washington.edu\u002F~ali\u002Fpapers\u002FYOLO.pdf) **（YOLO，杰出工作，非常实用）** :star::star::star::star::star:\n\n**[7]** Liu, Wei, et al. \"**SSD：单次多框检测器**.\" arXiv预印本 arXiv:1512.02325 (2015). [[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1512.02325) :star::star::star:\n\n**[8]** Dai, Jifeng, et al. \"**R-FCN：基于区域的全卷积网络进行目标检测**.\" arXiv预印本 arXiv:1605.06409 (2016). [[pdf]](https:\u002F\u002Farxiv.org\u002Fabs\u002F1605.06409) :star::star::star::star:\n\n**[9]** He, Gkioxari, et al. \"**Mask R-CNN**\" arXiv预印本 arXiv:1703.06870 (2017). [[pdf]](https:\u002F\u002Farxiv.org\u002Fabs\u002F1703.06870) :star::star::star::star:\n\n**[10]** Bochkovskiy, Alexey, et al. \"**YOLOv4：目标检测的最佳速度与精度。**\" arXiv预印本 arXiv:2004.10934 (2020). [[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2004.10934) :star::star::star::star:\n\n\n**[11]** Tan, Mingxing, et al. “**EfficientDet：可扩展且高效的目标检测。**\" arXiv预印本 arXiv:1911.09070 (2019). 
[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1911.09070) :star::star::star::star::star:\n\n## 3.3 视觉跟踪\n\n**[1]** Wang, Naiyan, 和 Dit-Yan Yeung。\"**学习用于视觉跟踪的深度紧凑图像表示**\"。神经信息处理系统进展。2013年。[[pdf]](http:\u002F\u002Fpapers.nips.cc\u002Fpaper\u002F5192-learning-a-deep-compact-image-representation-for-visual-tracking.pdf) **（首个使用深度学习进行视觉跟踪的论文，DLT跟踪器）** :star::star::star:\n\n**[2]** Wang, Naiyan 等。\"**迁移丰富的特征层次结构以实现鲁棒的视觉跟踪**\"。arXiv 预印本 arXiv:1501.04587 (2015)。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1501.04587) **（SO-DLT）** :star::star::star::star:\n\n**[3]** Wang, Lijun 等。\"**基于全卷积网络的视觉跟踪**\"。IEEE 国际计算机视觉会议论文集。2015年。[[pdf]](http:\u002F\u002Fwww.cv-foundation.org\u002Fopenaccess\u002Fcontent_iccv_2015\u002Fpapers\u002FWang_Visual_Tracking_With_ICCV_2015_paper.pdf) **（FCNT）** :star::star::star::star:\n\n**[4]** Held, David, Sebastian Thrun 和 Silvio Savarese。\"**利用深度回归网络以每秒100帧的速度学习跟踪**\"。arXiv 预印本 arXiv:1604.01802 (2016)。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1604.01802) **（GOTURN，作为深度学习方法速度非常快，但仍远落后于非深度学习方法）** :star::star::star::star:\n\n**[5]** Bertinetto, Luca 等。\"**用于目标跟踪的全卷积暹罗网络**\"。arXiv 预印本 arXiv:1606.09549 (2016)。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1606.09549) **（SiameseFC，实时目标跟踪的新 state-of-the-art）** :star::star::star::star:\n\n**[6]** Martin Danelljan、Andreas Robinson、Fahad Khan 和 Michael Felsberg。\"**超越相关滤波器：学习用于视觉跟踪的连续卷积算子**\"。ECCV（2016）。[[pdf]](http:\u002F\u002Fwww.cvl.isy.liu.se\u002Fresearch\u002Fobjrec\u002Fvisualtracking\u002Fconttrack\u002FC-COT_ECCV16.pdf) **（C-COT）** :star::star::star::star:\n\n**[7]** Nam, Hyeonseob、Mooyeol Baek 和 Bohyung Han。\"**在树状结构中建模和传播 CNN 以用于视觉跟踪**\"。arXiv 预印本 arXiv:1608.07242 (2016)。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1608.07242) **（VOT2016 冠军，TCNN）** :star::star::star::star:\n\n## 3.4 图像描述\n**[1]** Farhadi,Ali等。\"**每张图片都讲述一个故事：从图像生成句子**\"。载于计算机视觉ECCV 2010。Springer Berlin Heidelberg：15-29，2010年。[[pdf]](https:\u002F\u002Fwww.cs.cmu.edu\u002F~afarhadi\u002Fpapers\u002Fsentence.pdf) :star::star::star:\n\n**[2]** Kulkarni, Girish 等。\"**婴儿语言：理解和生成图像描述**\"。载于第24届CVPR会议论文集，2011年。[[pdf]](http:\u002F\u002Ftamaraberg.com\u002Fpapers\u002Fgeneration_cvpr11.pdf):star::star::star::star:\n\n**[3]** Vinyals, Oriol 等。\"**展示与讲述：一种神经图像字幕生成器**\"。载于arXiv预印本arXiv:1411.4555，2014年。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1411.4555.pdf):star::star::star:\n\n**[4]** Donahue, Jeff 等。\"**用于视觉识别和描述的长期循环卷积网络**\"。载于arXiv预印本arXiv:1411.4389，2014年。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1411.4389.pdf)\n\n**[5]** Karpathy, Andrej 和 Li Fei-Fei。\"**用于生成图像描述的深度视觉-语义对齐**\"。载于arXiv预印本arXiv:1412.2306，2014年。[[pdf]](https:\u002F\u002Fcs.stanford.edu\u002Fpeople\u002Fkarpathy\u002Fcvpr2015.pdf):star::star::star::star::star:\n\n**[6]** Karpathy, Andrej、Armand Joulin 和 Fei Fei F. Li。\"**用于双向图像-句子映射的深度片段嵌入**\"。载于神经信息处理系统进展，2014年。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1406.5679v1.pdf):star::star::star::star:\n\n**[7]** Fang, Hao 等。\"**从字幕到视觉概念再返回**\"。载于arXiv预印本arXiv:1411.4952，2014年。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1411.4952v3.pdf):star::star::star::star::star:\n\n**[8]** Chen, Xinlei 和 C. 
Lawrence Zitnick。\"**学习用于图像字幕生成的循环视觉表示**\"。载于arXiv预印本arXiv:1411.5654，2014年。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1411.5654v1.pdf):star::star::star::star:\n\n**[9]** Mao, Junhua 等。\"**使用多模态循环神经网络（m-rnn）进行深度字幕生成**\"。载于arXiv预印本arXiv:1412.6632，2014年。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1412.6632v5.pdf):star::star::star:\n\n**[10]** Xu, Kelvin 等。\"**展示、注意并讲述：带有视觉注意力的神经图像字幕生成**\"。载于arXiv预印本arXiv:1502.03044，2015年。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1502.03044v3.pdf):star::star::star::star::star:\n\n## 3.5 机器翻译\n\n> 一些里程碑式的论文已在 RNN \u002F Seq-to-Seq 主题中列出。\n\n**[1]** Luong, Minh-Thang 等。\"**解决神经机器翻译中的罕见词问题**\"。arXiv 预印本 arXiv:1410.8206 (2014)。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1410.8206) :star::star::star::star:\n\n\n**[2]** Sennrich 等。\"**使用子词单元进行罕见词的神经机器翻译**\"。载于arXiv预印本arXiv:1508.07909，2015年。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1508.07909.pdf):star::star::star:\n\n**[3]** Luong, Minh-Thang、Hieu Pham 和 Christopher D. Manning。\"**基于注意力的神经机器翻译的有效方法**\"。arXiv 预印本 arXiv:1508.04025 (2015)。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1508.04025) :star::star::star::star:\n\n**[4]** Chung 等。\"**用于神经机器翻译的无显式分词字符级解码器**\"。载于arXiv预印本arXiv:1603.06147，2016年。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1603.06147.pdf):star::star:\n\n**[5]** Lee 等。\"**完全基于字符级的无显式分词神经机器翻译**\"。载于arXiv预印本arXiv:1610.03017，2016年。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1610.03017.pdf):star::star::star::star::star:\n\n**[6]** Wu、Schuster、Chen、Le 等。\"**谷歌的神经机器翻译系统：弥合人类与机器翻译之间的差距**\"。载于arXiv预印本arXiv:1609.08144v2，2016年。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1609.08144v2.pdf) **（里程碑）** :star::star::star::star:\n\n## 3.6 机器人学\n\n**[1]** Koutník, Jan, 等。\"基于视觉的强化学习中大规模神经网络的演化\"。第15届遗传与进化计算年会论文集。ACM，2013年。[[pdf]](http:\u002F\u002Frepository.supsi.ch\u002F4550\u002F1\u002Fkoutnik2013gecco.pdf) :star::star::star:\n\n**[2]** Levine, Sergey, 等。\"深度视觉-运动策略的端到端训练\"。机器学习研究杂志 17.39 (2016): 1-40。[[pdf]](http:\u002F\u002Fwww.jmlr.org\u002Fpapers\u002Fvolume17\u002F15-522\u002F15-522.pdf) :star::star::star::star::star:\n\n**[3]** Pinto, Lerrel, 和 Abhinav Gupta。\"自我监督的超大规模扩展：从5万次尝试和700小时机器人数据中学习抓取\"。arXiv预印本 arXiv:1509.06825 (2015)。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1509.06825) :star::star::star:\n\n**[4]** Levine, Sergey, 等。\"利用深度学习和大规模数据收集学习机器人抓取中的手眼协调\"。arXiv预印本 arXiv:1603.02199 (2016)。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1603.02199) :star::star::star::star:\n\n**[5]** Zhu, Yuke, 等。\"基于深度强化学习的室内场景目标驱动视觉导航\"。arXiv预印本 arXiv:1609.05143 (2016)。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1609.05143) :star::star::star::star:\n\n**[6]** Yahya, Ali, 等。\"分布式异步引导策略搜索下的集体机器人强化学习\"。arXiv预印本 arXiv:1610.00673 (2016)。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1610.00673) :star::star::star::star:\n\n**[7]** Gu, Shixiang, 等。\"用于机器人操作的深度强化学习\"。arXiv预印本 arXiv:1610.00633 (2016)。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1610.00633) :star::star::star::star:\n\n**[8]** A Rusu, M Vecerik, Thomas Rothörl, N Heess, R Pascanu, R Hadsell。\"通过渐进式网络实现从像素到现实的机器人学习\"。arXiv预印本 arXiv:1610.04286 (2016)。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1610.04286.pdf) :star::star::star::star:\n\n**[9]** Mirowski, Piotr, 等。\"学习在复杂环境中导航\"。arXiv预印本 arXiv:1611.03673 (2016)。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1611.03673) :star::star::star::star:\n\n## 3.7 艺术\n\n**[1]** Mordvintsev, Alexander；Olah, Christopher；Tyka, 
Mike（2015）。\"Inceptionism：深入神经网络\"。谷歌研究。[[html]](https:\u002F\u002Fresearch.googleblog.com\u002F2015\u002F06\u002Finceptionism-going-deeper-into-neural.html) **（Deep Dream）**\n:star::star::star::star:\n\n**[2]** Gatys, Leon A., Alexander S. Ecker, 和 Matthias Bethge。\"一种艺术风格的神经算法\"。arXiv预印本 arXiv:1508.06576 (2015)。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1508.06576) **（杰出工作，目前最成功的方法）** :star::star::star::star::star:\n\n**[3]** Zhu, Jun-Yan, 等。\"自然图像流形上的生成式视觉操控\"。欧洲计算机视觉会议。施普林格国际出版，2016年。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1609.03552) **（iGAN）** :star::star::star::star:\n\n**[4]** Champandard, Alex J。\"语义风格迁移及将两比特涂鸦转化为精美艺术品\"。arXiv预印本 arXiv:1603.01768 (2016)。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1603.01768) **（Neural Doodle）** :star::star::star::star:\n\n**[5]** Zhang, Richard，Phillip Isola，和 Alexei A. Efros。\"彩色图像着色\"。arXiv预印本 arXiv:1603.08511 (2016)。[[pdf]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1603.08511) :star::star::star::star:\n\n**[6]** Johnson, Justin，Alexandre Alahi，和 Li Fei-Fei。\"用于实时风格迁移和超分辨率的感知损失\"。arXiv预印本 arXiv:1603.08155 (2016)。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1603.08155.pdf) :star::star::star::star:\n\n**[7]** Vincent Dumoulin，Jonathon Shlens 和 Manjunath Kudlur。\"一种艺术风格的学习表示\"。arXiv预印本 arXiv:1610.07629 (2016)。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1610.07629v1.pdf) :star::star::star::star:\n\n**[8]** Gatys, Leon 和 Ecker 等人。\"控制神经风格迁移中的感知因素\"。arXiv预印本 arXiv:1611.07865 (2016)。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1611.07865.pdf) **（可控制风格迁移的空间位置、色彩信息及空间尺度）** :star::star::star::star:\n\n**[9]** Ulyanov, Dmitry 和 Lebedev, Vadim 等人。\"纹理网络：纹理与风格化图像的前馈合成\"。arXiv预印本 arXiv:1603.03417 (2016)。[[pdf]](http:\u002F\u002Farxiv.org\u002Fabs\u002F1603.03417) **（纹理生成与风格迁移）** :star::star::star::star:\n\n**[10]** Yijun Li，Ming-Yu Liu，Xueting Li，Ming-Hsuan Yang，Jan Kautz（NVIDIA）。\"照片级真实图像风格化的闭式解\"。arXiv预印本 arXiv:1802.06474 (2018)。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1802.06474.pdf) **（非常快速且超逼真的风格迁移）** :star::star::star::star:\n\n## 3.8 目标分割\n\n**[1]** J. Long，E. Shelhamer，和 T. Darrell。\"用于语义分割的全卷积网络\"。CVPR，2015年。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1411.4038v2.pdf) :star::star::star::star::star:\n\n**[2]** L.-C. Chen，G. Papandreou，I. Kokkinos，K. Murphy，和 A. L. Yuille。\"使用深度卷积网络和全连接条件随机场进行语义图像分割\"。ICLR，2015年。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1606.00915v1.pdf) :star::star::star::star::star:\n\n**[3]** Pinheiro, P.O.，Collobert, R.，Dollar, P。\"学习分割目标候选\"。NIPS，2015年。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.06204v2.pdf) :star::star::star::star:\n\n**[4]** Dai, J.，He, K.，Sun, J。\"通过多任务网络级联实现实例感知的语义分割\"。CVPR，2016年。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1512.04412v1.pdf) :star::star::star:\n\n**[5]** Dai, J.，He, K.，Sun, J。\"实例敏感的全卷积网络\"。arXiv预印本 arXiv:1603.08678 (2016)。[[pdf]](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1603.08678v1.pdf) :star::star::star:","# Deep-Learning-Papers-Reading-Roadmap 快速上手指南\n\n本项目并非可执行的软件工具或代码库，而是一份**深度学习论文阅读路线图**。它整理了一份从基础到前沿的经典论文清单，旨在帮助初学者和研究者系统性地掌握深度学习领域。因此，本指南侧重于如何获取、浏览及利用这份资源，而非软件安装。\n\n## 环境准备\n\n由于本项目本质上是文档索引，对环境要求极低：\n\n*   **系统要求**：任何支持现代浏览器的操作系统（Windows, macOS, Linux）。\n*   **前置依赖**：\n    *   **Git**（可选）：用于克隆仓库到本地。\n    *   **PDF 阅读器**：用于阅读链接中的学术论文。\n    *   **学术网络环境**：部分论文源地址（如 arXiv, NIPS, IEEE）在国内访问可能较慢，建议配置好科研网络环境或使用学术镜像。\n\n## 获取与浏览步骤\n\n你可以通过以下两种方式查看路线图：\n\n### 方式一：在线直接浏览（推荐）\n直接访问 GitHub 仓库页面，即可看到渲染好的 Markdown 目录和论文链接。\n\n1.  
打开浏览器访问项目主页：\n    `https:\u002F\u002Fgithub.com\u002Ffloodsung\u002FDeep-Learning-Papers-Reading-Roadmap`\n2.  在 `README.md` 中按章节（如历史基础、模型方法、优化算法等）点击论文标题或 `[pdf]` 链接进行阅读。\n\n### 方式二：克隆到本地\n如果你希望离线查看或贡献内容，可以使用 Git 克隆：\n\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Ffloodsung\u002FDeep-Learning-Papers-Reading-Roadmap.git\ncd Deep-Learning-Papers-Reading-Roadmap\n```\n\n*国内加速方案*：如果 `github.com` 访问缓慢，可使用 Gitee 镜像（如有）或通过代理加速克隆：\n```bash\n# 使用国内镜像源示例 (若存在同步镜像)\ngit clone https:\u002F\u002Fgitee.com\u002Fmirrors\u002FDeep-Learning-Papers-Reading-Roadmap.git\n```\n*(注：若无特定镜像，请确保网络通畅后使用官方地址)*\n\n## 基本使用指南\n\n本项目的核心用法是**按图索骥**进行文献阅读。建议遵循以下路径：\n\n### 1. 新手入门路径\n按照 README 中的顺序，从 **第 1 章 (Deep Learning History and Basics)** 开始：\n*   **必读经典**：先阅读 `[0] Deep Learning Book` (花书) 建立理论框架。\n*   **综述概览**：阅读 `[1] Deep learning (Nature 2015)` 了解三大巨头的观点。\n*   **里程碑突破**：重点研读 `[4] AlexNet` 和 `[7] ResNet`，理解 CNN 的演进。\n\n### 2. 进阶研究方向\n具备基础后，根据兴趣跳转至 **第 2 章 (Deep Learning Method)** 的具体细分领域：\n*   **模型优化**：阅读 `[16] Batch Normalization` 和 `[23] Adam`  optimizer。\n*   **生成模型**：深入 `[30] GAN` 和 `[29] VAE` 系列论文。\n*   **序列模型**：研究 `[36] Seq-to-Seq` 和 `[35] LSTM` 相关文献。\n\n### 3. 阅读技巧\n*   **星号标记**：注意文中标记为 `:star::star::star::star::star:` 的论文，这些是作者强烈推荐的必读核心文章。\n*   **对比阅读**：例如在阅读图像识别时，将 `VGGNet` 与 `GoogLeNet` 对比，理解不同架构的设计思路。\n*   **代码复现**：虽然本项目只提供论文链接，但建议在阅读完关键论文（如 ResNet, Transformer 等）后，去 GitHub 搜索对应的开源实现代码进行复现，以加深理解。\n\n---\n*提示：该路线图会持续更新，建议定期 Pull 最新代码或刷新网页以获取最新添加的 SOTA (State-of-the-Art) 论文。*","某高校计算机系研究生李明刚进入深度学习领域，面对海量论文不知从何下手，急需构建系统的知识体系以开展图像识别课题研究。\n\n### 没有 Deep-Learning-Papers-Reading-Roadmap 时\n- **盲目搜索效率低**：在 Google Scholar 或 arXiv 上随机关键词搜索，容易陷入碎片化信息，花费数周仍找不到核心奠基之作。\n- **学习路径混乱**：不清楚应该先读经典的 DBN 还是直接看最新的 Transformer，缺乏“从轮廓到细节、从旧到新”的科学指引，导致基础不牢。\n- **错过关键里程碑**：极易忽略如 Hinton 团队的早期突破或 AlexNet 等转折点论文，难以理解技术演进的内在逻辑。\n- **时间成本高昂**：为了筛选高质量文献，不得不阅读大量低相关度的综述，严重拖慢科研进度和开题报告的准备。\n\n### 使用 Deep-Learning-Papers-Reading-Roadmap 后\n- **按图索骥高效入门**：直接跟随路线图从《Deep Learning》圣经书和三位巨头的综述开始，迅速建立对领域的宏观认知。\n- **循序渐进掌握脉络**：严格遵循从深度信念网络（DBN）到 ImageNet 爆发（AlexNet, ResNet）再到语音识别演进的路径，清晰把握技术迭代规律。\n- **精准锁定核心文献**：路线图明确标注了带星号的关键论文（如 ResNet、GoogLeNet），确保每一篇阅读的材料都是该阶段的精华。\n- **快速形成研究方案**：在短时间内梳理完历史与前沿，将原本需要一个月的文献调研压缩至一周，迅速聚焦到具体的改进算法设计上。\n\nDeep-Learning-Papers-Reading-Roadmap 通过提供一条经过验证的标准化学习路径，将新手从茫茫文海中解救出来，实现了从盲目摸索到系统精进的根本转变。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffloodsung_Deep-Learning-Papers-Reading-Roadmap_f40ccc0c.png","floodsung","Flood Sung","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Ffloodsung_59fb4692.jpg","Focus on AGI, Deep Reinforcement Learning, Meta Learning, Robot Learning, Physics-based Animation","moonshot.ai",null,"floodsung@gmail.com","floodsung.github.io","https:\u002F\u002Fgithub.com\u002Ffloodsung",[82],{"name":83,"color":84,"percentage":85},"Python","#3572A5",100,39480,7312,"2026-04-05T23:55:23",1,"","未说明",{"notes":93,"python":91,"dependencies":94},"该项目并非可运行的软件工具，而是一份深度学习论文的阅读路线图（书单）。它列出了从基础到前沿的经典学术论文及其链接，旨在指导初学者和研究者按顺序阅读。因此，该项目没有操作系统、GPU、内存、Python 版本或依赖库等运行环境需求。用户只需具备阅读 PDF 文档的能力即可。",[],[14],[97],"deep-learning","2026-03-27T02:49:30.150509","2026-04-07T04:00:18.157817",[101,106,111,116],{"id":102,"question_zh":103,"answer_zh":104,"source_url":105},20945,"为什么阅读路线图中包含 Word2Vec？它不算深度学习模型吗？","虽然 Word2Vec 本身只是一个浅层（单隐藏层）线性神经网络，不属于典型的深度模型，但它生成的词向量是许多深度学习模型（尤其是自然语言处理领域）的基础构建块。此外，理解 Word2Vec 及其变体（如 GloVe、fastText）的原理和优势，不仅具有教学意义，还能帮助用户更好地选择预训练词嵌入。因此，作为深度学习的学习路线图而非单纯的论文集合，包含 Word2Vec 
是合理的。","https:\u002F\u002Fgithub.com\u002Ffloodsung\u002FDeep-Learning-Papers-Reading-Roadmap\u002Fissues\u002F63",{"id":107,"question_zh":108,"answer_zh":109,"source_url":110},20946,"在 Windows 上运行 download.py 脚本报错或无法工作怎么办？","Windows 系统默认使用 GBK 编码而非 UTF-8，导致读取文件时出错。需要修改脚本第 87 行代码：\n原代码：with open('README.md') as readme:\n修正后：with open('README.md', 'r', encoding='utf-8') as readme:\n添加 encoding='utf-8' 参数即可解决该问题。","https:\u002F\u002Fgithub.com\u002Ffloodsung\u002FDeep-Learning-Papers-Reading-Roadmap\u002Fissues\u002F113",{"id":112,"question_zh":113,"answer_zh":114,"source_url":115},20947,"《Deep Learning》书籍的链接失效了，新地址是什么？","该书的新官方地址已更新为：http:\u002F\u002Fwww.deeplearningbook.org\u002F，请访问此链接获取最新内容。","https:\u002F\u002Fgithub.com\u002Ffloodsung\u002FDeep-Learning-Papers-Reading-Roadmap\u002Fissues\u002F6",{"id":117,"question_zh":118,"answer_zh":119,"source_url":120},20948,"路线图是否计划包含音频处理相关的论文（如 WaveNet）？","是的，维护者确认会将音频主题的相关论文列入路线图。例如 DeepMind 的 WaveNet 论文将被添加到\"Applications\u002FAudio\"（应用\u002F音频）部分，敬请期待更新。","https:\u002F\u002Fgithub.com\u002Ffloodsung\u002FDeep-Learning-Papers-Reading-Roadmap\u002Fissues\u002F19",[]]