[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-kjw0612--awesome-rnn":3,"tool-kjw0612--awesome-rnn":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 
代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",155373,2,"2026-04-14T11:34:08",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",108322,"2026-04-10T11:39:34",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 
协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[52,13,15,14],"插件",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":10,"last_commit_at":59,"category_tags":60,"status":17},4487,"LLMs-from-scratch","rasbt\u002FLLMs-from-scratch","LLMs-from-scratch 是一个基于 PyTorch 的开源教育项目，旨在引导用户从零开始一步步构建一个类似 ChatGPT 的大型语言模型（LLM）。它不仅是同名技术著作的官方代码库，更提供了一套完整的实践方案，涵盖模型开发、预训练及微调的全过程。\n\n该项目主要解决了大模型领域“黑盒化”的学习痛点。许多开发者虽能调用现成模型，却难以深入理解其内部架构与训练机制。通过亲手编写每一行核心代码，用户能够透彻掌握 Transformer 架构、注意力机制等关键原理，从而真正理解大模型是如何“思考”的。此外，项目还包含了加载大型预训练权重进行微调的代码，帮助用户将理论知识延伸至实际应用。\n\nLLMs-from-scratch 特别适合希望深入底层原理的 AI 开发者、研究人员以及计算机专业的学生。对于不满足于仅使用 API，而是渴望探究模型构建细节的技术人员而言，这是极佳的学习资源。其独特的技术亮点在于“循序渐进”的教学设计：将复杂的系统工程拆解为清晰的步骤，配合详细的图表与示例，让构建一个虽小但功能完备的大模型变得触手可及。无论你是想夯实理论基础，还是为未来研发更大规模的模型做准备",90106,"2026-04-06T11:19:32",[35,15,13,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":66,"readme_en":67,"readme_zh":68,"quickstart_zh":69,"use_case_zh":70,"hero_image_url":71,"owner_login":72,"owner_name":73,"owner_avatar_url":74,"owner_bio":75,"owner_company":76,"owner_location":77,"owner_email":77,"owner_twitter":77,"owner_website":77,"owner_url":78,"languages":77,"stars":79,"forks":80,"last_commit_at":81,"license":77,"difficulty_score":82,"env_os":83,"env_gpu":84,"env_ram":84,"env_deps":85,"category_tags":97,"github_topics":77,"view_count":32,"oss_zip_url":77,"oss_zip_packed_at":77,"status":17,"created_at":101,"updated_at":102,"faqs":103,"releases":104},7543,"kjw0612\u002Fawesome-rnn","awesome-rnn","Recurrent Neural Network - A curated list of resources dedicated to RNN","awesome-rnn 是一个专为循环神经网络（RNN）打造的精选资源清单，旨在帮助开发者和研究人员系统性地探索这一深度学习核心领域。面对 RNN 相关教程、代码库和论文分散且难以筛选的痛点，它通过人工 curated 的方式，将零散的知识整合为结构清晰的导航指南。\n\n这份清单涵盖了从基础理论到前沿应用的全方位内容：包括 TensorFlow、Theano 等主流框架的代码实现与教程，LSTM 等架构变体的原理解析，以及在自然语言处理（如机器翻译、对话系统）、计算机视觉（如视频分析）和多模态任务中的具体应用案例。此外，它还收录了相关的学术书籍、综述文章、公开数据集及在线演示，为用户提供了从入门学习到深入研究的完整路径。\n\nawesome-rnn 特别适合人工智能领域的开发者、算法工程师及学术研究人员使用。无论是想要快速上手 RNN 
项目的新手，还是寻求最新架构灵感或对比不同实现方案的资深专家，都能从中高效获取高质量资源。虽然该项目目前不再活跃维护，但其沉淀的经典资料依然具有极高的参考价值，是理解序列建模技术不可或缺的知识库。","# Awesome Recurrent Neural Networks\n\nA curated list of resources dedicated to recurrent neural networks (closely related to *deep learning*).\n\nMaintainers - [Myungsub Choi](https:\u002F\u002Fgithub.com\u002Fmyungsub), [Taeksoo Kim](https:\u002F\u002Fgithub.com\u002Fjazzsaxmafia), [Jiwon Kim](https:\u002F\u002Fgithub.com\u002Fkjw0612)\n\nWe have pages for other topics: [awesome-deep-vision](https:\u002F\u002Fgithub.com\u002Fkjw0612\u002Fawesome-deep-vision), [awesome-random-forest](https:\u002F\u002Fgithub.com\u002Fkjw0612\u002Fawesome-random-forest)\n\n## Contributing\nPlease feel free to [pull requests](https:\u002F\u002Fgithub.com\u002Fkjw0612\u002Fawesome-rnn\u002Fpulls), email Myungsub Choi (cms6539@gmail.com) or join our chats to add links.\n\nThe project is not actively maintained.\n\n[![Join the chat at https:\u002F\u002Fgitter.im\u002Fkjw0612\u002Fawesome-rnn](https:\u002F\u002Fbadges.gitter.im\u002FJoin%20Chat.svg)](https:\u002F\u002Fgitter.im\u002Fkjw0612\u002Fawesome-rnn?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)\n\n## Sharing\n+ [Share on Twitter](http:\u002F\u002Ftwitter.com\u002Fhome?status=http:\u002F\u002Fjiwonkim.org\u002Fawesome-rnn%0AResources%20for%20Recurrent%20Neural%20Networks)\n+ [Share on Facebook](http:\u002F\u002Fwww.facebook.com\u002Fsharer\u002Fsharer.php?u=https:\u002F\u002Fjiwonkim.org\u002Fawesome-rnn)\n+ [Share on Google Plus](http:\u002F\u002Fplus.google.com\u002Fshare?url=https:\u002F\u002Fjiwonkim.org\u002Fawesome-rnn)\n+ [Share on LinkedIn](http:\u002F\u002Fwww.linkedin.com\u002FshareArticle?mini=true&url=https:\u002F\u002Fjiwonkim.org\u002Fawesome-rnn&title=Awesome%20Recurrent%20Neural&Networks&summary=&source=)\n\n## Table of Contents\n\n- [Codes](#codes)\n- [Theory](#theory)\n  - [Lectures](#lectures)\n  - [Books \u002F Thesis](#books--thesis)\n  - [Architecture 
Variants](#architecture-variants)\n    - [Structure](#structure)\n    - [Memory](#memory)\n  - [Surveys](#surveys)\n- [Applications](#applications)\n  - [Natural Language Processing](#natural-language-processing)\n    - [Language Modeling](#language-modeling)\n    - [Speech Recognition](#speech-recognition)\n    - [Machine Translation](#machine-translation)\n    - [Conversation Modeling](#conversation-modeling)\n    - [Question Answering](#question-answering)\n  - [Computer Vision](#computer-vision)\n    - [Object Recognition](#object-recognition)\n    - [Image Generation](#image-generation)\n    - [Video Analysis](#video-analysis)\n  - [Multimodal (CV+NLP)](#multimodal-cv--nlp)\n    - [Image Captioning](#image-captioning)\n    - [Video Captioning](#video-captioning)\n    - [Visual Question Answering](#visual-question-answering)\n  - [Turing Machines](#turing-machines)\n  - [Robotics](#robotics)\n  - [Other](#other)\n- [Datasets](#datasets)\n- [Blogs](#blogs)\n- [Online Demos](#online-demos)\n\n## Codes\n* [Tensorflow](https:\u002F\u002Fwww.tensorflow.org\u002F) - Python, C++\n  * [Get started](https:\u002F\u002Fwww.tensorflow.org\u002Fversions\u002Fmaster\u002Fget_started\u002Findex.html), [Tutorials](https:\u002F\u002Fwww.tensorflow.org\u002Fversions\u002Fmaster\u002Ftutorials\u002Findex.html)\n    * [Recurrent Neural Network Tutorial](https:\u002F\u002Fwww.tensorflow.org\u002Fversions\u002Fmaster\u002Ftutorials\u002Frecurrent\u002Findex.html)\n    * [Sequence-to-Sequence Model Tutorial](https:\u002F\u002Fwww.tensorflow.org\u002Fversions\u002Fmaster\u002Ftutorials\u002Fseq2seq\u002Findex.html)\n  * [Tutorials](https:\u002F\u002Fgithub.com\u002Fnlintz\u002FTensorFlow-Tutorials) by nlintz\n  * [Notebook examples](https:\u002F\u002Fgithub.com\u002Faymericdamien\u002FTensorFlow-Examples) by aymericdamien\n  * [Scikit Flow (skflow)](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Fskflow) - Simplified Scikit-learn like Interface for TensorFlow\n  * 
[Keras](http:\u002F\u002Fkeras.io\u002F) : (Tensorflow \u002F Theano)-based modular deep learning library similar to Torch\n  * [char-rnn-tensorflow](https:\u002F\u002Fgithub.com\u002Fsherjilozair\u002Fchar-rnn-tensorflow) by sherjilozair: char-rnn in tensorflow\n* [Theano](http:\u002F\u002Fdeeplearning.net\u002Fsoftware\u002Ftheano\u002F) - Python\n  * Simple IPython [tutorial on Theano](http:\u002F\u002Fnbviewer.jupyter.org\u002Fgithub\u002Fcraffel\u002Ftheano-tutorial\u002Fblob\u002Fmaster\u002FTheano%20Tutorial.ipynb)\n  * [Deep Learning Tutorials](http:\u002F\u002Fwww.deeplearning.net\u002Ftutorial\u002F)\n    * [RNN for semantic parsing of speech](http:\u002F\u002Fwww.deeplearning.net\u002Ftutorial\u002Frnnslu.html#rnnslu)\n    * [LSTM network for sentiment analysis](http:\u002F\u002Fwww.deeplearning.net\u002Ftutorial\u002Flstm.html#lstm)\n  * [Pylearn2](http:\u002F\u002Fdeeplearning.net\u002Fsoftware\u002Fpylearn2\u002F) : Library that wraps a lot of models and training algorithms in deep learning\n  * [Blocks](https:\u002F\u002Fgithub.com\u002Fmila-udem\u002Fblocks) : modular framework that enables building neural network models\n  * [Keras](http:\u002F\u002Fkeras.io\u002F) : (Tensorflow \u002F Theano)-based modular deep learning library similar to Torch\n  * [Lasagne](https:\u002F\u002Fgithub.com\u002FLasagne\u002FLasagne) : Lightweight library to build and train neural networks in Theano\n  * [theano-rnn](https:\u002F\u002Fgithub.com\u002Fgwtaylor\u002Ftheano-rnn) by Graham Taylor\n  * [Passage](https:\u002F\u002Fgithub.com\u002FIndicoDataSolutions\u002FPassage) : Library for text analysis with RNNs\n  * [Theano-Lights](https:\u002F\u002Fgithub.com\u002FIvaylo-Popov\u002FTheano-Lights) : Contains many generative models\n* [Caffe](https:\u002F\u002Fgithub.com\u002FBVLC\u002Fcaffe) - C++ with MATLAB\u002FPython wrappers\n  * [LRCN](http:\u002F\u002Fjeffdonahue.com\u002Flrcn\u002F) by Jeff Donahue\n* [Torch](http:\u002F\u002Ftorch.ch\u002F) - Lua\n  * 
[torchnet](https:\u002F\u002Fgithub.com\u002Ftorchnet\u002Ftorchnet) : modular framework that enables building neural network models\n  * [char-rnn](https:\u002F\u002Fgithub.com\u002Fkarpathy\u002Fchar-rnn) by Andrej Karpathy : multi-layer RNN\u002FLSTM\u002FGRU for training\u002Fsampling from character-level language models\n  * [torch-rnn](https:\u002F\u002Fgithub.com\u002Fjcjohnson\u002Ftorch-rnn) by Justin Johnson : reusable RNN\u002FLSTM modules for torch7 - much faster and memory efficient reimplementation of char-rnn\n  * [neuraltalk2](https:\u002F\u002Fgithub.com\u002Fkarpathy\u002Fneuraltalk2) by Andrej Karpathy : Recurrent Neural Network captions image, much faster and better version of the original [neuraltalk](https:\u002F\u002Fgithub.com\u002Fkarpathy\u002Fneuraltalk)\n  * [LSTM](https:\u002F\u002Fgithub.com\u002Fwojzaremba\u002Flstm) by Wojciech Zaremba : Long Short Term Memory Units to train a language model on word level Penn Tree Bank dataset\n  * [Oxford](https:\u002F\u002Fgithub.com\u002Foxford-cs-ml-2015) by Nando de Freitas : Oxford Computer Science - Machine Learning 2015 Practicals\n  * [rnn](https:\u002F\u002Fgithub.com\u002FElement-Research\u002Frnn) by Nicholas Leonard : general library for implementing RNN, LSTM, BRNN and BLSTM (highly unit tested).\n* [PyTorch](http:\u002F\u002Fpytorch.org\u002F) - Python\n  * [Word-level RNN example](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Fexamples\u002Ftree\u002Fmaster\u002Fword_language_model) : demonstrates PyTorch's built in RNN modules for language modeling\n  * [Practical PyTorch tutorials](https:\u002F\u002Fgithub.com\u002Fspro\u002Fpractical-pytorch) by Sean Robertson : focuses on using RNNs for Natural Language Processing\n  * [Deep Learning For NLP In PyTorch](https:\u002F\u002Fgithub.com\u002Frguthrie3\u002FDeepLearningForNLPInPytorch) by Robert Guthrie : written for a Natural Language Processing class at Georgia Tech\n* [DL4J](http:\u002F\u002Fdeeplearning4j.org\u002F) by 
[Skymind](http:\u002F\u002Fwww.skymind.io\u002F) : Deep Learning library for Java, Scala & Clojure on Hadoop, Spark & GPUs\n  * [Documentation](http:\u002F\u002Fdeeplearning4j.org\u002F) (Also in [Chinese](http:\u002F\u002Fdeeplearning4j.org\u002Fzh-index.html), [Japanese](http:\u002F\u002Fdeeplearning4j.org\u002Fja-index.html), [Korean](http:\u002F\u002Fdeeplearning4j.org\u002Fkr-index.html)) : [RNN](http:\u002F\u002Fdeeplearning4j.org\u002Fusingrnns.html), [LSTM](http:\u002F\u002Fdeeplearning4j.org\u002Flstm.html)\n  * [rnn examples](https:\u002F\u002Fgithub.com\u002Fdeeplearning4j\u002Fdl4j-examples\u002Ftree\u002Fmaster\u002Fdl4j-examples\u002Fsrc\u002Fmain\u002Fjava\u002Forg\u002Fdeeplearning4j\u002Fexamples\u002Frecurrent)\n* Etc.\n  * [Neon](http:\u002F\u002Fneon.nervanasys.com\u002Fdocs\u002Flatest\u002Findex.html): new deep learning library in Python, with support for RNN\u002FLSTM, and a fast image captioning model\n  * [Brainstorm](https:\u002F\u002Fgithub.com\u002FIDSIA\u002Fbrainstorm): deep learning library in Python, developed by IDSIA, thereby including various recurrent structures\n  * [Chainer](http:\u002F\u002Fchainer.org\u002F) : new, flexible deep learning library in Python\n  * [CGT](http:\u002F\u002Fjoschu.github.io\u002F)(Computational Graph Toolkit) : replicates Theano's API, but with very short compilation time and multithreading\n  * [RNNLIB](https:\u002F\u002Fsourceforge.net\u002Fp\u002Frnnl\u002Fwiki\u002FHome\u002F) by Alex Graves : C++ based LSTM library\n  * [RNNLM](http:\u002F\u002Frnnlm.org\u002F) by Tomas Mikolov : C++ based simple code\n  * [faster-RNNLM](https:\u002F\u002Fgithub.com\u002Fyandex\u002Ffaster-rnnlm) of Yandex : C++ based rnnlm implementation aimed to handle huge datasets\n  * [neuraltalk](https:\u002F\u002Fgithub.com\u002Fkarpathy\u002Fneuraltalk) by Andrej Karpathy : numpy-based RNN\u002FLSTM implementation\n  * [gist](https:\u002F\u002Fgist.github.com\u002Fkarpathy\u002F587454dc0146a6ae21fc) by Andrej Karpathy : 
raw numpy code that implements an efficient batched LSTM\n  * [Recurrentjs](https:\u002F\u002Fgithub.com\u002Fkarpathy\u002Frecurrentjs) by Andrej Karpathy : a beta javascript library for RNN\n  * [DARQN](https:\u002F\u002Fgithub.com\u002F5vision\u002FDARQN) by 5vision : Deep Attention Recurrent Q-Network\n\n## Theory\n### Lectures\n* Stanford NLP ([CS224d](http:\u002F\u002Fcs224d.stanford.edu\u002Findex.html)) by Richard Socher\n  * [Lecture Note 3](http:\u002F\u002Fcs224d.stanford.edu\u002Flecture_notes\u002FLectureNotes3.pdf) : neural network basics\n  * [Lecture Note 4](http:\u002F\u002Fcs224d.stanford.edu\u002Flecture_notes\u002FLectureNotes4.pdf) : RNN language models, bi-directional RNN, GRU, LSTM\n* Stanford vision ([CS231n](http:\u002F\u002Fcs231n.github.io\u002F)) by Andrej Karpathy\n  * About NN basic, and CNN\n* Oxford [Machine Learning](https:\u002F\u002Fwww.cs.ox.ac.uk\u002Fpeople\u002Fnando.defreitas\u002Fmachinelearning\u002F) by Nando de Freitas\n  * [Lecture 12](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=56TYLaQN4N8) : Recurrent neural networks and LSTMs\n  * [Lecture 13](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=-yX1SYeDHbg) : (guest lecture) Alex Graves on Hallucination with RNNs\n\n### Books \u002F Thesis\n* Alex Graves (2008)\n  * [Supervised Sequence Labelling with Recurrent Neural Networks](http:\u002F\u002Fwww.cs.toronto.edu\u002F~graves\u002Fpreprint.pdf)\n* Tomas Mikolov (2012)\n  * [Statistical Language Models based on Neural Networks](http:\u002F\u002Fwww.fit.vutbr.cz\u002F~imikolov\u002Frnnlm\u002Fthesis.pdf)\n* Ilya Sutskever (2013)\n  * [Training Recurrent Neural Networks](http:\u002F\u002Fwww.cs.utoronto.ca\u002F~ilya\u002Fpubs\u002Filya_sutskever_phd_thesis.pdf)\n* Richard Socher (2014)\n  * [Recursive Deep Learning for Natural Language Processing and Computer Vision](http:\u002F\u002Fnlp.stanford.edu\u002F~socherr\u002Fthesis.pdf)\n* Ian Goodfellow, Yoshua Bengio, and Aaron Courville (2016)\n  * [The Deep Learning Book 
chapter 10](http:\u002F\u002Fwww.deeplearningbook.org\u002Fcontents\u002Frnn.html)\n\n\n### Architecture Variants\n\n#### Structure\n\n* Bi-directional RNN [[Paper](http:\u002F\u002Fwww.di.ufpe.br\u002F~fnj\u002FRNA\u002Fbibliografia\u002FBRNN.pdf)]\n  * Mike Schuster and Kuldip K. Paliwal, *Bidirectional Recurrent Neural Networks*, Trans. on Signal Processing 1997\n* Multi-dimensional RNN [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F0705.2011.pdf)]\n  * Alex Graves, Santiago Fernandez, and Jurgen Schmidhuber, *Multi-Dimensional Recurrent Neural Networks*, ICANN 2007\n* GFRNN [[Paper-arXiv](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1502.02367)] [[Paper-ICML](http:\u002F\u002Fjmlr.org\u002Fproceedings\u002Fpapers\u002Fv37\u002Fchung15.pdf)] [[Supplementary](http:\u002F\u002Fjmlr.org\u002Fproceedings\u002Fpapers\u002Fv37\u002Fchung15-supp.pdf)]\n  * Junyoung Chung, Caglar Gulcehre, Kyunghyun Cho, Yoshua Bengio, *Gated Feedback Recurrent Neural Networks*, arXiv:1502.02367 \u002F ICML 2015\n* Tree-Structured RNNs\n  * Kai Sheng Tai, Richard Socher, and Christopher D. Manning, *Improved Semantic Representations From Tree-Structured Long Short-Term Memory Networks*, arXiv:1503.00075 \u002F ACL 2015 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1503.00075)]\n  * Samuel R. Bowman, Christopher D. 
Manning, and Christopher Potts, *Tree-structured composition in neural networks without tree-structured architectures*, arXiv:1506.04834 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.04834)]\n* Grid LSTM [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1507.01526)] [[Code](https:\u002F\u002Fgithub.com\u002Fcoreylynch\u002Fgrid-lstm)]\n  * Nal Kalchbrenner, Ivo Danihelka, and Alex Graves, *Grid Long Short-Term Memory*, arXiv:1507.01526\n* Segmental RNN [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.06018v2.pdf)]\n  * Lingpeng Kong, Chris Dyer, Noah Smith, \"Segmental Recurrent Neural Networks\", ICLR 2016.\n* Seq2seq for Sets [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.06391v4.pdf)]\n  * Oriol Vinyals, Samy Bengio, Manjunath Kudlur, \"Order Matters: Sequence to sequence for sets\", ICLR 2016.\n* Hierarchical Recurrent Neural Networks [[Paper](http:\u002F\u002Farxiv.org\u002Fabs\u002F1609.01704)]\n  * Junyoung Chung, Sungjin Ahn, Yoshua Bengio, \"Hierarchical Multiscale Recurrent Neural Networks\", arXiv:1609.01704\n\n#### Memory\n\n* LSTM [[Paper](http:\u002F\u002Fdeeplearning.cs.cmu.edu\u002Fpdfs\u002FHochreiter97_lstm.pdf)]\n  * Sepp Hochreiter and Jurgen Schmidhuber, *Long Short-Term Memory*, Neural Computation 1997\n* GRU (Gated Recurrent Unit) [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1406.1078.pdf)]\n  * Kyunghyun Cho, Bart van Berrienboer, Caglar Gulcehre, Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, and Yoshua Bengio, *Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation*, arXiv:1406.1078 \u002F EMNLP 2014\n* NTM [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1410.5401)]\n  * A.Graves, G. Wayne, and I. 
Danihelka., *Neural Turing Machines,* arXiv preprint arXiv:1410.5401\n* Neural GPU [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.08228.pdf)]\n  * Łukasz Kaiser, Ilya Sutskever, arXiv:1511.08228 \u002F ICML 2016 (under review)\n* Memory Network [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1410.3916)]\n  * Jason Weston, Sumit Chopra, Antoine Bordes, *Memory Networks,* arXiv:1410.3916\n* Pointer Network [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.03134)]\n  * Oriol Vinyals, Meire Fortunato, and Navdeep Jaitly, *Pointer Networks*, arXiv:1506.03134 \u002F NIPS 2015\n* Deep Attention Recurrent Q-Network [[Paper](http:\u002F\u002Farxiv.org\u002Fabs\u002F1512.01693)]\n  * Ivan Sorokin, Alexey Seleznev, Mikhail Pavlov, Aleksandr Fedorov, Anastasiia Ignateva, *Deep Attention Recurrent Q-Network* , arXiv:1512.01693\n* Dynamic Memory Networks [[Paper](http:\u002F\u002Farxiv.org\u002Fabs\u002F1506.07285)]\n  * Ankit Kumar, Ozan Irsoy, Peter Ondruska, Mohit Iyyer, James Bradbury, Ishaan Gulrajani, Victor Zhong, Romain Paulus, Richard Socher, \"Ask Me Anything: Dynamic Memory Networks for Natural Language Processing\", arXiv:1506.07285\n\n### Surveys\n* Yann LeCun, Yoshua Bengio, and Geoffrey Hinton, [Deep Learning](http:\u002F\u002Fwww.nature.com\u002Fnature\u002Fjournal\u002Fv521\u002Fn7553\u002Fpdf\u002Fnature14539.pdf), Nature 2015\n* Klaus Greff, Rupesh Kumar Srivastava, Jan Koutnik, Bas R. Steunebrink, Jurgen Schmidhuber, [LSTM: A Search Space Odyssey](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1503.04069), arXiv:1503.04069\n* Zachary C. 
Lipton, [A Critical Review of Recurrent Neural Networks for Sequence Learning](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.00019), arXiv:1506.00019\n* Andrej Karpathy, Justin Johnson, Li Fei-Fei, [Visualizing and Understanding Recurrent Networks](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.02078), arXiv:1506.02078\n* Rafal Jozefowicz, Wojciech Zaremba, Ilya Sutskever, [An Empirical Exploration of Recurrent Network Architectures](http:\u002F\u002Fjmlr.org\u002Fproceedings\u002Fpapers\u002Fv37\u002Fjozefowicz15.pdf), ICML, 2015.\n\n## Applications\n\n### Natural Language Processing\n\n#### Language Modeling\n* Tomas Mikolov, Martin Karafiat, Lukas Burget, Jan \"Honza\" Cernocky, Sanjeev Khudanpur, *Recurrent Neural Network based Language Model*, Interspeech 2010 [[Paper](http:\u002F\u002Fwww.fit.vutbr.cz\u002Fresearch\u002Fgroups\u002Fspeech\u002Fpubli\u002F2010\u002Fmikolov_interspeech2010_IS100722.pdf)]\n* Tomas Mikolov, Stefan Kombrink, Lukas Burget, Jan \"Honza\" Cernocky, Sanjeev Khudanpur, *Extensions of Recurrent Neural Network Language Model*, ICASSP 2011 [[Paper](http:\u002F\u002Fwww.fit.vutbr.cz\u002Fresearch\u002Fgroups\u002Fspeech\u002Fpubli\u002F2011\u002Fmikolov_icassp2011_5528.pdf)]\n* Stefan Kombrink, Tomas Mikolov, Martin Karafiat, Lukas Burget, *Recurrent Neural Network based Language Modeling in Meeting Recognition*, Interspeech 2011 [[Paper](http:\u002F\u002Fwww.fit.vutbr.cz\u002F~imikolov\u002Frnnlm\u002FApplicationOfRNNinMeetingRecognition_IS2011.pdf)]\n* Jiwei Li, Minh-Thang Luong, and Dan Jurafsky, *A Hierarchical Neural Autoencoder for Paragraphs and Documents*, ACL 2015 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.01057)], [[Code](https:\u002F\u002Fgithub.com\u002Fjiweil\u002FHierarchical-Neural-Autoencoder)]\n* Ryan Kiros, Yukun Zhu, Ruslan Salakhutdinov, and Richard S. 
Zemel, *Skip-Thought Vectors*, arXiv:1506.06726 \u002F NIPS 2015 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.06726.pdf)]\n* Yoon Kim, Yacine Jernite, David Sontag, and Alexander M. Rush, *Character-Aware Neural Language Models*, arXiv:1508.06615 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1508.06615)]\n* Xingxing Zhang, Liang Lu, and Mirella Lapata, *Tree Recurrent Neural Networks with Application to Language Modeling*, arXiv:1511.00060 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.00060.pdf)]\n* Felix Hill, Antoine Bordes, Sumit Chopra, and Jason Weston, *The Goldilocks Principle: Reading children's books with explicit memory representations*, arXiv:1511.0230 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.02301.pdf)]\n\n\n#### Speech Recognition\n* Geoffrey Hinton, Li Deng, Dong Yu, George E. Dahl, Abdel-rahman Mohamed, Navdeep Jaitly, Andrew Senior, Vincent Vanhoucke, Patrick Nguyen, Tara N. Sainath, and Brian Kingsbury, *Deep Neural Networks for Acoustic Modeling in Speech Recognition*, IEEE Signam Processing Magazine 2012 [[Paper](http:\u002F\u002Fcs224d.stanford.edu\u002Fpapers\u002Fmaas_paper.pdf)]\n* Alex Graves, Abdel-rahman Mohamed, and Geoffrey Hinton, *Speech Recognition with Deep Recurrent Neural Networks*, arXiv:1303.5778 \u002F ICASSP 2013 [[Paper](http:\u002F\u002Fwww.cs.toronto.edu\u002F~fritz\u002Fabsps\u002FRNN13.pdf)]\n* Jan Chorowski, Dzmitry Bahdanau, Dmitriy Serdyuk, Kyunghyun Cho, and Yoshua Bengio, *Attention-Based Models for Speech Recognition*, arXiv:1506.07503 \u002F NIPS 2015 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.07503)]\n* Haşim Sak, Andrew Senior, Kanishka Rao, and Françoise Beaufays. 
*Fast and Accurate Recurrent Neural Network Acoustic Models for Speech Recognition*, arXiv:1507.06947 2015 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1507.06947v1.pdf)].\n\n#### Machine Translation\n* Oxford [[Paper](http:\u002F\u002Fwww.nal.ai\u002Fpapers\u002Fkalchbrennerblunsom_emnlp13)]\n  * Nal Kalchbrenner and Phil Blunsom, *Recurrent Continuous Translation Models*, EMNLP 2013\n* Univ. Montreal\n  * Kyunghyun Cho, Bart van Berrienboer, Caglar Gulcehre, Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, and Yoshua Bengio, *Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation*, arXiv:1406.1078 \u002F EMNLP 2014 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1406.1078)]\n  * Kyunghyun Cho, Bart van Merrienboer, Dzmitry Bahdanau, and Yoshua Bengio, *On the Properties of Neural Machine Translation: Encoder-Decoder Approaches*, SSST-8 2014 [[Paper](http:\u002F\u002Fwww.aclweb.org\u002Fanthology\u002FW14-4012)]\n  * Jean Pouget-Abadie, Dzmitry Bahdanau, Bart van Merrienboer, Kyunghyun Cho, and Yoshua Bengio, *Overcoming the Curse of Sentence Length for Neural Machine Translation using Automatic Segmentation*, SSST-8 2014\n  * Dzmitry Bahdanau, KyungHyun Cho, and Yoshua Bengio, *Neural Machine Translation by Jointly Learning to Align and Translate*, arXiv:1409.0473 \u002F ICLR 2015 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1409.0473)]\n  * Sebastian Jean, Kyunghyun Cho, Roland Memisevic, and Yoshua Bengio, *On using very large target vocabulary for neural machine translation*, arXiv:1412.2007 \u002F ACL 2015 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1412.2007.pdf)]\n* Univ. Montreal + Middle East Tech. Univ. + Univ. 
Maine [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1503.03535.pdf)]\n  * Caglar Gulcehre, Orhan Firat, Kelvin Xu, Kyunghyun Cho, Loic Barrault, Huei-Chi Lin, Fethi Bougares, Holger Schwenk, and Yoshua Bengio, *On Using Monolingual Corpora in Neural Machine Translation*, arXiv:1503.03535\n* Google [[Paper](http:\u002F\u002Fpapers.nips.cc\u002Fpaper\u002F5346-sequence-to-sequence-learning-with-neural-networks.pdf)]\n  * Ilya Sutskever, Oriol Vinyals, and Quoc V. Le, *Sequence to Sequence Learning with Neural Networks*, arXiv:1409.3215 \u002F NIPS 2014\n* Google + NYU [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1410.8206)]\n  * Minh-Thang Luong, Ilya Sutskever, Quoc V. Le, Oriol Vinyals, and Wojciech Zaremba, *Addressing the Rare Word Problem in Neural Machine Transltaion*, arXiv:1410.8206 \u002F ACL 2015\n* ICT + Huawei [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.06442.pdf)]\n  * Fandong Meng, Zhengdong Lu, Zhaopeng Tu, Hang Li, and Qun Liu, *A Deep Memory-based Architecture for Sequence-to-Sequence Learning*, arXiv:1506.06442\n* Stanford [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1508.04025.pdf)]\n  * Minh-Thang Luong, Hieu Pham, and Christopher D. Manning, *Effective Approaches to Attention-based Neural Machine Translation*, arXiv:1508.04025\n* Middle East Tech. Univ. + NYU + Univ. Montreal [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1601.01073.pdf)]\n  * Orhan Firat, Kyunghyun Cho, and Yoshua Bengio, *Multi-Way, Multilingual Neural Machine Translation with a Shared Attention Mechanism*, arXiv:1601.01073\n\n#### Conversation Modeling\n* Lifeng Shang, Zhengdong Lu, and Hang Li, *Neural Responding Machine for Short-Text Conversation*, arXiv:1503.02364 \u002F ACL 2015 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1503.02364)]\n* Oriol Vinyals and Quoc V. Le, *A Neural Conversational Model*, arXiv:1506.05869 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.05869)]\n* Ryan Lowe, Nissan Pow, Iulian V. 
Serban, and Joelle Pineau, *The Ubuntu Dialogue Corpus: A Large Dataset for Research in Unstructured Multi-Turn Dialogue Systems*, arXiv:1506.08909 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.08909)]\n* Jesse Dodge, Andreea Gane, Xiang Zhang, Antoine Bordes, Sumit Chopra, Alexander Miller, Arthur Szlam, and Jason Weston, *Evaluating Prerequisite Qualities for Learning End-to-End Dialog Systems*, arXiv:1511.06931 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.06931)]\n* Jason Weston, *Dialog-based Language Learning*, arXiv:1604.06045, [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1604.06045)]\n* Antoine Bordes and Jason Weston, *Learning End-to-End Goal-Oriented Dialog*, arXiv:1605.07683 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1605.07683)]\n\n#### Question Answering\n* FAIR\n  * Jason Weston, Antoine Bordes, Sumit Chopra, Tomas Mikolov, and Alexander M. Rush, *Towards AI-Complete Question Answering: A Set of Prerequisite Toy Tasks*, arXiv:1502.05698 [[Web](https:\u002F\u002Fresearch.facebook.com\u002Fresearchers\u002F1543934539189348)] [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1502.05698.pdf)]\n  * Antoine Bordes, Nicolas Usunier, Sumit Chopra, and Jason Weston, *Simple Question answering with Memory Networks*, arXiv:1506.02075 [[Paper](http:\u002F\u002Farxiv.org\u002Fabs\u002F1506.02075)]\n  * Felix Hill, Antoine Bordes, Sumit Chopra, Jason Weston, \"The Goldilocks Principle: Reading Children's Books with Explicit Memory Representations\", ICLR 2016 [[Paper](http:\u002F\u002Farxiv.org\u002Fabs\u002F1511.02301)]\n* DeepMind + Oxford [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.03340.pdf)]\n  * Karl M. 
Hermann, Tomas Kocisky, Edward Grefenstette, Lasse Espeholt, Will Kay, Mustafa Suleyman, and Phil Blunsom, *Teaching Machines to Read and Comprehend*, arXiv:1506.03340 \u002F NIPS 2015\n* MetaMind [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.07285.pdf)]\n  * Ankit Kumar, Ozan Irsoy, Jonathan Su, James Bradbury, Robert English, Brian Pierce, Peter Ondruska, Mohit Iyyer, Ishaan Gulrajani, and Richard Socher, *Ask Me Anything: Dynamic Memory Networks for Natural Language Processing*, arXiv:1506.07285\n\n### Computer Vision\n\n#### Object Recognition\n* Pedro Pinheiro and Ronan Collobert, *Recurrent Convolutional Neural Networks for Scene Labeling*, ICML 2014 [[Paper](http:\u002F\u002Fjmlr.org\u002Fproceedings\u002Fpapers\u002Fv32\u002Fpinheiro14.pdf)]\n* Ming Liang and Xiaolin Hu, *Recurrent Convolutional Neural Network for Object Recognition*, CVPR 2015 [[Paper](http:\u002F\u002Fwww.cv-foundation.org\u002Fopenaccess\u002Fcontent_cvpr_2015\u002Fpapers\u002FLiang_Recurrent_Convolutional_Neural_2015_CVPR_paper.pdf)]\n* Wonmin Byeon, Thomas Breuel, Federico Raue1, and Marcus Liwicki1, *Scene Labeling with LSTM Recurrent Neural Networks*, CVPR 2015 [[Paper](http:\u002F\u002Fwww.cv-foundation.org\u002Fopenaccess\u002Fcontent_cvpr_2015\u002Fpapers\u002FByeon_Scene_Labeling_With_2015_CVPR_paper.pdf)]\n* Mircea Serban Pavel, Hannes Schulz, and Sven Behnke, *Recurrent Convolutional Neural Networks for Object-Class Segmentation of RGB-D Video*, IJCNN 2015 [[Paper](http:\u002F\u002Fwww.ais.uni-bonn.de\u002Fpapers\u002FIJCNN_2015_Pavel.pdf)]\n* Shuai Zheng, Sadeep Jayasumana, Bernardino Romera-Paredes, Vibhav Vineet, Zhizhong Su, Dalong Du, Chang Huang, and Philip H. S. 
Torr, *Conditional Random Fields as Recurrent Neural Networks*, arXiv:1502.03240 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1502.03240)]\n* Xiaodan Liang, Xiaohui Shen, Donglai Xiang, Jiashi Feng, Liang Lin, and Shuicheng Yan, *Semantic Object Parsing with Local-Global Long Short-Term Memory*, arXiv:1511.04510 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.04510.pdf)]\n* Sean Bell, C. Lawrence Zitnick, Kavita Bala, and Ross Girshick, *Inside-Outside Net: Detecting Objects in Context with Skip Pooling and Recurrent Neural Networks*, arXiv:1512.04143 \u002F ICCV 2015 workshop [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1512.04143)]\n\n#### Visual Tracking\n* Quan Gan, Qipeng Guo, Zheng Zhang, and Kyunghyun Cho, *First Step toward Model-Free, Anonymous Object Tracking with Recurrent Neural Networks*, arXiv:1511.06425 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.06425)]\n\n\n#### Image Generation\n* Karol Gregor, Ivo Danihelka, Alex Graves, Danilo J. Rezende, and Daan Wierstra, *DRAW: A Recurrent Neural Network for Image Generation,* ICML 2015 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1502.04623)]\n* Angeliki Lazaridou, Dat T. Nguyen, R. Bernardi, and M. Baroni, *Unveiling the Dreams of Word Embeddings: Towards Language-Driven Image Generation,* arXiv:1506.03500 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.03500)]\n* Lucas Theis and Matthias Bethge, *Generative Image Modeling Using Spatial LSTMs,* arXiv:1506.03478 \u002F NIPS 2015 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.03478)]\n* Aaron van den Oord, Nal Kalchbrenner, and Koray Kavukcuoglu, *Pixel Recurrent Neural Networks,* arXiv:1601.06759 [[Paper](http:\u002F\u002Farxiv.org\u002Fabs\u002F1601.06759)]\n\n#### Video Analysis\n\n* Univ. 
Toronto [[paper](http:\u002F\u002Farxiv.org\u002Fabs\u002F1502.04681)]\n  * Nitish Srivastava, Elman Mansimov, Ruslan Salakhutdinov, *Unsupervised Learning of Video Representations using LSTMs*, arXiv:1502.04681 \u002F ICML 2015\n* Univ. Cambridge [[paper](http:\u002F\u002Farxiv.org\u002Fabs\u002F1511.06309)]\n  * Viorica Patraucean, Ankur Handa, Roberto Cipolla, *Spatio-temporal video autoencoder with differentiable memory*, arXiv:1511.06309\n\n\n\n### Multimodal (CV + NLP)\n\n#### Image Captioning\n* UCLA + Baidu [[Web](http:\u002F\u002Fwww.stat.ucla.edu\u002F~junhua.mao\u002Fm-RNN.html)] [[Paper-arXiv1](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1410.1090)], [[Paper-arXiv2](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1412.6632)]\n  * Junhua Mao, Wei Xu, Yi Yang, Jiang Wang, and Alan L. Yuille, *Explain Images with Multimodal Recurrent Neural Networks*, arXiv:1410.1090\n  * Junhua Mao, Wei Xu, Yi Yang, Jiang Wang, Zhiheng Huang, and Alan L. Yuille, *Deep Captioning with Multimodal Recurrent Neural Networks (m-RNN)*, arXiv:1412.6632 \u002F ICLR 2015\n* Univ. Toronto [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1411.2539)] [[Web demo](http:\u002F\u002Fdeeplearning.cs.toronto.edu\u002Fi2t)]\n  * Ryan Kiros, Ruslan Salakhutdinov, and Richard S. 
Zemel, *Unifying Visual-Semantic Embeddings with Multimodal Neural Language Models*, arXiv:1411.2539 \u002F TACL 2015\n* Berkeley [[Web](http:\u002F\u002Fjeffdonahue.com\u002Flrcn\u002F)] [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1411.4389)]\n  * Jeff Donahue, Lisa Anne Hendricks, Sergio Guadarrama, Marcus Rohrbach, Subhashini Venugopalan, Kate Saenko, and Trevor Darrell, *Long-term Recurrent Convolutional Networks for Visual Recognition and Description*, arXiv:1411.4389 \u002F CVPR 2015\n* Google [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1411.4555)]\n  * Oriol Vinyals, Alexander Toshev, Samy Bengio, and Dumitru Erhan, *Show and Tell: A Neural Image Caption Generator*, arXiv:1411.4555 \u002F CVPR 2015\n* Stanford [[Web]](http:\u002F\u002Fcs.stanford.edu\u002Fpeople\u002Fkarpathy\u002Fdeepimagesent\u002F) [[Paper]](http:\u002F\u002Fcs.stanford.edu\u002Fpeople\u002Fkarpathy\u002Fcvpr2015.pdf)\n  * Andrej Karpathy and Li Fei-Fei, *Deep Visual-Semantic Alignments for Generating Image Description*, CVPR 2015\n* Microsoft [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1411.4952)]\n  * Hao Fang, Saurabh Gupta, Forrest Iandola, Rupesh Srivastava, Li Deng, Piotr Dollar, Jianfeng Gao, Xiaodong He, Margaret Mitchell, John C. Platt, Lawrence Zitnick, and Geoffrey Zweig, *From Captions to Visual Concepts and Back*, arXiv:1411.4952 \u002F CVPR 2015\n* CMU + Microsoft [[Paper-arXiv](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1411.5654)], [[Paper-CVPR](http:\u002F\u002Fwww.cs.cmu.edu\u002F~xinleic\u002Fpapers\u002Fcvpr15_rnn.pdf)]\n  * Xinlei Chen, and C. Lawrence Zitnick, *Learning a Recurrent Visual Representation for Image Caption Generation*\n  * Xinlei Chen, and C. Lawrence Zitnick, *Mind’s Eye: A Recurrent Visual Representation for Image Caption Generation*, CVPR 2015\n* Univ. Montreal + Univ. 
Toronto [[Web](http:\u002F\u002Fkelvinxu.github.io\u002Fprojects\u002Fcapgen.html)] [[Paper](http:\u002F\u002Fwww.cs.toronto.edu\u002F~zemel\u002Fdocuments\u002FcaptionAttn.pdf)]\n  * Kelvin Xu, Jimmy Lei Ba, Ryan Kiros, Kyunghyun Cho, Aaron Courville, Ruslan Salakhutdinov, Richard S. Zemel, and Yoshua Bengio, *Show, Attend, and Tell: Neural Image Caption Generation with Visual Attention*, arXiv:1502.03044 \u002F ICML 2015\n* Idiap + EPFL + Facebook [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1502.03671)]\n  * Remi Lebret, Pedro O. Pinheiro, and Ronan Collobert, *Phrase-based Image Captioning*, arXiv:1502.03671 \u002F ICML 2015\n* UCLA + Baidu [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1504.06692)]\n  * Junhua Mao, Wei Xu, Yi Yang, Jiang Wang, Zhiheng Huang, and Alan L. Yuille, *Learning like a Child: Fast Novel Visual Concept Learning from Sentence Descriptions of Images*, arXiv:1504.06692\n* MS + Berkeley\n  * Jacob Devlin, Saurabh Gupta, Ross Girshick, Margaret Mitchell, and C. Lawrence Zitnick, *Exploring Nearest Neighbor Approaches for Image Captioning*, arXiv:1505.04467 (Note: technically not RNN) [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1505.04467.pdf)]\n  * Jacob Devlin, Hao Cheng, Hao Fang, Saurabh Gupta, Li Deng, Xiaodong He, Geoffrey Zweig, and Margaret Mitchell, *Language Models for Image Captioning: The Quirks and What Works*, arXiv:1505.01809 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1505.01809.pdf)]\n* Adelaide [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.01144.pdf)]\n  * Qi Wu, Chunhua Shen, Anton van den Hengel, Lingqiao Liu, and Anthony Dick, *Image Captioning with an Intermediate Attributes Layer*, arXiv:1506.01144\n* Tilburg [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.03694.pdf)]\n  * Grzegorz Chrupala, Akos Kadar, and Afra Alishahi, *Learning language through pictures*, arXiv:1506.03694\n* Univ. 
Montreal [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1507.01053.pdf)]\n  * Kyunghyun Cho, Aaron Courville, and Yoshua Bengio, *Describing Multimedia Content using Attention-based Encoder-Decoder Networks*, arXiv:1507.01053\n* Cornell [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1508.02091.pdf)]\n  * Jack Hessel, Nicolas Savva, and Michael J. Wilber, *Image Representations and New Domains in Neural Image Captioning*, arXiv:1508.02091\n\n\n#### Video Captioning\n* Berkeley [[Web](http:\u002F\u002Fjeffdonahue.com\u002Flrcn\u002F)] [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1411.4389)]\n  * Jeff Donahue, Lisa Anne Hendricks, Sergio Guadarrama, Marcus Rohrbach, Subhashini Venugopalan, Kate Saenko, and Trevor Darrell, *Long-term Recurrent Convolutional Networks for Visual Recognition and Description*, arXiv:1411.4389 \u002F CVPR 2015\n* UT Austin + UML + Berkeley [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1412.4729)]\n  * Subhashini Venugopalan, Huijuan Xu, Jeff Donahue, Marcus Rohrbach, Raymond Mooney, and Kate Saenko, *Translating Videos to Natural Language Using Deep Recurrent Neural Networks*, arXiv:1412.4729\n* Microsoft [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1505.01861)]\n  * Yingwei Pan, Tao Mei, Ting Yao, Houqiang Li, and Yong Rui, *Joint Modeling Embedding and Translation to Bridge Video and Language*, arXiv:1505.01861\n* UT Austin + Berkeley + UML [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1505.00487)]\n  * Subhashini Venugopalan, Marcus Rohrbach, Jeff Donahue, Raymond Mooney, Trevor Darrell, and Kate Saenko, *Sequence to Sequence--Video to Text*, arXiv:1505.00487\n* Univ. Montreal + Univ. 
Sherbrooke [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1502.08029.pdf)]\n  * Li Yao, Atousa Torabi, Kyunghyun Cho, Nicolas Ballas, Christopher Pal, Hugo Larochelle, and Aaron Courville, *Describing Videos by Exploiting Temporal Structure*, arXiv:1502.08029\n* MPI + Berkeley [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.01698.pdf)]\n  * Anna Rohrbach, Marcus Rohrbach, and Bernt Schiele, *The Long-Short Story of Movie Description*, arXiv:1506.01698\n* Univ. Toronto + MIT [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.06724.pdf)]\n  * Yukun Zhu, Ryan Kiros, Richard Zemel, Ruslan Salakhutdinov, Raquel Urtasun, Antonio Torralba, and Sanja Fidler, *Aligning Books and Movies: Towards Story-like Visual Explanations by Watching Movies and Reading Books*, arXiv:1506.06724\n* Univ. Montreal [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1507.01053.pdf)]\n  * Kyunghyun Cho, Aaron Courville, and Yoshua Bengio, *Describing Multimedia Content using Attention-based Encoder-Decoder Networks*, arXiv:1507.01053\n* Zhejiang Univ. + UTS [[Paper](http:\u002F\u002Farxiv.org\u002Fabs\u002F1511.03476)]\n  * Pingbo Pan, Zhongwen Xu, Yi Yang, Fei Wu, Yueting Zhuang, *Hierarchical Recurrent Neural Encoder for Video Representation with Application to Captioning*, arXiv:1511.03476\n* Univ. Montreal + NYU + IBM [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.04590.pdf)]\n  * Li Yao, Nicolas Ballas, Kyunghyun Cho, John R. Smith, and Yoshua Bengio, *Empirical performance upper bounds for image and video captioning*, arXiv:1511.04590\n\n\n#### Visual Question Answering\n\n* Virginia Tech. + MSR [[Web](http:\u002F\u002Fwww.visualqa.org\u002F)] [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1505.00468)]\n  * Stanislaw Antol, Aishwarya Agrawal, Jiasen Lu, Margaret Mitchell, Dhruv Batra, C. 
Lawrence Zitnick, and Devi Parikh, *VQA: Visual Question Answering*, arXiv:1505.00468 \u002F CVPR 2015 SUNw:Scene Understanding workshop\n* MPI + Berkeley [[Web](https:\u002F\u002Fwww.mpi-inf.mpg.de\u002Fdepartments\u002Fcomputer-vision-and-multimodal-computing\u002Fresearch\u002Fvision-and-language\u002Fvisual-turing-challenge\u002F)] [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1505.01121)]\n  * Mateusz Malinowski, Marcus Rohrbach, and Mario Fritz, *Ask Your Neurons: A Neural-based Approach to Answering Questions about Images*, arXiv:1505.01121\n* Univ. Toronto [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1505.02074)] [[Dataset](http:\u002F\u002Fwww.cs.toronto.edu\u002F~mren\u002Fimageqa\u002Fdata\u002Fcocoqa\u002F)]\n  * Mengye Ren, Ryan Kiros, and Richard Zemel, *Exploring Models and Data for Image Question Answering*, arXiv:1505.02074 \u002F ICML 2015 deep learning workshop\n* Baidu + UCLA [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1505.05612)] [[Dataset](http:\u002F\u002Fidl.baidu.com\u002FFM-IQA.html)]\n  * Haoyuan Gao, Junhua Mao, Jie Zhou, Zhiheng Huang, Lei Wang, and Wei Xu, *Are You Talking to a Machine? 
Dataset and Methods for Multilingual Image Question Answering*, arXiv:1505.05612 \u002F NIPS 2015\n* SNU + NAVER [[Paper](http:\u002F\u002Farxiv.org\u002Fabs\u002F1606.01455)]\n  * Jin-Hwa Kim, Sang-Woo Lee, Dong-Hyun Kwak, Min-Oh Heo, Jeonghee Kim, Jung-Woo Ha, Byoung-Tak Zhang, *Multimodal Residual Learning for Visual QA*, arXiv:1606.01455\n* UC Berkeley + Sony [[Paper](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1606.01847)]\n  * Akira Fukui, Dong Huk Park, Daylen Yang, Anna Rohrbach, Trevor Darrell, and Marcus Rohrbach, *Multimodal Compact Bilinear Pooling for Visual Question Answering and Visual Grounding*, arXiv:1606.01847\n* Postech [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1606.03647.pdf)]\n  * Hyeonwoo Noh and Bohyung Han, *Training Recurrent Answering Units with Joint Loss Minimization for VQA*, arXiv:1606.03647\n* SNU + NAVER [[Paper](http:\u002F\u002Farxiv.org\u002Fabs\u002F1610.04325)]\n  * Jin-Hwa Kim, Kyoung Woon On, Jeonghee Kim, Jung-Woo Ha, Byoung-Tak Zhang, *Hadamard Product for Low-rank Bilinear Pooling*, arXiv:1610.04325\n* Video QA\n  * CMU + UTS [[paper](http:\u002F\u002Farxiv.org\u002Fabs\u002F1511.04670)]\n    * Linchao Zhu, Zhongwen Xu, Yi Yang, Alexander G. Hauptmann, Uncovering Temporal Context for Video Question and Answering, arXiv:1511.04670\n  * KIT + MIT + Univ. Toronto [[Paper](http:\u002F\u002Farxiv.org\u002Fabs\u002F1512.02902)] [[Dataset](http:\u002F\u002Fmovieqa.cs.toronto.edu\u002Fhome\u002F)]\n    * Makarand Tapaswi, Yukun Zhu, Rainer Stiefelhagen, Antonio Torralba, Raquel Urtasun, Sanja Fidler, MovieQA: Understanding Stories in Movies through Question-Answering, arXiv:1512.02902\n\n\n#### Turing Machines\n*  A. Graves, G. Wayne, and I. 
Danihelka., *Neural Turing Machines,* arXiv preprint arXiv:1410.5401 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1410.5401)]\n* Jason Weston, Sumit Chopra, Antoine Bordes, *Memory Networks,* arXiv:1410.3916 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1410.3916)]\n* Armand Joulin and Tomas Mikolov, *Inferring Algorithmic Patterns with Stack-Augmented Recurrent Nets*, arXiv:1503.01007 \u002F NIPS 2015 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1503.01007)]\n* Sainbayar Sukhbaatar, Arthur Szlam, Jason Weston, and Rob Fergus, *End-To-End Memory Networks*, arXiv:1503.08895 \u002F NIPS 2015 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1503.08895)]\n* Wojciech Zaremba and Ilya Sutskever, *Reinforcement Learning Neural Turing Machines,* arXiv:1505.00521 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1505.00521)]\n* Baolin Peng and Kaisheng Yao, *Recurrent Neural Networks with External Memory for Language Understanding*, arXiv:1506.00195 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.00195.pdf)]\n* Fandong Meng, Zhengdong Lu, Zhaopeng Tu, Hang Li, and Qun Liu, *A Deep Memory-based Architecture for Sequence-to-Sequence Learning*, arXiv:1506.06442 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.06442.pdf)]\n* Arvind Neelakantan, Quoc V. 
Le, and Ilya Sutskever, *Neural Programmer: Inducing Latent Programs with Gradient Descent*, arXiv:1511.04834 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.04834.pdf)]\n* Scott Reed and Nando de Freitas, *Neural Programmer-Interpreters*, arXiv:1511.06279 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.06279.pdf)]\n* Karol Kurach, Marcin Andrychowicz, and Ilya Sutskever, *Neural Random-Access Machines*, arXiv:1511.06392 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.06392.pdf)]\n* Łukasz Kaiser and Ilya Sutskever, *Neural GPUs Learn Algorithms*, arXiv:1511.08228 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.08228.pdf)]\n* Ethan Caballero, *Skip-Thought Memory Networks*, arXiv:1511.06420 [[Paper](https:\u002F\u002Fpdfs.semanticscholar.org\u002F6b9f\u002F0d695df0ce01d005eb5aa69386cb5fbac62a.pdf)]\n* Wojciech Zaremba, Tomas Mikolov, Armand Joulin, and Rob Fergus, *Learning Simple Algorithms from Examples*, arXiv:1511.07275 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.07275.pdf)]\n\n### Robotics\n\n* Hongyuan Mei, Mohit Bansal, and Matthew R. Walter, *Listen, Attend, and Walk: Neural Mapping of Navigational Instructions to Action Sequences*, arXiv:1506.04089 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.04089.pdf)]\n* Marvin Zhang, Sergey Levine, Zoe McCarthy, Chelsea Finn, and Pieter Abbeel, *Policy Learning with Continuous Memory States for Partially Observed Robotic Control,* arXiv:1507.01273. 
[[Paper]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1507.01273)\n\n### Other\n* Alex Graves, *Generating Sequences With Recurrent Neural Networks,* arXiv:1308.0850 [[Paper]](http:\u002F\u002Farxiv.org\u002Fabs\u002F1308.0850)\n* Volodymyr Mnih, Nicolas Heess, Alex Graves, and Koray Kavukcuoglu, *Recurrent Models of Visual Attention*, NIPS 2014 \u002F arXiv:1406.6247 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1406.6247.pdf)]\n* Wojciech Zaremba and Ilya Sutskever, *Learning to Execute*, arXiv:1410.4615 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1410.4615.pdf)] [[Code](https:\u002F\u002Fgithub.com\u002Fwojciechz\u002Flearning_to_execute)]\n* Samy Bengio, Oriol Vinyals, Navdeep Jaitly, and Noam Shazeer, *Scheduled Sampling for Sequence Prediction with\nRecurrent Neural Networks*, arXiv:1506.03099 \u002F NIPS 2015 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.03099)]\n* Bing Shuai, Zhen Zuo, Gang Wang, and Bing Wang, *DAG-Recurrent Neural Networks For Scene Labeling*, arXiv:1509.00552 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1509.00552)]\n* Soren Kaae Sonderby, Casper Kaae Sonderby, Lars Maaloe, and Ole Winther, *Recurrent Spatial Transformer Networks*, arXiv:1509.05329 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1509.05329)]\n* Cesar Laurent, Gabriel Pereyra, Philemon Brakel, Ying Zhang, and Yoshua Bengio, *Batch Normalized Recurrent Neural Networks*, arXiv:1510.01378 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1510.01378)]\n* Jiwon Kim, Jung Kwon Lee, Kyoung Mu Lee, *Deeply-Recursive Convolutional Network for Image Super-Resolution*, arXiv:1511.04491 [[Paper]](http:\u002F\u002Farxiv.org\u002Fabs\u002F1511.04491)\n* Quan Gan, Qipeng Guo, Zheng Zhang, and Kyunghyun Cho, *First Step toward Model-Free, Anonymous Object Tracking with Recurrent Neural Networks*, arXiv:1511.06425 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.06425.pdf)]\n* Francesco Visin, Kyle Kastner, Aaron Courville, Yoshua Bengio, Matteo Matteucci, and 
Kyunghyun Cho, *ReSeg: A Recurrent Neural Network for Object Segmentation*, arXiv:1511.07053 [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.07053.pdf)]\n* Juergen Schmidhuber, *On Learning to Think: Algorithmic Information Theory for Novel Combinations of Reinforcement Learning Controllers and Recurrent Neural World Models*, arXiv:1511.09249 [[Paper]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.09249)\n\n## Datasets\n* Speech Recognition\n  * [OpenSLR](http:\u002F\u002Fwww.openslr.org\u002Fresources.php) (Open Speech and Language Resources)\n    * [LibriSpeech ASR corpus](http:\u002F\u002Fwww.openslr.org\u002F12\u002F)\n  * [VoxForge](http:\u002F\u002Fvoxforge.org\u002Fhome)\n* Image Captioning\n  * [Flickr 8k](http:\u002F\u002Fnlp.cs.illinois.edu\u002FHockenmaierGroup\u002FFraming_Image_Description\u002FKCCA.html)\n  * [Flickr 30k](http:\u002F\u002Fshannon.cs.illinois.edu\u002FDenotationGraph\u002F)\n  * [Microsoft COCO](http:\u002F\u002Fmscoco.org\u002Fhome\u002F)\n* Question Answering\n  * [The bAbI Project](http:\u002F\u002Ffb.ai\u002Fbabi) - Dataset for text understanding and reasoning, by Facebook AI Research. 
Contains:\n    * The (20) QA bAbI tasks - [[Paper](http:\u002F\u002Farxiv.org\u002Fabs\u002F1502.05698)]\n    * The (6) dialog bAbI tasks - [[Paper](http:\u002F\u002Farxiv.org\u002Fabs\u002F1605.07683)]\n    * The Children's Book Test - [[Paper](http:\u002F\u002Farxiv.org\u002Fabs\u002F1511.02301)]\n    * The Movie Dialog dataset - [[Paper](http:\u002F\u002Farxiv.org\u002Fabs\u002F1511.06931)]\n    * The MovieQA dataset - [[Data](http:\u002F\u002Fwww.thespermwhale.com\u002Fjaseweston\u002Fbabi\u002Fmovie_dialog_dataset.tgz)]\n    * The Dialog-based Language Learning dataset - [[Paper](http:\u002F\u002Farxiv.org\u002Fabs\u002F1604.06045)]\n    * The SimpleQuestions dataset - [[Paper](http:\u002F\u002Farxiv.org\u002Fabs\u002F1506.02075)]\n  * [SQuAD](https:\u002F\u002Fstanford-qa.com\u002F) - Stanford Question Answering Dataset :  [[Paper](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1606.05250)]\n* Image Question Answering\n  * [DAQUAR](https:\u002F\u002Fwww.mpi-inf.mpg.de\u002Fdepartments\u002Fcomputer-vision-and-multimodal-computing\u002Fresearch\u002Fvision-and-language\u002Fvisual-turing-challenge\u002F) - built upon [NYU Depth v2](http:\u002F\u002Fcs.nyu.edu\u002F~silberman\u002Fdatasets\u002Fnyu_depth_v2.html) by N. 
Silberman et al.\n  * [VQA](http:\u002F\u002Fwww.visualqa.org\u002F) - based on [MSCOCO](http:\u002F\u002Fmscoco.org\u002F) images\n  * [Image QA](http:\u002F\u002Fwww.cs.toronto.edu\u002F~mren\u002Fimageqa\u002Fdata\u002Fcocoqa\u002F) - based on MSCOCO images\n  * [Multilingual Image QA](http:\u002F\u002Fidl.baidu.com\u002FFM-IQA.html) - built from scratch by Baidu - in Chinese, with English translation\n* Action Recognition\n  * [THUMOS](http:\u002F\u002Fwww.thumos.info\u002Fhome.html) : Large-scale action recognition dataset\n  * [MultiTHUMOS](http:\u002F\u002Fai.stanford.edu\u002F~syyeung\u002Fresources\u002Fmultithumos.zip) : Extension of THUMOS '14 action detection dataset with dense multilabele annotation\n\n## Blogs\n* [The Unreasonable Effectiveness of RNNs](http:\u002F\u002Fkarpathy.github.io\u002F2015\u002F05\u002F21\u002Frnn-effectiveness\u002F) by [Andrej Karpathy](http:\u002F\u002Fcs.stanford.edu\u002Fpeople\u002Fkarpathy\u002F)\n* [Understanding LSTM Networks](http:\u002F\u002Fcolah.github.io\u002Fposts\u002F2015-08-Understanding-LSTMs\u002F) in [Colah's blog](http:\u002F\u002Fcolah.github.io\u002F)\n* [WildML](http:\u002F\u002Fwww.wildml.com\u002F) blog's RNN tutorial [[Part1](http:\u002F\u002Fwww.wildml.com\u002F2015\u002F09\u002Frecurrent-neural-networks-tutorial-part-1-introduction-to-rnns\u002F)], [[Part2](http:\u002F\u002Fwww.wildml.com\u002F2015\u002F09\u002Frecurrent-neural-networks-tutorial-part-2-implementing-a-language-model-rnn-with-python-numpy-and-theano\u002F)], [[Part3](http:\u002F\u002Fwww.wildml.com\u002F2015\u002F10\u002Frecurrent-neural-networks-tutorial-part-3-backpropagation-through-time-and-vanishing-gradients\u002F)], [[Part4](http:\u002F\u002Fwww.wildml.com\u002F2015\u002F10\u002Frecurrent-neural-network-tutorial-part-4-implementing-a-grulstm-rnn-with-python-and-theano\u002F)]\n* [RNNs in Tensorflow, a Practical Guide and Undocumented 
Features](http:\u002F\u002Fwww.wildml.com\u002F2016\u002F08\u002Frnns-in-tensorflow-a-practical-guide-and-undocumented-features\u002F)\n* [Optimizing RNN Performance](https:\u002F\u002Fsvail.github.io\u002F) from Baidu's Silicon Valley AI Lab.\n* [Character Level Language modelling using RNN](http:\u002F\u002Fnbviewer.jupyter.org\u002Fgist\u002Fyoavg\u002Fd76121dfde2618422139) by Yoav Goldberg\n* [Implement an RNN in Python](http:\u002F\u002Fpeterroelants.github.io\u002Fposts\u002Frnn_implementation_part01\u002F).\n* [LSTM Backpropogation](http:\u002F\u002Farunmallya.github.io\u002Fwriteups\u002Fnn\u002Flstm\u002Findex.html#\u002F)\n* [Introduction to Recurrent Networks in TensorFlow](https:\u002F\u002Fdanijar.com\u002Fintroduction-to-recurrent-networks-in-tensorflow\u002F) by Danijar Hafner\n* [Variable Sequence Lengths in TensorFlow](https:\u002F\u002Fdanijar.com\u002Fvariable-sequence-lengths-in-tensorflow\u002F) by Danijar Hafner\n* [Written Memories: Understanding, Deriving and Extending the LSTM](http:\u002F\u002Fr2rt.com\u002Fwritten-memories-understanding-deriving-and-extending-the-lstm.html) by Silviu Pitis\n\n## Online Demos\n* Alex graves, hand-writing generation [[link](http:\u002F\u002Fwww.cs.toronto.edu\u002F~graves\u002Fhandwriting.html)]\n* Ink Poster: Handwritten post-it notes [[link](http:\u002F\u002Fwww.inkposter.com\u002F?)]\n* LSTMVis: Visual Analysis for Recurrent Neural Networks [[link](http:\u002F\u002Flstm.seas.harvard.edu\u002F)]\n","# 令人惊叹的循环神经网络\n\n一个精心整理的资源列表，专门针对循环神经网络（与*深度学习*密切相关）。\n\n维护者 - [Myungsub Choi](https:\u002F\u002Fgithub.com\u002Fmyungsub)、[Taeksoo Kim](https:\u002F\u002Fgithub.com\u002Fjazzsaxmafia)、[Jiwon Kim](https:\u002F\u002Fgithub.com\u002Fkjw0612)\n\n我们还有其他主题的页面：[awesome-deep-vision](https:\u002F\u002Fgithub.com\u002Fkjw0612\u002Fawesome-deep-vision)、[awesome-random-forest](https:\u002F\u002Fgithub.com\u002Fkjw0612\u002Fawesome-random-forest)\n\n## 
贡献\n欢迎随时提交[拉取请求](https:\u002F\u002Fgithub.com\u002Fkjw0612\u002Fawesome-rnn\u002Fpulls)，或发送邮件至 Myungsub Choi (cms6539@gmail.com)，亦可加入我们的聊天群，添加相关链接。\n\n该项目目前未处于积极维护状态。\n\n[![加入 https:\u002F\u002Fgitter.im\u002Fkjw0612\u002Fawesome-rnn 的聊天](https:\u002F\u002Fbadges.gitter.im\u002FJoin%20Chat.svg)](https:\u002F\u002Fgitter.im\u002Fkjw0612\u002Fawesome-rnn?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)\n\n## 分享\n+ [在 Twitter 上分享](http:\u002F\u002Ftwitter.com\u002Fhome?status=http:\u002F\u002Fjiwonkim.org\u002Fawesome-rnn%0AResources%20for%20Recurrent%20Neural%20Networks)\n+ [在 Facebook 上分享](http:\u002F\u002Fwww.facebook.com\u002Fsharer\u002Fsharer.php?u=https:\u002F\u002Fjiwonkim.org\u002Fawesome-rnn)\n+ [在 Google Plus 上分享](http:\u002F\u002Fplus.google.com\u002Fshare?url=https:\u002F\u002Fjiwonkim.org\u002Fawesome-rnn)\n+ [在 LinkedIn 上分享](http:\u002F\u002Fwww.linkedin.com\u002FshareArticle?mini=true&url=https:\u002F\u002Fjiwonkim.org\u002Fawesome-rnn&title=Awesome%20Recurrent%20Neural&Networks&summary=&source=)\n\n## 目录\n\n- [代码](#codes)\n- [理论](#theory)\n  - [讲座](#lectures)\n  - [书籍 \u002F 论文](#books--thesis)\n  - [架构变体](#architecture-variants)\n    - [结构](#structure)\n    - [记忆](#memory)\n  - [综述](#surveys)\n- [应用](#applications)\n  - [自然语言处理](#natural-language-processing)\n    - [语言建模](#language-modeling)\n    - [语音识别](#speech-recognition)\n    - [机器翻译](#machine-translation)\n    - [对话建模](#conversation-modeling)\n    - [问答](#question-answering)\n  - [计算机视觉](#computer-vision)\n    - [目标识别](#object-recognition)\n    - [图像生成](#image-generation)\n    - [视频分析](#video-analysis)\n  - [多模态（CV+NLP）](#multimodal-cv--nlp)\n    - [图像字幕生成](#image-captioning)\n    - [视频字幕生成](#video-captioning)\n    - [视觉问答](#visual-question-answering)\n  - [图灵机](#turing-machines)\n  - [机器人技术](#robotics)\n  - [其他](#other)\n- [数据集](#datasets)\n- [博客](#blogs)\n- [在线演示](#online-demos)\n\n## 代码\n* [TensorFlow](https:\u002F\u002Fwww.tensorflow.org\u002F) - Python、C++\n  * 
[入门](https:\u002F\u002Fwww.tensorflow.org\u002Fversions\u002Fmaster\u002Fget_started\u002Findex.html)、[教程](https:\u002F\u002Fwww.tensorflow.org\u002Fversions\u002Fmaster\u002Ftutorials\u002Findex.html)\n    * [循环神经网络教程](https:\u002F\u002Fwww.tensorflow.org\u002Fversions\u002Fmaster\u002Ftutorials\u002Frecurrent\u002Findex.html)\n    * [序列到序列模型教程](https:\u002F\u002Fwww.tensorflow.org\u002Fversions\u002Fmaster\u002Ftutorials\u002Fseq2seq\u002Findex.html)\n  * [教程](https:\u002F\u002Fgithub.com\u002Fnlintz\u002FTensorFlow-Tutorials) by nlintz\n  * [Notebook 示例](https:\u002F\u002Fgithub.com\u002Faymericdamien\u002FTensorFlow-Examples) by aymericdamien\n  * [Scikit Flow (skflow)](https:\u002F\u002Fgithub.com\u002Ftensorflow\u002Fskflow) - TensorFlow 的简化 Scikit-learn 风格接口\n  * [Keras](http:\u002F\u002Fkeras.io\u002F)：基于 TensorFlow \u002F Theano 的模块化深度学习库，类似于 Torch\n  * [char-rnn-tensorflow](https:\u002F\u002Fgithub.com\u002Fsherjilozair\u002Fchar-rnn-tensorflow) by sherjilozair：TensorFlow 中的字符级 RNN\n* [Theano](http:\u002F\u002Fdeeplearning.net\u002Fsoftware\u002Ftheano\u002F) - Python\n  * 简单的 IPython [Theano 教程](http:\u002F\u002Fnbviewer.jupyter.org\u002Fgithub\u002Fcraffel\u002Ftheano-tutorial\u002Fblob\u002Fmaster\u002FTheano%20Tutorial.ipynb)\n  * [深度学习教程](http:\u002F\u002Fwww.deeplearning.net\u002Ftutorial\u002F)\n    * [用于语音语义解析的 RNN](http:\u002F\u002Fwww.deeplearning.net\u002Ftutorial\u002Frnnslu.html#rnnslu)\n    * [用于情感分析的 LSTM 网络](http:\u002F\u002Fwww.deeplearning.net\u002Ftutorial\u002Flstm.html#lstm)\n  * [Pylearn2](http:\u002F\u002Fdeeplearning.net\u002Fsoftware\u002Fpylearn2\u002F)：封装了大量深度学习模型和训练算法的库\n  * [Blocks](https:\u002F\u002Fgithub.com\u002Fmila-udem\u002Fblocks)：模块化框架，可用于构建神经网络模型\n  * [Keras](http:\u002F\u002Fkeras.io\u002F)：基于 TensorFlow \u002F Theano 的模块化深度学习库，类似于 Torch\n  * [Lasagne](https:\u002F\u002Fgithub.com\u002FLasagne\u002FLasagne)：轻量级库，用于在 Theano 中构建和训练神经网络\n  * [theano-rnn](https:\u002F\u002Fgithub.com\u002Fgwtaylor\u002Ftheano-rnn) by 
Graham Taylor\n  * [Passage](https:\u002F\u002Fgithub.com\u002FIndicoDataSolutions\u002FPassage)：用于文本分析的 RNN 库\n  * [Theano-Lights](https:\u002F\u002Fgithub.com\u002FIvaylo-Popov\u002FTheano-Lights)：包含许多生成模型\n* [Caffe](https:\u002F\u002Fgithub.com\u002FBVLC\u002Fcaffe) - C++，带有 MATLAB\u002FPython 封装\n  * [LRCN](http:\u002F\u002Fjeffdonahue.com\u002Flrcn\u002F) by Jeff Donahue\n* [Torch](http:\u002F\u002Ftorch.ch\u002F) - Lua\n  * [torchnet](https:\u002F\u002Fgithub.com\u002Ftorchnet\u002Ftorchnet)：模块化框架，可用于构建神经网络模型\n  * [char-rnn](https:\u002F\u002Fgithub.com\u002Fkarpathy\u002Fchar-rnn) by Andrej Karpathy：多层 RNN\u002FLSTM\u002FGRU，用于训练和采样字符级别的语言模型\n  * [torch-rnn](https:\u002F\u002Fgithub.com\u002Fjcjohnson\u002Ftorch-rnn) by Justin Johnson：可重用的 RNN\u002FLSTM 模块，基于 torch7，是对 char-rnn 的更快且更节省内存的重新实现\n  * [neuraltalk2](https:\u002F\u002Fgithub.com\u002Fkarpathy\u002Fneuraltalk2) by Andrej Karpathy：循环神经网络为图像生成描述，是原始 [neuraltalk](https:\u002F\u002Fgithub.com\u002Fkarpathy\u002Fneuraltalk) 的更快更好版本\n  * [LSTM](https:\u002F\u002Fgithub.com\u002Fwojzaremba\u002Flstm) by Wojciech Zaremba：长短期记忆单元，用于在单词级别的 Penn Tree Bank 数据集上训练语言模型\n  * [Oxford](https:\u002F\u002Fgithub.com\u002Foxford-cs-ml-2015) by Nando de Freitas：牛津大学计算机科学系 2015 年机器学习实践课程\n  * [rnn](https:\u002F\u002Fgithub.com\u002FElement-Research\u002Frnn) by Nicholas Leonard：通用的 RNN、LSTM、BRNN 和 BLSTM 实现库（经过高度单元测试）。\n* [PyTorch](http:\u002F\u002Fpytorch.org\u002F) - Python\n  * [基于词的 RNN 示例](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Fexamples\u002Ftree\u002Fmaster\u002Fword_language_model)：展示了 PyTorch 内置的用于语言建模的 RNN 模块\n  * [实用 PyTorch 教程](https:\u002F\u002Fgithub.com\u002Fspro\u002Fpractical-pytorch) by Sean Robertson：专注于使用 RNN 进行自然语言处理\n  * [PyTorch 中的 NLP 深度学习](https:\u002F\u002Fgithub.com\u002Frguthrie3\u002FDeepLearningForNLPInPytorch) by Robert Guthrie：专为佐治亚理工学院的一门自然语言处理课程编写\n* [DL4J](http:\u002F\u002Fdeeplearning4j.org\u002F) by [Skymind](http:\u002F\u002Fwww.skymind.io\u002F)：适用于 Java、Scala 和 Clojure 
的深度学习库，支持 Hadoop、Spark 和 GPU\n  * [文档](http:\u002F\u002Fdeeplearning4j.org\u002F)（也有[中文](http:\u002F\u002Fdeeplearning4j.org\u002Fzh-index.html)、[日文](http:\u002F\u002Fdeeplearning4j.org\u002Fja-index.html)、[韩文](http:\u002F\u002Fdeeplearning4j.org\u002Fkr-index.html)版本）：[RNN](http:\u002F\u002Fdeeplearning4j.org\u002Fusingrnns.html)、[LSTM](http:\u002F\u002Fdeeplearning4j.org\u002Flstm.html)\n  * [RNN 示例](https:\u002F\u002Fgithub.com\u002Fdeeplearning4j\u002Fdl4j-examples\u002Ftree\u002Fmaster\u002Fdl4j-examples\u002Fsrc\u002Fmain\u002Fjava\u002Forg\u002Fdeeplearning4j\u002Fexamples\u002Frecurrent)\n* 其他\n  * [Neon](http:\u002F\u002Fneon.nervanasys.com\u002Fdocs\u002Flatest\u002Findex.html)：新的 Python 深度学习库，支持 RNN\u002FLSTM，并提供快速的图像描述模型\n  * [Brainstorm](https:\u002F\u002Fgithub.com\u002FIDSIA\u002Fbrainstorm)：由 IDSIA 开发的 Python 深度学习库，包含多种循环结构\n  * [Chainer](http:\u002F\u002Fchainer.org\u002F)：新的灵活的 Python 深度学习库\n  * [CGT](http:\u002F\u002Fjoschu.github.io\u002F)(计算图工具包)：复制了 Theano 的 API，但编译时间极短，并支持多线程\n  * [RNNLIB](https:\u002F\u002Fsourceforge.net\u002Fp\u002Frnnl\u002Fwiki\u002FHome\u002F) by Alex Graves：基于 C++ 的 LSTM 库\n  * [RNNLM](http:\u002F\u002Frnnlm.org\u002F) by Tomas Mikolov：简单的 C++ 代码\n  * [faster-RNNLM](https:\u002F\u002Fgithub.com\u002Fyandex\u002Ffaster-rnnlm) of Yandex：旨在处理大规模数据集的 C++ RNNLM 实现\n  * [neuraltalk](https:\u002F\u002Fgithub.com\u002Fkarpathy\u002Fneuraltalk) by Andrej Karpathy：基于 numpy 的 RNN\u002FLSTM 实现\n  * [gist](https:\u002F\u002Fgist.github.com\u002Fkarpathy\u002F587454dc0146a6ae21fc) by Andrej Karpathy：高效的批量 LSTM 实现的原始 numpy 代码\n  * [Recurrentjs](https:\u002F\u002Fgithub.com\u002Fkarpathy\u002Frecurrentjs) by Andrej Karpathy：RNN 的 beta 版 JavaScript 库\n  * [DARQN](https:\u002F\u002Fgithub.com\u002F5vision\u002FDARQN) by 5vision：深度注意力循环 Q 网络\n\n## 理论\n### 讲座\n* 斯坦福 NLP ([CS224d](http:\u002F\u002Fcs224d.stanford.edu\u002Findex.html)) by Richard Socher\n  * [讲义第 3 
讲](http:\u002F\u002Fcs224d.stanford.edu\u002Flecture_notes\u002FLectureNotes3.pdf)：神经网络基础\n  * [讲义第 4 讲](http:\u002F\u002Fcs224d.stanford.edu\u002Flecture_notes\u002FLectureNotes4.pdf)：RNN 语言模型、双向 RNN、GRU、LSTM\n* 斯坦福视觉 ([CS231n](http:\u002F\u002Fcs231n.github.io\u002F)) by Andrej Karpathy\n  * 关于 NN 基础和 CNN\n* 牛津 [机器学习](https:\u002F\u002Fwww.cs.ox.ac.uk\u002Fpeople\u002Fnando.defreitas\u002Fmachinelearning\u002F) by Nando de Freitas\n  * [第 12 讲](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=56TYLaQN4N8)：循环神经网络和 LSTM\n  * [第 13 讲](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=-yX1SYeDHbg)：（特邀讲座）Alex Graves 谈 RNN 中的幻觉\n\n### 书籍 \u002F 学位论文\n* 亚历克斯·格雷夫斯（2008年）\n  * [基于循环神经网络的监督序列标注](http:\u002F\u002Fwww.cs.toronto.edu\u002F~graves\u002Fpreprint.pdf)\n* 托马斯·米科洛夫（2012年）\n  * [基于神经网络的统计语言模型](http:\u002F\u002Fwww.fit.vutbr.cz\u002F~imikolov\u002Frnnlm\u002Fthesis.pdf)\n* 伊利亚·苏茨克维尔（2013年）\n  * [循环神经网络的训练](http:\u002F\u002Fwww.cs.utoronto.ca\u002F~ilya\u002Fpubs\u002Filya_sutskever_phd_thesis.pdf)\n* 理查德·索彻（2014年）\n  * [用于自然语言处理和计算机视觉的递归深度学习](http:\u002F\u002Fnlp.stanford.edu\u002F~socherr\u002Fthesis.pdf)\n* 伊恩·古德费洛、约书亚·本吉奥和阿伦·库尔维尔（2016年）\n  * [《深度学习》第10章](http:\u002F\u002Fwww.deeplearningbook.org\u002Fcontents\u002Frnn.html)\n\n\n### 架构变体\n\n#### 结构\n\n* 双向RNN [[论文](http:\u002F\u002Fwww.di.ufpe.br\u002F~fnj\u002FRNA\u002Fbibliografia\u002FBRNN.pdf)]\n  * 迈克·舒斯特和库尔迪普·K·帕利瓦尔，《双向循环神经网络》，信号处理汇刊，1997年\n* 多维RNN [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F0705.2011.pdf)]\n  * 亚历克斯·格雷夫斯、圣地亚哥·费尔南德斯和尤尔根·施密德胡伯，《多维循环神经网络》，ICANN 2007\n* GFRNN [[论文-arXiv](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1502.02367)] [[论文-ICML](http:\u002F\u002Fjmlr.org\u002Fproceedings\u002Fpapers\u002Fv37\u002Fchung15.pdf)] [[补充材料](http:\u002F\u002Fjmlr.org\u002Fproceedings\u002Fpapers\u002Fv37\u002Fchung15-supp.pdf)]\n  * 朱永忠、恰格拉尔·居尔切赫雷、丘庆贤、约书亚·本吉奥，《门控反馈循环神经网络》，arXiv:1502.02367 \u002F ICML 2015\n* 树结构RNNs\n  * 凯·盛泰、理查德·索彻和克里斯托弗·D·曼宁，《基于树结构长短期记忆网络的语义表示改进》，arXiv:1503.00075 \u002F ACL 2015 
[[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1503.00075)]\n  * 塞缪尔·R·鲍曼、克里斯托弗·D·曼宁和克里斯托弗·波茨，《无需树状架构的神经网络中的树状组合》，arXiv:1506.04834 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.04834)]\n* 网格LSTM [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1507.01526)] [[代码](https:\u002F\u002Fgithub.com\u002Fcoreylynch\u002Fgrid-lstm)]\n  * 纳尔·卡尔希布伦纳、伊沃·丹尼赫尔卡和亚历克斯·格雷夫斯，《网格长短期记忆》，arXiv:1507.01526\n* 分段RNN [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.06018v2.pdf)]\n  * 孔令鹏、克里斯·戴尔、诺亚·史密斯，《分段循环神经网络》，ICLR 2016。\n* 集合的Seq2seq [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.06391v4.pdf)]\n  * 奥里奥尔·维尼亚尔斯、萨米·本吉奥、曼朱纳特·库德卢尔，《顺序很重要：用于集合的序列到序列模型》，ICLR 2016。\n* 层次化循环神经网络 [[论文](http:\u002F\u002Farxiv.org\u002Fabs\u002F1609.01704)]\n  * 朱永忠、安成镇、约书亚·本吉奥，《层次化多尺度循环神经网络》，arXiv:1609.01704\n\n#### 记忆模块\n\n* LSTM [[论文](http:\u002F\u002Fdeeplearning.cs.cmu.edu\u002Fpdfs\u002FHochreiter97_lstm.pdf)]\n  * 塞普·霍克赖特和尤尔根·施密德胡伯，《长短期记忆》，神经计算，1997年\n* GRU（门控循环单元）[[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1406.1078.pdf)]\n  * 丘庆贤、巴特·范·贝尔伦博尔、恰格拉尔·居尔切赫雷、季米特里·巴赫达诺夫、费蒂·布加雷斯、霍尔格·施文克和约书亚·本吉奥，《使用RNN编码器-解码器进行统计机器翻译的短语表示学习》，arXiv:1406.1078 \u002F EMNLP 2014\n* NTM [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1410.5401)]\n  * A.格雷夫斯、G.韦恩和I.丹尼赫尔卡，《神经图灵机》，arXiv预印本arXiv:1410.5401\n* 神经GPU [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.08228.pdf)]\n  * 卢卡什·凯泽、伊利亚·苏茨克维尔，arXiv:1511.08228 \u002F ICML 2016（待审）\n* 记忆网络 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1410.3916)]\n  * 杰森·韦斯顿、苏米特·乔普拉、安托万·博尔德斯，《记忆网络》，arXiv:1410.3916\n* 指针网络 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.03134)]\n  * 奥里奥尔·维尼亚尔斯、梅雷·福图纳托和纳夫迪普·贾特利，《指针网络》，arXiv:1506.03134 \u002F NIPS 2015\n* 深度注意力循环Q网络 [[论文](http:\u002F\u002Farxiv.org\u002Fabs\u002F1512.01693)]\n  * 伊万·索罗金、阿列克谢·谢列兹涅夫、米哈伊尔·帕夫洛夫、亚历山大·费多罗夫、安娜斯塔西娅·伊格纳捷娃，《深度注意力循环Q网络》，arXiv:1512.01693\n* 动态记忆网络 [[论文](http:\u002F\u002Farxiv.org\u002Fabs\u002F1506.07285)]\n  * 
安基特·库马尔、奥赞·伊尔索伊、彼得·翁德鲁斯卡、莫希特·艾耶尔、詹姆斯·布拉德伯里、伊桑·古尔拉贾尼、维克多·钟、罗曼·保卢斯、理查德·索彻，《有问必答：用于自然语言处理的动态记忆网络》，arXiv:1506.07285\n\n### 综述\n* 扬·勒丘恩、约书亚·本吉奥和杰弗里·辛顿，《深度学习》（Nature，2015年）\n* 克劳斯·格雷夫、鲁佩什·库马尔·斯里瓦斯塔瓦、扬·考特尼克、巴斯·R·斯托因布林克、尤尔根·施密德胡伯，《LSTM：搜索空间之旅》（arXiv，2015年）\n* 扎卡里·C·利普顿，《关于用于序列学习的循环神经网络的批判性评论》（arXiv，2015年）\n* 安德烈·卡帕西、贾斯汀·约翰逊、李飞飞，《可视化与理解循环网络》（arXiv，2015年）\n* 拉法尔·约泽福维奇、沃伊切赫·扎伦巴、伊利亚·苏茨克维尔，《对循环网络架构的实证探索》（JMLR，2015年）。\n\n## 应用\n\n### 自然语言处理\n\n#### 语言建模\n* 托马斯·米科洛夫、马丁·卡拉菲亚特、卢卡斯·布尔盖特、扬“洪扎”切尔诺茨基、桑杰夫·库丹普尔，《基于循环神经网络的语言模型》，Interspeech 2010 [[论文](http:\u002F\u002Fwww.fit.vutbr.cz\u002Fresearch\u002Fgroups\u002Fspeech\u002Fpubli\u002F2010\u002Fmikolov_interspeech2010_IS100722.pdf)]\n* 托马斯·米科洛夫、斯特凡·孔布林克、卢卡斯·布尔盖特、扬“洪扎”切尔诺茨基、桑杰夫·库丹普尔，《循环神经网络语言模型的扩展》，ICASSP 2011 [[论文](http:\u002F\u002Fwww.fit.vutbr.cz\u002Fresearch\u002Fgroups\u002Fspeech\u002Fpubli\u002F2011\u002Fmikolov_icassp2011_5528.pdf)]\n* 斯特凡·孔布林克、托马斯·米科洛夫、马丁·卡拉菲亚特、卢卡斯·布尔盖特，《会议识别中的基于循环神经网络的语言建模》，Interspeech 2011 [[论文](http:\u002F\u002Fwww.fit.vutbr.cz\u002F~imikolov\u002Frnnlm\u002FApplicationOfRNNinMeetingRecognition_IS2011.pdf)]\n* 李纪伟、明堂龙和丹·朱拉夫斯基，《用于段落和文档的层次化神经自动编码器》，ACL 2015 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.01057)], [[代码](https:\u002F\u002Fgithub.com\u002Fjiweil\u002FHierarchical-Neural-Autoencoder)]\n* 瑞安·基罗斯、于坤·朱、鲁斯兰·萨拉胡丁诺夫和理查德·S·泽梅尔，《跳过思想向量》，arXiv:1506.06726 \u002F NIPS 2015 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.06726.pdf)]\n* 尹金、雅辛·热尔尼特、大卫·松塔格和亚历山大·M·拉什，《字符感知型神经语言模型》，arXiv:1508.06615 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1508.06615)]\n* 张兴兴、梁陆和米雷拉·拉帕塔，《树形循环神经网络及其在语言建模中的应用》，arXiv:1511.00060 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.00060.pdf)]\n* 费利克斯·希尔、安托万·博尔德斯、苏米特·乔普拉和杰森·韦斯顿，《金发姑娘原则：用显式记忆表示阅读儿童读物》，arXiv:1511.0230 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.02301.pdf)]\n\n#### 语音识别\n* 杰弗里·辛顿、李邓、董宇、乔治·E·达尔、阿卜杜勒-拉赫曼·穆罕默德、纳夫迪普·贾特利、安德鲁·塞尼尔、文森特·范霍克、帕特里克·阮、塔拉·N·赛纳特和布赖恩·金斯伯里，《用于语音识别声学建模的深度神经网络》，IEEE信号处理杂志，2012年 
[[论文](http:\u002F\u002Fcs224d.stanford.edu\u002Fpapers\u002Fmaas_paper.pdf)]\n* 亚历克斯·格雷夫斯、阿卜杜勒-拉赫曼·穆罕默德和杰弗里·辛顿，《基于深度循环神经网络的语音识别》，arXiv:1303.5778 \u002F ICASSP 2013 [[论文](http:\u002F\u002Fwww.cs.toronto.edu\u002F~fritz\u002Fabsps\u002FRNN13.pdf)]\n* 扬·乔罗夫斯基、季米特里·巴达诺夫、德米特里·谢尔久克、邱炯炫和约书亚·本吉奥，《基于注意力机制的语音识别模型》，arXiv:1506.07503 \u002F NIPS 2015 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.07503)]\n* 哈希姆·萨克、安德鲁·塞尼尔、卡尼什卡·拉奥和弗朗索瓦丝·博菲斯。《用于语音识别的快速且准确的循环神经网络声学模型》，arXiv:1507.06947 2015年 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1507.06947v1.pdf)]。\n\n#### 机器翻译\n* 牛津大学 [[论文](http:\u002F\u002Fwww.nal.ai\u002Fpapers\u002Fkalchbrennerblunsom_emnlp13)]\n  * 纳尔·卡尔奇布伦纳和菲尔·布伦索姆，《递归连续翻译模型》，EMNLP 2013\n* 蒙特利尔大学\n  * 邱炯炫、巴特·范·贝尔里恩博尔、恰格拉尔·古尔切赫雷、季米特里·巴达诺夫、费蒂·布加雷斯、霍尔格·施文克和约书亚·本吉奥，《使用RNN编码器-解码器学习短语表示以进行统计机器翻译》，arXiv:1406.1078 \u002F EMNLP 2014 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1406.1078)]\n  * 邱炯炫、巴特·范·梅里恩博尔、季米特里·巴达诺夫和约书亚·本吉奥，《神经机器翻译的性质：编码器-解码器方法》，SSST-8 2014 [[论文](http:\u002F\u002Fwww.aclweb.org\u002Fanthology\u002FW14-4012)]\n  * 让·普热特-阿巴迪、季米特里·巴达诺夫、巴特·范·梅里恩博尔、邱炯炫和约书亚·本吉奥，《通过自动分段克服神经机器翻译中句子长度的诅咒》，SSST-8 2014\n  * 季米特里·巴达诺夫、邱炯炫和约书亚·本吉奥，《通过联合学习对齐与翻译实现神经机器翻译》，arXiv:1409.0473 \u002F ICLR 2015 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1409.0473)]\n  * 塞巴斯蒂安·让、邱炯炫、罗兰·梅米塞维奇和约书亚·本吉奥，《关于在神经机器翻译中使用超大目标词汇表》，arXiv:1412.2007 \u002F ACL 2015 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1412.2007.pdf)]\n* 蒙特利尔大学 + 中东技术大学 + 缅因大学 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1503.03535.pdf)]\n  * 恰格拉尔·古尔切赫雷、奥尔汗·菲拉特、凯尔文·徐、邱炯炫、洛伊克·巴拉尔、惠伊-奇·林、费蒂·布加雷斯、霍尔格·施文克和约书亚·本吉奥，《在神经机器翻译中使用单语语料库》，arXiv:1503.03535\n* 谷歌 [[论文](http:\u002F\u002Fpapers.nips.cc\u002Fpaper\u002F5346-sequence-to-sequence-learning-with-neural-networks.pdf)]\n  * 伊利亚·苏茨克维尔、奥里奥尔·维尼亚尔斯和郭文乐，《基于神经网络的序列到序列学习》，arXiv:1409.3215 \u002F NIPS 2014\n* 谷歌 + 纽约大学 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1410.8206)]\n  * 明-汤·隆、伊利亚·苏茨克维尔、郭文乐、奥里奥尔·维尼亚尔斯和沃伊切赫·扎伦巴，《解决神经机器翻译中的罕见词问题》，arXiv:1410.8206 \u002F ACL 2015\n* ICT + 华为 
[[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.06442.pdf)]\n  * 孟凡东、陆正东、涂兆鹏、李航和刘群，《一种基于深度记忆的序列到序列学习架构》，arXiv:1506.06442\n* 斯坦福大学 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1508.04025.pdf)]\n  * 明-汤·隆、辉·范和克里斯托弗·D·曼宁，《基于注意力的有效神经机器翻译方法》，arXiv:1508.04025\n* 中东技术大学 + 纽约大学 + 蒙特利尔大学 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1601.01073.pdf)]\n  * 奥尔汗·菲拉特、邱炯炫和约书亚·本吉奥，《具有共享注意力机制的多路多语种神经机器翻译》，arXiv:1601.01073\n\n#### 对话建模\n* 尚立峰、陆正东和李航，《用于短文本对话的神经响应机器》，arXiv:1503.02364 \u002F ACL 2015 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1503.02364)]\n* 奥里奥尔·维尼亚尔斯和郭文乐，《一种神经对话模型》，arXiv:1506.05869 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.05869)]\n* 瑞安·洛厄、尼桑·鲍、尤利安·V·塞尔班和乔埃尔·派诺，《Ubuntu对话语料库：用于非结构化多轮对话系统研究的大规模数据集》，arXiv:1506.08909 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.08909)]\n* 杰西·道奇、安德烈娅·加内、张翔、安托万·博尔德斯、苏米特·乔普拉、亚历山大·米勒、阿瑟·斯拉姆和杰森·韦斯顿，《评估端到端对话系统学习的前提条件》，arXiv:1511.06931 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.06931)]\n* 杰森·韦斯顿，《基于对话的语言学习》，arXiv:1604.06045，[[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1604.06045)]\n* 安托万·博尔德斯和杰森·韦斯顿，《学习端到端的目标导向对话》，arXiv:1605.07683 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1605.07683)]\n\n#### 问答\n* FAIR\n  * 杰森·韦斯顿、安托万·博尔德斯、苏米特·乔普拉、托马斯·米科洛夫和亚历山大·M·拉什，《迈向人工智能完备的问答：一组前提玩具任务》，arXiv:1502.05698 [[网页](https:\u002F\u002Fresearch.facebook.com\u002Fresearchers\u002F1543934539189348)] [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1502.05698.pdf)]\n  * 安托万·博尔德斯、尼古拉斯·乌苏涅尔、苏米特·乔普拉和杰森·韦斯顿，《使用记忆网络进行简单问答》，arXiv:1506.02075 [[论文](http:\u002F\u002Farxiv.org\u002Fabs\u002F1506.02075)]\n  * 费利克斯·希尔、安托万·博尔德斯、苏米特·乔普拉、杰森·韦斯顿，《金发姑娘原则：用显式记忆表示阅读儿童书籍》，ICLR 2016 [[论文](http:\u002F\u002Farxiv.org\u002Fabs\u002F1511.02301)]\n* DeepMind + 牛津大学 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.03340.pdf)]\n  * 卡尔·M·赫尔曼、托马斯·科奇斯基、爱德华·格雷芬斯特特、拉斯·埃斯佩霍尔特、威尔·凯、穆斯塔法·苏莱曼和菲尔·布伦索姆，《教机器阅读和理解》，arXiv:1506.03340 \u002F NIPS 2015\n* MetaMind [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.07285.pdf)]\n  * 
安基特·库马尔、欧赞·伊尔索伊、乔纳森·苏、詹姆斯·布拉德伯里、罗伯特·英格利什、布莱恩·皮尔斯、彼得·翁德鲁斯卡、莫希特·艾耶尔、伊尚·古尔拉贾尼和理查德·索彻，《问我任何问题：用于自然语言处理的动态记忆网络》，arXiv:1506.07285\n\n### 计算机视觉\n\n#### 物体识别\n* Pedro Pinheiro 和 Ronan Collobert，《用于场景标注的循环卷积神经网络》，ICML 2014 [[论文](http:\u002F\u002Fjmlr.org\u002Fproceedings\u002Fpapers\u002Fv32\u002Fpinheiro14.pdf)]\n* Ming Liang 和 Xiaolin Hu，《用于物体识别的循环卷积神经网络》，CVPR 2015 [[论文](http:\u002F\u002Fwww.cv-foundation.org\u002Fopenaccess\u002Fcontent_cvpr_2015\u002Fpapers\u002FLiang_Recurrent_Convolutional_Neural_2015_CVPR_paper.pdf)]\n* Wonmin Byeon、Thomas Breuel、Federico Raue1 和 Marcus Liwicki1，《使用 LSTM 循环神经网络进行场景标注》，CVPR 2015 [[论文](http:\u002F\u002Fwww.cv-foundation.org\u002Fopenaccess\u002Fcontent_cvpr_2015\u002Fpapers\u002FByeon_Scene_Labeling_With_2015_CVPR_paper.pdf)]\n* Mircea Serban Pavel、Hannes Schulz 和 Sven Behnke，《用于 RGB-D 视频中物体类别分割的循环卷积神经网络》，IJCNN 2015 [[论文](http:\u002F\u002Fwww.ais.uni-bonn.de\u002Fpapers\u002FIJCNN_2015_Pavel.pdf)]\n* Shuai Zheng、Sadeep Jayasumana、Bernardino Romera-Paredes、Vibhav Vineet、Zhizhong Su、Dalong Du、Chang Huang 和 Philip H. S. Torr，《条件随机场作为循环神经网络》，arXiv:1502.03240 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1502.03240)]\n* Xiaodan Liang、Xiaohui Shen、Donglai Xiang、Jiashi Feng、Liang Lin 和 Shuicheng Yan，《基于局部-全局长短时记忆网络的语义对象解析》，arXiv:1511.04510 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.04510.pdf)]\n* Sean Bell、C. Lawrence Zitnick、Kavita Bala 和 Ross Girshick，《内外网：利用跳跃池化和循环神经网络在上下文中检测目标》，arXiv:1512.04143 \u002F ICCV 2015 研讨会 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1512.04143)]\n\n#### 视觉跟踪\n* Quan Gan、Qipeng Guo、Zheng Zhang 和 Kyunghyun Cho，《迈向无模型、匿名物体跟踪的第一步：使用循环神经网络》，arXiv:1511.06425 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.06425)]\n\n\n#### 图像生成\n* Karol Gregor、Ivo Danihelka、Alex Graves、Danilo J. Rezende 和 Daan Wierstra，《DRAW：一种用于图像生成的循环神经网络》，ICML 2015 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1502.04623)]\n* Angeliki Lazaridou、Dat T. Nguyen、R. Bernardi 和 M. 
Baroni，《揭开词嵌入之梦：迈向语言驱动的图像生成》，arXiv:1506.03500 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.03500)]\n* Lucas Theis 和 Matthias Bethge，《使用空间 LSTM 进行生成式图像建模》，arXiv:1506.03478 \u002F NIPS 2015 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.03478)]\n* Aaron van den Oord、Nal Kalchbrenner 和 Koray Kavukcuoglu，《像素递归神经网络》，arXiv:1601.06759 [[论文](http:\u002F\u002Farxiv.org\u002Fabs\u002F1601.06759)]\n\n#### 视频分析\n\n* 多伦多大学 [[论文](http:\u002F\u002Farxiv.org\u002Fabs\u002F1502.04681)]\n  * Nitish Srivastava、Elman Mansimov、Ruslan Salakhutdinov，《使用 LSTM 进行视频表示的无监督学习》，arXiv:1502.04681 \u002F ICML 2015\n* 剑桥大学 [[论文](http:\u002F\u002Farxiv.org\u002Fabs\u002F1511.06309)]\n  * Viorica Patraucean、Ankur Handa、Roberto Cipolla，《具有可微记忆的时空视频自编码器》，arXiv:1511.06309\n\n\n\n### 多模态（计算机视觉 + 自然语言处理）\n\n#### 图像字幕生成\n* UCLA + 百度 [[网页](http:\u002F\u002Fwww.stat.ucla.edu\u002F~junhua.mao\u002Fm-RNN.html)] [[论文-arXiv1](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1410.1090)], [[论文-arXiv2](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1412.6632)]\n  * Junhua Mao, Wei Xu, Yi Yang, Jiang Wang, 和 Alan L. Yuille，《用多模态循环神经网络解释图像》，arXiv:1410.1090\n  * Junhua Mao, Wei Xu, Yi Yang, Jiang Wang, Zhiheng Huang, 和 Alan L. Yuille，《基于多模态循环神经网络（m-RNN）的深度字幕生成》，arXiv:1412.6632 \u002F ICLR 2015\n* 多伦多大学 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1411.2539)] [[网页演示](http:\u002F\u002Fdeeplearning.cs.toronto.edu\u002Fi2t)]\n  * Ryan Kiros, Ruslan Salakhutdinov, 和 Richard S. 
Zemel，《用多模态神经语言模型统一视觉-语义嵌入》，arXiv:1411.2539 \u002F TACL 2015\n* 伯克利 [[网页](http:\u002F\u002Fjeffdonahue.com\u002Flrcn\u002F)] [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1411.4389)]\n  * Jeff Donahue, Lisa Anne Hendricks, Sergio Guadarrama, Marcus Rohrbach, Subhashini Venugopalan, Kate Saenko, 和 Trevor Darrell，《用于视觉识别和描述的长期循环卷积网络》，arXiv:1411.4389 \u002F CVPR 2015\n* 谷歌 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1411.4555)]\n  * Oriol Vinyals, Alexander Toshev, Samy Bengio, 和 Dumitru Erhan，《展示与讲述：一种神经图像字幕生成器》，arXiv:1411.4555 \u002F CVPR 2015\n* 斯坦福 [[网页]](http:\u002F\u002Fcs.stanford.edu\u002Fpeople\u002Fkarpathy\u002Fdeepimagesent\u002F) [[论文]](http:\u002F\u002Fcs.stanford.edu\u002Fpeople\u002Fkarpathy\u002Fcvpr2015.pdf)\n  * Andrej Karpathy 和 Li Fei-Fei，《用于生成图像描述的深度视觉-语义对齐》，CVPR 2015\n* 微软 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1411.4952)]\n  * Hao Fang, Saurabh Gupta, Forrest Iandola, Rupesh Srivastava, Li Deng, Piotr Dollar, Jianfeng Gao, Xiaodong He, Margaret Mitchell, John C. Platt, Lawrence Zitnick, 和 Geoffrey Zweig，《从字幕到视觉概念再返回》，arXiv:1411.4952 \u002F CVPR 2015\n* CMU + 微软 [[论文-arXiv](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1411.5654)], [[论文-CVPR](http:\u002F\u002Fwww.cs.cmu.edu\u002F~xinleic\u002Fpapers\u002Fcvpr15_rnn.pdf)]\n  * Xinlei Chen, 和 C. Lawrence Zitnick，《学习用于图像字幕生成的循环视觉表示》\n  * Xinlei Chen, 和 C. Lawrence Zitnick，《心灵之眼：用于图像字幕生成的循环视觉表示》，CVPR 2015\n* 蒙特利尔大学 + 多伦多大学 [[网页](http:\u002F\u002Fkelvinxu.github.io\u002Fprojects\u002Fcapgen.html)] [[论文](http:\u002F\u002Fwww.cs.toronto.edu\u002F~zemel\u002Fdocuments\u002FcaptionAttn.pdf)]\n  * Kelvin Xu, Jimmy Lei Ba, Ryan Kiros, Kyunghyun Cho, Aaron Courville, Ruslan Salakhutdinov, Richard S. Zemel, 和 Yoshua Bengio，《展示、注意与讲述：基于视觉注意力的神经图像字幕生成》，arXiv:1502.03044 \u002F ICML 2015\n* Idiap + EPFL + Facebook [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1502.03671)]\n  * Remi Lebret, Pedro O. 
Pinheiro, 和 Ronan Collobert，《基于短语的图像字幕生成》，arXiv:1502.03671 \u002F ICML 2015\n* UCLA + 百度 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1504.06692)]\n  * Junhua Mao, Wei Xu, Yi Yang, Jiang Wang, Zhiheng Huang, 和 Alan L. Yuille，《像孩子一样学习：从图像的句子描述中快速学习新的视觉概念》，arXiv:1504.06692\n* MS + 伯克利\n  * Jacob Devlin, Saurabh Gupta, Ross Girshick, Margaret Mitchell, 和 C. Lawrence Zitnick，《探索最近邻方法用于图像字幕生成》，arXiv:1505.04467（注：严格来说并非RNN）[[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1505.04467.pdf)]\n  * Jacob Devlin, Hao Cheng, Hao Fang, Saurabh Gupta, Li Deng, Xiaodong He, Geoffrey Zweig, 和 Margaret Mitchell，《用于图像字幕生成的语言模型：特点及有效方法》，arXiv:1505.01809 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1505.01809.pdf)]\n* 阿德莱德 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.01144.pdf)]\n  * Qi Wu, Chunhua Shen, Anton van den Hengel, Lingqiao Liu, 和 Anthony Dick，《使用中间属性层进行图像字幕生成》，arXiv:1506.01144\n* 提尔堡 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.03694.pdf)]\n  * Grzegorz Chrupala, Akos Kadar, 和 Afra Alishahi，《通过图片学习语言》，arXiv:1506.03694\n* 蒙特利尔大学 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1507.01053.pdf)]\n  * Kyunghyun Cho, Aaron Courville, 和 Yoshua Bengio，《使用基于注意力的编码器-解码器网络描述多媒体内容》，arXiv:1507.01053\n* 康奈尔 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1508.02091.pdf)]\n  * Jack Hessel, Nicolas Savva, 和 Michael J. 
Wilber，《神经图像字幕生成中的图像表示与新领域》，arXiv:1508.02091\n\n\n#### 视频字幕生成\n* 伯克利 [[网页](http:\u002F\u002Fjeffdonahue.com\u002Flrcn\u002F)] [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1411.4389)]\n  * Jeff Donahue, Lisa Anne Hendricks, Sergio Guadarrama, Marcus Rohrbach, Subhashini Venugopalan, Kate Saenko, 和 Trevor Darrell，《用于视觉识别和描述的长期循环卷积网络》，arXiv:1411.4389 \u002F CVPR 2015\n* UT Austin + UML + 伯克利 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1412.4729)]\n  * Subhashini Venugopalan, Huijuan Xu, Jeff Donahue, Marcus Rohrbach, Raymond Mooney, 和 Kate Saenko，《使用深度循环神经网络将视频翻译成自然语言》，arXiv:1412.4729\n* 微软 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1505.01861)]\n  * Yingwei Pan, Tao Mei, Ting Yao, Houqiang Li, 和 Yong Rui，《联合建模嵌入与翻译以连接视频和语言》，arXiv:1505.01861\n* UT Austin + 伯克利 + UML [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1505.00487)]\n  * Subhashini Venugopalan, Marcus Rohrbach, Jeff Donahue, Raymond Mooney, Trevor Darrell, 和 Kate Saenko，《序列到序列——视频到文本》，arXiv:1505.00487\n* 蒙特利尔大学 + 舍布鲁克大学 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1502.08029.pdf)]\n  * Li Yao, Atousa Torabi, Kyunghyun Cho, Nicolas Ballas, Christopher Pal, Hugo Larochelle, 和 Aaron Courville，《利用时间结构描述视频》，arXiv:1502.08029\n* MPI + 伯克利 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.01698.pdf)]\n  * Anna Rohrbach, Marcus Rohrbach, 和 Bernt Schiele，《电影描述的长短期故事》，arXiv:1506.01698\n* 多伦多大学 + MIT [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.06724.pdf)]\n  * Yukun Zhu, Ryan Kiros, Richard Zemel, Ruslan Salakhutdinov, Raquel Urtasun, Antonio Torralba, 和 Sanja Fidler，《对齐书籍与电影：通过观看电影和阅读书籍实现类似故事的视觉解释》，arXiv:1506.06724\n* 蒙特利尔大学 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1507.01053.pdf)]\n  * Kyunghyun Cho, Aaron Courville, 和 Yoshua Bengio，《使用基于注意力的编码器-解码器网络描述多媒体内容》，arXiv:1507.01053\n* 浙江大学 + UTS [[论文](http:\u002F\u002Farxiv.org\u002Fabs\u002F1511.03476)]\n  * Pingbo Pan, Zhongwen Xu, Yi Yang, Fei Wu, Yueting Zhuang，《用于视频表示并应用于字幕生成的层次化循环神经编码器》，arXiv:1511.03476\n* 蒙特利尔大学 + NYU + IBM 
[[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.04590.pdf)]\n  * Li Yao, Nicolas Ballas, Kyunghyun Cho, John R. Smith, 和 Yoshua Bengio，《图像和视频字幕生成的经验性能上限》，arXiv:1511.04590\n\n\n#### 视觉问答\n\n* 弗吉尼亚理工大学 + 微软研究院 [[网页](http:\u002F\u002Fwww.visualqa.org\u002F)] [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1505.00468)]\n  * Stanislaw Antol、Aishwarya Agrawal、Jiasen Lu、Margaret Mitchell、Dhruv Batra、C. Lawrence Zitnick 和 Devi Parikh，《VQA：视觉问答》，arXiv:1505.00468 \u002F CVPR 2015 SUNw：场景理解研讨会\n* 马普学会 + 伯克利 [[网页](https:\u002F\u002Fwww.mpi-inf.mpg.de\u002Fdepartments\u002Fcomputer-vision-and-multimodal-computing\u002Fresearch\u002Fvision-and-language\u002Fvisual-turing-challenge\u002F)] [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1505.01121)]\n  * Mateusz Malinowski、Marcus Rohrbach 和 Mario Fritz，《问问你的神经元：一种基于神经网络的图像问题解答方法》，arXiv:1505.01121\n* 多伦多大学 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1505.02074)] [[数据集](http:\u002F\u002Fwww.cs.toronto.edu\u002F~mren\u002Fimageqa\u002Fdata\u002Fcocoqa\u002F)]\n  * Mengye Ren、Ryan Kiros 和 Richard Zemel，《探索用于图像问答的模型和数据》，arXiv:1505.02074 \u002F ICML 2015 深度学习研讨会\n* 百度 + UCLA [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1505.05612)] [[数据集]()]\n  * Hauyuan Gao、Junhua Mao、Jie Zhou、Zhiheng Huang、Lei Wang 和 Wei Xu，《你在和机器对话吗？多语言图像问答的数据集与方法》，arXiv:1505.05612 \u002F NIPS 2015\n* 首尔国立大学 + NAVER [[论文](http:\u002F\u002Farxiv.org\u002Fabs\u002F1606.01455)]\n  * Jin-Hwa Kim、Sang-Woo Lee、Dong-Hyun Kwak、Min-Oh Heo、Jeonghee Kim、Jung-Woo Ha、Byoung-Tak Zhang，《用于视觉问答的多模态残差学习》，arXiv:1606.01455\n* 加州大学伯克利分校 + 索尼 [[论文](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1606.01847)]\n  * Akira Fukui、Dong Huk Park、Daylen Yang、Anna Rohrbach、Trevor Darrell 和 Marcus Rohrbach，《用于视觉问答和视觉定位的多模态紧凑双线性池化》，arXiv:1606.01847\n* 浦项工科大学 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1606.03647.pdf)]\n  * Hyeonwoo Noh 和 Bohyung Han，《通过联合损失最小化训练递归回答单元以用于VQA》，arXiv:1606.03647\n* 首尔国立大学 + NAVER [[论文](http:\u002F\u002Farxiv.org\u002Fabs\u002F1610.04325)]\n  * Jin-Hwa Kim、Kyoung 
Woon On、Jeonghee Kim、Jung-Woo Ha、Byoung-Tak Zhang，《低秩双线性池化的哈达玛积》，arXiv:1610.04325\n* 视频问答\n  * 卡内基梅隆大学 + UTS [[论文](http:\u002F\u002Farxiv.org\u002Fabs\u002F1511.04670)]\n    * Linchao Zhu、Zhongwen Xu、Yi Yang、Alexander G. Hauptmann，《揭示视频问答中的时间上下文》，arXiv:1511.04670\n  * KIT + MIT + 多伦多大学 [[论文](http:\u002F\u002Farxiv.org\u002Fabs\u002F1512.02902)] [[数据集](http:\u002F\u002Fmovieqa.cs.toronto.edu\u002Fhome\u002F)]\n    * Makarand Tapaswi、Yukun Zhu、Rainer Stiefelhagen、Antonio Torralba、Raquel Urtasun、Sanja Fidler，《MovieQA：通过问答理解电影故事》，arXiv:1512.02902\n\n\n#### 图灵机\n* A.Graves、G. Wayne 和 I. Danihelka，《神经图灵机》，arXiv 预印本 arXiv:1410.5401 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1410.5401)]\n* Jason Weston、Sumit Chopra、Antoine Bordes，《记忆网络》，arXiv:1410.3916 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1410.3916)]\n* Armand Joulin 和 Tomas Mikolov，《利用栈增强的循环网络推断算法模式》，arXiv:1503.01007 \u002F NIPS 2015 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1503.01007)]\n* Sainbayar Sukhbaatar、Arthur Szlam、Jason Weston 和 Rob Fergus，《端到端记忆网络》，arXiv:1503.08895 \u002F NIPS 2015 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1503.08895)]\n* Wojciech Zaremba 和 Ilya Sutskever，《强化学习神经图灵机》，arXiv:1505.00521 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1505.00521)]\n* Baolin Peng 和 Kaisheng Yao，《具有外部记忆的循环神经网络用于语言理解》，arXiv:1506.00195 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.00195.pdf)]\n* Fandong Meng、Zhengdong Lu、Zhaopeng Tu、Hang Li 和 Qun Liu，《基于深度记忆的序列到序列学习架构》，arXiv:1506.06442 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.06442.pdf)]\n* Arvind Neelakantan、Quoc V. 
Le 和 Ilya Sutskever，《神经程序员：利用梯度下降诱导潜在程序》，arXiv:1511.04834 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.04834.pdf)]\n* Scott Reed 和 Nando de Freitas，《神经程序员-解释器》，arXiv:1511.06279 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.06279.pdf)]\n* Karol Kurach、Marcin Andrychowicz 和 Ilya Sutskever，《神经随机存取机器》，arXiv:1511.06392 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.06392.pdf)]\n* Łukasz Kaiser 和 Ilya Sutskever，《神经GPU学习算法》，arXiv:1511.08228 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.08228.pdf)]\n* Ethan Caballero，《跳过式记忆网络》，arXiv:1511.6420 [[论文](https:\u002F\u002Fpdfs.semanticscholar.org\u002F6b9f\u002F0d695df0ce01d005eb5aa69386cb5fbac62a.pdf)]\n* Wojciech Zaremba、Tomas Mikolov、Armand Joulin 和 Rob Fergus，《从示例中学习简单算法》，arXiv:1511.07275 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.07275.pdf)]\n\n\n\n### 机器人学\n\n* Hongyuan Mei、Mohit Bansal 和 Matthew R. Walter，《听、注意并行走：将导航指令映射为动作序列的神经网络》，arXiv:1506.04089 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.04089.pdf)]\n* Marvin Zhang、Sergey Levine、Zoe McCarthy、Chelsea Finn 和 Pieter Abbeel，《针对部分可观测机器人控制的连续记忆状态策略学习》，arXiv:1507.01273。[[论文]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1507.01273)\n\n### 其他\n* Alex Graves，《使用循环神经网络生成序列》，arXiv:1308.0850 [[论文]](http:\u002F\u002Farxiv.org\u002Fabs\u002F1308.0850)\n* Volodymyr Mnih、Nicolas Heess、Alex Graves 和 Koray Kavukcuoglu，《视觉注意力的循环模型》，NIPS 2014 \u002F arXiv:1406.6247 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1406.6247.pdf)]\n* Wojciech Zaremba 和 Ilya Sutskever，《学习执行》，arXiv:1410.4615 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1410.4615.pdf)] [[代码](https:\u002F\u002Fgithub.com\u002Fwojciechz\u002Flearning_to_execute)]\n* Samy Bengio、Oriol Vinyals、Navdeep Jaitly 和 Noam Shazeer，《基于循环神经网络的序列预测中的调度采样》，arXiv:1506.03099 \u002F NIPS 2015 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1506.03099)]\n* Bing Shuai、Zhen Zuo、Gang Wang 和 Bing Wang，《用于场景标注的DAG-循环神经网络》，arXiv:1509.00552 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1509.00552)]\n* Soren Kaae 
Sonderby、Casper Kaae Sonderby、Lars Maaloe 和 Ole Winther，《循环空间变换网络》，arXiv:1509.05329 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1509.05329)]\n* Cesar Laurent、Gabriel Pereyra、Philemon Brakel、Ying Zhang 和 Yoshua Bengio，《批归一化循环神经网络》，arXiv:1510.01378 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1510.01378)]\n* Jiwon Kim、Jung Kwon Lee、Kyoung Mu Lee，《用于图像超分辨率的深度递归卷积网络》，arXiv:1511.04491 [[论文]](http:\u002F\u002Farxiv.org\u002Fabs\u002F1511.04491)\n* Quan Gan、Qipeng Guo、Zheng Zhang 和 Kyunghyun Cho，《迈向无模型匿名目标跟踪的第一步：基于循环神经网络的方法》，arXiv:1511.06425 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.06425.pdf)]\n* Francesco Visin、Kyle Kastner、Aaron Courville、Yoshua Bengio、Matteo Matteucci 和 Kyunghyun Cho，《ReSeg：一种用于目标分割的循环神经网络》，arXiv:1511.07053 [[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.07053.pdf)]\n* Juergen Schmidhuber，《关于学习思考：强化学习控制器与循环世界模型的新组合的算法信息论》，arXiv:1511.09249 [[论文]](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1511.09249)\n\n## 数据集\n* 语音识别\n  * [OpenSLR](http:\u002F\u002Fwww.openslr.org\u002Fresources.php)（开放语音与语言资源）\n    * [LibriSpeech ASR语料库](http:\u002F\u002Fwww.openslr.org\u002F12\u002F)\n  * [VoxForge](http:\u002F\u002Fvoxforge.org\u002Fhome)\n* 图像字幕生成\n  * [Flickr 8k](http:\u002F\u002Fnlp.cs.illinois.edu\u002FHockenmaierGroup\u002FFraming_Image_Description\u002FKCCA.html)\n  * [Flickr 30k](http:\u002F\u002Fshannon.cs.illinois.edu\u002FDenotationGraph\u002F)\n  * [Microsoft COCO](http:\u002F\u002Fmscoco.org\u002Fhome\u002F)\n* 问答\n  * [bAbI项目](http:\u002F\u002Ffb.ai\u002Fbabi)——由Facebook AI Research提供的文本理解与推理数据集。包含：\n    * (20)个QA bAbI任务——[[论文](http:\u002F\u002Farxiv.org\u002Fabs\u002F1502.05698)]\n    * (6)个对话bAbI任务——[[论文](http:\u002F\u002Farxiv.org\u002Fabs\u002F1605.07683)]\n    * 儿童读物测试——[[论文](http:\u002F\u002Farxiv.org\u002Fabs\u002F1511.02301)]\n    * 电影对话数据集——[[论文](http:\u002F\u002Farxiv.org\u002Fabs\u002F1511.06931)]\n    * MovieQA数据集——[[数据](http:\u002F\u002Fwww.thespermwhale.com\u002Fjaseweston\u002Fbabi\u002Fmovie_dialog_dataset.tgz)]\n    * 
对话式语言学习数据集——[[论文](http:\u002F\u002Farxiv.org\u002Fabs\u002F1604.06045)]\n    * SimpleQuestions数据集——[[论文](http:\u002F\u002Farxiv.org\u002Fabs\u002F1506.02075)]\n  * [SQuAD](https:\u002F\u002Fstanford-qa.com\u002F)——斯坦福问答数据集：[[论文](http:\u002F\u002Farxiv.org\u002Fpdf\u002F1606.05250)]\n* 图像问答\n  * [DAQUAR](https:\u002F\u002Fwww.mpi-inf.mpg.de\u002Fdepartments\u002Fcomputer-vision-and-multimodal-computing\u002Fresearch\u002Fvision-and-language\u002Fvisual-turing-challenge\u002F)——基于N. Silberman等人构建的[NYU Depth v2](http:\u002F\u002Fcs.nyu.edu\u002F~silberman\u002Fdatasets\u002Fnyu_depth_v2.html)\n  * [VQA](http:\u002F\u002Fwww.visualqa.org\u002F)——基于[MSCOCO](http:\u002F\u002Fmscoco.org\u002F)图像\n  * [Image QA](http:\u002F\u002Fwww.cs.toronto.edu\u002F~mren\u002Fimageqa\u002Fdata\u002Fcocoqa\u002F)——基于MSCOCO图像\n  * [多语言图像问答](http:\u002F\u002Fidl.baidu.com\u002FFM-IQA.html)——由百度从零开始构建，中文版并附英文翻译\n* 行动识别\n  * [THUMOS](http:\u002F\u002Fwww.thumos.info\u002Fhome.html)：大规模行动识别数据集\n  * [MultiTHUMOS](http:\u002F\u002Fai.stanford.edu\u002F~syyeung\u002Fresources\u002Fmultithumos.zip)：对THUMOS '14行动检测数据集的扩展，带有密集的多标签标注\n\n## 博客\n* [RNN的不合理有效性](http:\u002F\u002Fkarpathy.github.io\u002F2015\u002F05\u002F21\u002Frnn-effectiveness\u002F)，作者[Andrej Karpathy](http:\u002F\u002Fcs.stanford.edu\u002Fpeople\u002Fkarpathy\u002F)\n* [理解LSTM网络](http:\u002F\u002Fcolah.github.io\u002Fposts\u002F2015-08-Understanding-LSTMs\u002F)，出自[Colah的博客](http:\u002F\u002Fcolah.github.io\u002F)\n* 
[WildML](http:\u002F\u002Fwww.wildml.com\u002F)博客的RNN教程[[第一部分](http:\u002F\u002Fwww.wildml.com\u002F2015\u002F09\u002Frecurrent-neural-networks-tutorial-part-1-introduction-to-rnns\u002F)]、[[第二部分](http:\u002F\u002Fwww.wildml.com\u002F2015\u002F09\u002Frecurrent-neural-networks-tutorial-part-2-implementing-a-language-model-rnn-with-python-numpy-and-theano\u002F)]、[[第三部分](http:\u002F\u002Fwww.wildml.com\u002F2015\u002F10\u002Frecurrent-neural-networks-tutorial-part-3-backpropagation-through-time-and-vanishing-gradients\u002F)]、[[第四部分](http:\u002F\u002Fwww.wildml.com\u002F2015\u002F10\u002Frecurrent-neural-network-tutorial-part-4-implementing-a-grulstm-rnn-with-python-and-theano\u002F)]\n* [TensorFlow中的RNN：实用指南与未文档化的特性](http:\u002F\u002Fwww.wildml.com\u002F2016\u002F08\u002Frnns-in-tensorflow-a-practical-guide-and-undocumented-features\u002F)\n* [优化RNN性能](https:\u002F\u002Fsvail.github.io\u002F)，来自百度硅谷人工智能实验室。\n* [使用RNN进行字符级语言建模](http:\u002F\u002Fnbviewer.jupyter.org\u002Fgist\u002Fyoavg\u002Fd76121dfde2618422139)，作者Yoav Goldberg\n* [用Python实现RNN](http:\u002F\u002Fpeterroelants.github.io\u002Fposts\u002Frnn_implementation_part01\u002F)。\n* [LSTM反向传播](http:\u002F\u002Farunmallya.github.io\u002Fwriteups\u002Fnn\u002Flstm\u002Findex.html#\u002F)\n* [TensorFlow中循环网络导论](https:\u002F\u002Fdanijar.com\u002Fintroduction-to-recurrent-networks-in-tensorflow\u002F)，作者Danijar Hafner\n* [TensorFlow中的可变序列长度](https:\u002F\u002Fdanijar.com\u002Fvariable-sequence-lengths-in-tensorflow\u002F)，作者Danijar Hafner\n* [书面记忆：理解、推导与扩展LSTM](http:\u002F\u002Fr2rt.com\u002Fwritten-memories-understanding-deriving-and-extending-the-lstm.html)，作者Silviu Pitis\n\n## 在线演示\n* Alex Graves的手写生成[[链接](http:\u002F\u002Fwww.cs.toronto.edu\u002F~graves\u002Fhandwriting.html)]\n* Ink Poster：手写便签[[链接](http:\u002F\u002Fwww.inkposter.com\u002F?)]\n* LSTMVis：循环神经网络的可视化分析[[链接](http:\u002F\u002Flstm.seas.harvard.edu\u002F)]","# Awesome-RNN 快速上手指南\n\n`awesome-rnn` 
并非一个可直接安装的单一软件库，而是一个**循环神经网络（RNN）资源精选列表**。它汇集了理论教程、经典论文、开源代码实现（涵盖 TensorFlow, PyTorch, Theano 等框架）以及数据集。\n\n本指南将指导你如何利用该列表中的资源，快速搭建环境并运行一个基础的 RNN 示例。\n\n## 环境准备\n\n由于列表中包含了多种框架的实现，建议根据你的开发偏好选择其一。目前最主流且对国内开发者友好的选择是 **PyTorch** 或 **TensorFlow**。\n\n### 系统要求\n- **操作系统**: Linux (推荐 Ubuntu), macOS, 或 Windows (需 WSL2)\n- **硬件**: 建议使用支持 CUDA 的 NVIDIA GPU 以加速训练（非必须，CPU 亦可运行小型示例）\n- **Python**: 3.8 或更高版本\n\n### 前置依赖\n确保已安装 `git` 和 `pip`。\n\n```bash\n# 检查 Python 版本\npython --version\n\n# 检查 git\ngit --version\n```\n\n## 安装步骤\n\n以下以使用 **PyTorch** 运行列表中推荐的 `word_language_model` 为例。\n\n### 1. 配置国内镜像源（加速下载）\n为避免网络延迟，建议临时或永久配置清华\u002F阿里镜像源。\n\n```bash\n# 临时使用清华源安装（推荐）\npip install torch torchvision torchaudio --index-url https:\u002F\u002Fdownload.pytorch.org\u002Fwhl\u002Fcu118\n# 若无 GPU，使用 CPU 版本：\n# pip install torch torchvision torchaudio --index-url https:\u002F\u002Fdownload.pytorch.org\u002Fwhl\u002Fcpu\n```\n\n### 2. 克隆官方示例代码\n`awesome-rnn` 的 Codes 部分直接链接到了 PyTorch 官方仓库中的 RNN 示例。\n\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Fpytorch\u002Fexamples.git\ncd examples\u002Fword_language_model\n```\n\n### 3. 安装项目依赖\n进入目录后安装所需的 Python 包。\n\n```bash\npip install -r requirements.txt\n```\n\n## 基本使用\n\n本示例将训练一个基于 RNN\u002FLSTM\u002FGRU 的词级语言模型，使用的数据集为 Wikitext-2。\n\n### 1. 查看帮助信息\n了解可用的参数（如模型类型、隐藏层大小、Epoch 数等）。\n\n```bash\npython main.py --help\n```\n\n### 2. 运行训练脚本\n使用默认参数（LSTM 模型）开始训练。程序会自动下载数据集。\n\n```bash\npython main.py\n```\n\n*注：首次运行会下载 `data.pt` 数据集文件，若网络受阻可手动下载后放入 `data` 目录。*\n\n### 3. 自定义运行（可选）\n你可以尝试切换为 GRU 模型，并减少 Epoch 数以快速测试流程：\n\n```bash\npython main.py --model GRU --epochs 5\n```\n\n### 4. 
结果验证\n训练完成后，脚本会自动在测试集上评估困惑度（Perplexity）。你也可以使用训练好的模型生成文本：\n\n```bash\n# 使用保存的最佳模型生成文本\npython main.py --eval --checkpoint model.pt\n```\n\n---\n**更多资源探索**：\n访问 [awesome-rnn 原始仓库](https:\u002F\u002Fgithub.com\u002Fkjw0612\u002Fawesome-rnn) 查阅“理论”、“应用”及“其他框架（如 TensorFlow, Theano）”部分的详细链接，以深入钻研特定领域的 RNN 应用。","某初创公司的算法工程师团队正致力于开发一款基于循环神经网络（RNN）的金融舆情情感分析系统，急需从理论选型到代码落地的全套解决方案。\n\n### 没有 awesome-rnn 时\n- **资源检索低效**：团队成员需在 Google、GitHub 和各类论文库中盲目搜索，花费数天时间筛选过时的教程或质量参差不齐的代码库。\n- **架构选型困难**：面对 LSTM、GRU 等多种变体，缺乏系统的对比综述和权威理论指引，导致技术选型依赖个人经验，存在试错风险。\n- **复现成本高昂**：找不到与金融文本处理匹配的开源基准代码（如 TensorFlow 或 Theano 实现），从零编写数据预处理和模型训练脚本耗时费力。\n- **应用场景迷茫**：难以快速定位 NLP 领域外是否有可借鉴的多模态方案（如结合新闻图表分析），限制了产品功能的创新边界。\n\n### 使用 awesome-rnn 后\n- **一站式的资源聚合**：直接获取按“代码”、“理论”、“应用”分类的精良清单，半小时内即可锁定适合情感分析的高星 GitHub 项目和经典论文。\n- **清晰的演进路径**：通过\"Architecture Variants\"和\"Surveys\"章节，迅速理解不同 RNN 变体的优劣，科学选定最适合短文本情感分类的模型结构。\n- **开箱即用的代码基线**：利用列表中提供的 `char-rnn-tensorflow` 或 Keras 示例代码，快速搭建原型并迁移至自有数据集，将开发周期从周缩短至天。\n- **跨域灵感激发**：参考\"Multimodal\"和\"Conversation Modeling\"板块的案例，成功引入对话上下文机制，显著提升了舆情判断的准确度。\n\nawesome-rnn 通过将分散的深度学习资源结构化，帮助团队消除了信息不对称，实现了从理论调研到工程落地的高效闭环。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkjw0612_awesome-rnn_34a8246d.png","kjw0612","Jiwon Kim","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fkjw0612_a0217bea.png","AI Researcher","SK Telecom",null,"https:\u002F\u002Fgithub.com\u002Fkjw0612",6207,1434,"2026-04-14T00:29:12",5,"","未说明",{"notes":86,"python":84,"dependencies":87},"该项目是一个资源列表（Awesome List），而非单一的可执行软件工具。它汇总了多种不同的深度学习框架（如 TensorFlow, Theano, PyTorch, Torch 等）下的 RNN 相关代码、教程和论文。因此，具体的运行环境需求取决于用户选择使用的特定子项目或框架。例如，使用 Torch 需要 Lua 环境，使用 DL4J 需要 Java\u002FScala 环境，而其他大多数项目基于 Python。建议根据具体想运行的示例代码去查阅对应子项目的仓库以获取详细的环境配置要求。",[88,89,90,91,92,93,94,95,96],"TensorFlow","Theano","Caffe","Torch 
(Lua)","PyTorch","DL4J","Keras","Chainer","Numpy",[35,15,98,99,100],"音频","视频","其他","2026-03-27T02:49:30.150509","2026-04-15T06:53:36.278992",[],[105],{"id":106,"version":107,"summary_zh":108,"released_at":109},263681,"v0.1","用于在文档状态良好时对其进行标记的版本。","2015-09-02T18:47:50"]