[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-louisfb01--Best_AI_paper_2020":3,"tool-louisfb01--Best_AI_paper_2020":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",151918,2,"2026-04-12T11:33:05",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",108322,"2026-04-10T11:39:34",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 
# 2020: A Year Full of Amazing AI Papers - A Review
## A curated list of the latest breakthroughs in AI by release date with a clear video explanation, link to a more in-depth article, and code

Even with everything that happened in the world this year, we still had the chance to see a lot of amazing research come out, especially in the field of artificial intelligence. Moreover, many important aspects were highlighted this year, such as ethics and important biases. Artificial intelligence and our understanding of the human brain and its link to AI are constantly evolving, showing promising applications in the near future.

Here are the most interesting research papers of the year, in case you missed any of them. In short, it is a curated list of the latest breakthroughs in AI and Data Science by release date, each with a clear video explanation, a link to a more in-depth article, and code (if applicable).
Enjoy the read!

**The complete reference to each paper is listed at the end of this repository.**

Maintainer - [louisfb01](https://github.com/louisfb01)

Subscribe to my [newsletter](http://eepurl.com/huGLT5) - The latest updates in AI explained every week.

🆕 Check out [the 2021 repo](https://github.com/louisfb01/best_AI_papers_2021)!

*Feel free to message me any great papers I missed, at bouchard.lf@gmail.com, so I can add them to this repository*

***Tag me on Twitter [@Whats_AI](https://twitter.com/Whats_AI) or LinkedIn [@Louis (What's AI) Bouchard](https://www.linkedin.com/in/whats-ai/) if you share the list!***

### Watch a complete 2020 rewind in 15 minutes

[![Watch the video](https://imgur.com/xzZT1ll.png)](https://youtu.be/DHBclF-8KwE)

---

### If you are interested in computer vision research, here is another great repository for you:
The top 10 computer vision papers in 2020 with video demos, articles, code, and paper references.

[Top 10 Computer Vision Papers 2020](https://github.com/louisfb01/Top-10-Computer-Vision-Papers-2020)

----

👀 **If you'd like to support my work** and use W&B (for free) to track your ML experiments and make your work reproducible or collaborate with a team, you can try it out by following [this guide](https://colab.research.google.com/github/louisfb01/examples/blob/master/colabs/pytorch/Simple_PyTorch_Integration.ipynb)! Since most of the code here is PyTorch-based, we thought that a [quickstart guide](https://colab.research.google.com/github/louisfb01/examples/blob/master/colabs/pytorch/Simple_PyTorch_Integration.ipynb) for using W&B on PyTorch would be the most interesting to share.

👉 Follow [this quick guide](https://colab.research.google.com/github/louisfb01/examples/blob/master/colabs/pytorch/Simple_PyTorch_Integration.ipynb), use the same W&B lines in your code or in any of the repos below, and have all your experiments automatically tracked in your W&B account! It doesn't take more than 5 minutes to set up and will change your life as it did for me!
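To make "the same W&B lines" concrete, here is a minimal sketch of experiment tracking in a plain PyTorch loop. The project name and the toy model are placeholders of mine, not taken from any repo below:

```python
# Minimal W&B tracking sketch for a PyTorch loop (assumes `pip install wandb`
# and `wandb login`; project name and model are illustrative placeholders).
import torch
import torch.nn.functional as F
import wandb

wandb.init(project="ai-papers-2020-demo", config={"lr": 1e-3, "steps": 100})

model = torch.nn.Linear(10, 1)
opt = torch.optim.Adam(model.parameters(), lr=wandb.config.lr)

for step in range(wandb.config.steps):
    x, y = torch.randn(32, 10), torch.randn(32, 1)   # stand-in batch
    loss = F.mse_loss(model(x), y)
    opt.zero_grad()
    loss.backward()
    opt.step()
    wandb.log({"step": step, "loss": loss.item()})   # shows up live in your dashboard

wandb.finish()
```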
[Here's a more advanced guide](https://colab.research.google.com/github/louisfb01/examples/blob/master/colabs/pytorch/Organizing_Hyperparameter_Sweeps_in_PyTorch_with_W%26B.ipynb) for using Hyperparameter Sweeps, if interested :)

🙌 Thank you to [Weights & Biases](https://wandb.ai/) for sponsoring this repository and the work I've been doing, and thanks to any of you using this link and trying W&B!

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/louisfb01/examples/blob/master/colabs/pytorch/Simple_PyTorch_Integration.ipynb)

----

## The Full List
- [YOLOv4: Optimal Speed and Accuracy of Object Detection [1]](#1)
- [DeepFaceDrawing: Deep Generation of Face Images from Sketches [2]](#2)
- [Learning to Simulate Dynamic Environments with GameGAN [3]](#3)
- [PULSE: Self-Supervised Photo Upsampling via Latent Space Exploration of Generative Models [4]](#4)
- [Unsupervised Translation of Programming Languages [5]](#5)
- [PIFuHD: Multi-Level Pixel-Aligned Implicit Function for High-Resolution 3D Human Digitization [6]](#6)
- [High-Resolution Neural Face Swapping for Visual Effects [7]](#7)
- [Swapping Autoencoder for Deep Image Manipulation [8]](#8)
- [GPT-3: Language Models are Few-Shot Learners [9]](#9)
- [Learning Joint Spatial-Temporal Transformations for Video Inpainting [10]](#10)
- [Image GPT - Generative Pretraining from Pixels [11]](#11)
- [Learning to Cartoonize Using White-box Cartoon Representations [12]](#12)
- [FreezeG: Freeze the Discriminator: a Simple Baseline for Fine-Tuning GANs [13]](#13)
- [Neural Re-Rendering of Humans from a Single Image [14]](#14)
- [I2L-MeshNet: Image-to-Lixel Prediction Network for Accurate 3D Human Pose and Mesh Estimation from a Single RGB Image [15]](#15)
- [Beyond the Nav-Graph: Vision-and-Language Navigation in Continuous Environments [16]](#16)
- [RAFT: Recurrent All-Pairs Field Transforms for Optical Flow [17]](#17)
- [Crowdsampling the Plenoptic Function [18]](#18)
- [Old Photo Restoration via Deep Latent Space Translation [19]](#19)
- [Neural circuit policies enabling auditable autonomy [20]](#20)
- [Lifespan Age Transformation Synthesis [21]](#21)
- [DeOldify [22]](#22)
- [COOT: Cooperative Hierarchical Transformer for Video-Text Representation Learning [23]](#23)
- [Stylized Neural Painting [24]](#24)
- [Is a Green Screen Really Necessary for Real-Time Portrait Matting? [25]](#25)
- [ADA: Training Generative Adversarial Networks with Limited Data [26]](#26)
- [Improving Data‐Driven Global Weather Prediction Using Deep Convolutional Neural Networks on a Cubed Sphere [27]](#27)
- [NeRV: Neural Reflectance and Visibility Fields for Relighting and View Synthesis [28]](#28)
- [Paper references](#references)

---

## YOLOv4: Optimal Speed and Accuracy of Object Detection [1]<a name="1"></a>
This fourth version was introduced in April 2020 by Alexey Bochkovskiy et al. in the paper "YOLOv4: Optimal Speed and Accuracy of Object Detection". The main goal of this algorithm was to make a super-fast object detector with high accuracy. A minimal inference sketch follows the links below.

* Short Video Explanation:

[![Watch the video](https://imgur.com/ShqhwQl.png)](https://youtu.be/CtjZFkO5RPw)
* [The YOLOv4 algorithm | Introduction to You Only Look Once, Version 4 | Real-Time Object Detection](https://medium.com/what-is-artificial-intelligence/the-yolov4-algorithm-introduction-to-you-only-look-once-version-4-real-time-object-detection-5fd8a608b0fa) - Short Read
* [YOLOv4: Optimal Speed and Accuracy of Object Detection](https://arxiv.org/abs/2004.10934) - The Paper
* [Click here for the YOLOv4 code](https://github.com/AlexeyAB/darknet) - The Code
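If you want a quick taste of running the released weights outside the authors' darknet pipeline, here is a hedged sketch using OpenCV's DNN module. The cfg/weights file names assume you downloaded them from the repo above, and OpenCV 4.4+ is needed for YOLOv4's mish activation:

```python
# Sketch: YOLOv4 inference via OpenCV's DNN module (not the authors' darknet
# pipeline). Assumes yolov4.cfg / yolov4.weights from the repo above and
# OpenCV >= 4.4.
import cv2
import numpy as np

net = cv2.dnn.readNetFromDarknet("yolov4.cfg", "yolov4.weights")
img = cv2.imread("street.jpg")
blob = cv2.dnn.blobFromImage(img, 1 / 255.0, (608, 608), swapRB=True, crop=False)
net.setInput(blob)

# Each output row: [cx, cy, w, h, objectness, per-class scores...]
for out in net.forward(net.getUnconnectedOutLayersNames()):
    for det in out:
        scores = det[5:]
        cls, conf = int(np.argmax(scores)), float(np.max(scores))
        if conf > 0.5:
            cx, cy = det[0] * img.shape[1], det[1] * img.shape[0]
            print(f"class {cls} at ({cx:.0f}, {cy:.0f}), confidence {conf:.2f}")
```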
## DeepFaceDrawing: Deep Generation of Face Images from Sketches [2]<a name="2"></a>
You can now generate high-quality face images from rough or even incomplete sketches with zero drawing skills using this new image-to-image translation technique! If your drawing skills are as bad as mine, you can even adjust how much the eyes, mouth, and nose will affect the final image! Let's see if it really works and how they did it.

* Short Video Explanation:

[![Watch the video](https://imgur.com/NJiJ3ny.png)](https://youtu.be/djXdgCVB0oM)
* [AI Generates Real Faces From Sketches!](https://medium.com/what-is-artificial-intelligence/ai-generates-real-faces-from-sketches-8ccbac5d2b2e) - Short Read
* [DeepFaceDrawing: Deep Generation of Face Images from Sketches](http://geometrylearning.com/paper/DeepFaceDrawing.pdf) - The Paper
* [Click here for the DeepFaceDrawing code](https://github.com/IGLICT/DeepFaceDrawing-Jittor) - The Code


## Learning to Simulate Dynamic Environments with GameGAN [3]<a name="3"></a>
GameGAN, a generative adversarial network trained on 50,000 PAC-MAN episodes, produces a fully functional version of the dot-munching classic without an underlying game engine.

* Short Video Explanation:

[![Watch the video](https://imgur.com/bs3HPrm.png)](https://youtu.be/RzFxhSfTww4)
* [40 Years on, PAC-MAN Recreated with AI by NVIDIA Researchers](https://blogs.nvidia.com/blog/2020/05/22/gamegan-research-pacman-anniversary/) - Short Read
* [Learning to Simulate Dynamic Environments with GameGAN](https://arxiv.org/pdf/2005.12126.pdf) - The Paper
* [Click here for the GameGAN code](https://github.com/nv-tlabs/GameGAN_code) - The Code


## PULSE: Self-Supervised Photo Upsampling via Latent Space Exploration of Generative Models [4]<a name="4"></a>
This new algorithm transforms a blurry image into a high-resolution image! It can take a super low-resolution 16x16 image and turn it into a 1080p high-definition human face! You don't believe me? Then you can do just like me and try it on yourself in less than a minute! But first, let's see how they did that; a conceptual sketch of the core loop follows the links below.

* Short Video Explanation:

[![Watch the video](https://imgur.com/2R9Yhgk.png)](https://youtu.be/cgakyOI9r8M)
* [This AI makes blurry faces look 60 times sharper](https://medium.com/what-is-artificial-intelligence/this-ai-makes-blurry-faces-look-60-times-sharper-7fcd3b820910) - Short Read
* [PULSE: Self-Supervised Photo Upsampling via Latent Space Exploration of Generative Models](https://arxiv.org/abs/2003.03808) - The Paper
* [Click here for the PULSE code](https://github.com/adamian98/pulse) - The Code
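The "latent space exploration" in the title boils down to a small optimization loop: search for a latent vector whose generated face, once downscaled, matches the low-res input. Here is a conceptual sketch with a stand-in generator (PULSE itself searches a pretrained StyleGAN and adds constraints keeping the latent near its prior):

```python
# Conceptual PULSE loop: optimize a latent z so that downscale(G(z)) matches
# the low-res input. G is a tiny stand-in, not the pretrained StyleGAN PULSE
# actually uses.
import torch
import torch.nn.functional as F

G = torch.nn.Sequential(torch.nn.Linear(512, 3 * 64 * 64), torch.nn.Tanh())

lr_image = torch.rand(1, 3, 16, 16)          # the blurry 16x16 input
z = torch.randn(1, 512, requires_grad=True)
opt = torch.optim.Adam([z], lr=0.1)

for step in range(200):
    hr = G(z).view(1, 3, 64, 64)             # candidate high-res face
    down = F.interpolate(hr, size=(16, 16), mode="bilinear", align_corners=False)
    loss = F.mse_loss(down, lr_image)        # self-supervised: only the LR input is used
    opt.zero_grad()
    loss.backward()
    opt.step()
```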
## Unsupervised Translation of Programming Languages [5]<a name="5"></a>
This new model converts code from one programming language to another without any supervision! It can take a Python function and translate it into a C++ function, and vice versa, without any prior examples! It understands the syntax of each language and can thus generalize to other programming languages! Let's see how they did that.

* Short Video Explanation:

[![Watch the video](https://imgur.com/U56grxy.png)](https://youtu.be/u6kM2lkrGQk)
* [This AI translates code from a programming language to another | Facebook TransCoder Explained](https://medium.com/what-is-artificial-intelligence/this-ai-translates-code-from-a-programming-language-to-another-facebook-transcoder-explained-3017d052f4fd) - Short Read
* [Unsupervised Translation of Programming Languages](https://arxiv.org/abs/2006.03511) - The Paper
* [Click here for the TransCoder code](https://github.com/facebookresearch/TransCoder?utm_source=catalyzex.com) - The Code


## PIFuHD: Multi-Level Pixel-Aligned Implicit Function for High-Resolution 3D Human Digitization [6]<a name="6"></a>
This AI generates high-resolution 3D reconstructions of people from 2D images! It only needs a single image of you to generate a 3D avatar that looks just like you, even from the back!

* Short Video Explanation:

[![Watch the video](https://imgur.com/btyo32w.png)](https://youtu.be/ajWtdm05-6g)
* [AI Generates 3D high-resolution reconstructions of people from 2D images | Introduction to PIFuHD](https://medium.com/towards-artificial-intelligence/ai-generates-3d-high-resolution-reconstructions-of-people-from-2d-images-introduction-to-pifuhd-d4aa515a482a) - Short Read
* [PIFuHD: Multi-Level Pixel-Aligned Implicit Function for High-Resolution 3D Human Digitization](https://arxiv.org/pdf/2004.00452.pdf) - The Paper
* [Click here for the PiFuHD code](https://github.com/facebookresearch/pifuhd) - The Code


## High-Resolution Neural Face Swapping for Visual Effects [7]<a name="7"></a>
Researchers at Disney developed a new high-resolution face swapping algorithm for visual effects in the paper of the same name. It is capable of rendering photo-realistic results at megapixel resolution. Working for Disney, they are most certainly the best team for this work. Their goal is to swap a source actor's face onto a target actor while maintaining the target actor's performance. This is incredibly challenging and is useful in many circumstances, such as changing the age of a character, when an actor is not available, or even when it involves a stunt scene that would be too dangerous for the main actor to perform.
The current approaches require a lot of frame-by-frame animation and post-processing by professionals.

* Short Video Explanation:

[![Watch the video](https://imgur.com/GFTQVfY.png)](https://youtu.be/EzyhA46DQWA)
* [Disney's New High-Resolution Face Swapping Algorithm | New 2020 Face Swap Technology Explained](https://medium.com/what-is-artificial-intelligence/disneys-new-high-resolution-face-swapping-algorithm-new-2020-face-swap-technology-explained-da7dc8caa2f2) - Short Read
* [High-Resolution Neural Face Swapping for Visual Effects](https://studios.disneyresearch.com/2020/06/29/high-resolution-neural-face-swapping-for-visual-effects/) - The Paper


## Swapping Autoencoder for Deep Image Manipulation [8]<a name="8"></a>
This new technique can change the texture of any picture while staying realistic, using completely unsupervised training! The results look even better than what GANs can achieve, while being way faster! It could even be used to create deepfakes!

* Short Video Explanation:

[![Watch the video](https://imgur.com/1RLjXVm.png)](https://youtu.be/hPR4cRzQY0s)
* [Texture-Swapping AI beats GANs for Image Manipulation!](https://medium.com/what-is-artificial-intelligence/texture-swapping-ai-beats-gans-for-image-manipulation-e05700782183) - Short Read
* [Swapping Autoencoder for Deep Image Manipulation](https://arxiv.org/abs/2007.00653) - The Paper
* [Click here for the Swapping Autoencoder code](https://github.com/rosinality/swapping-autoencoder-pytorch?utm_source=catalyzex.com) - The Code


## GPT-3: Language Models are Few-Shot Learners [9]<a name="9"></a>
Current state-of-the-art NLP systems struggle to generalize across different tasks. They need to be fine-tuned on datasets of thousands of examples, while humans only need to see a few examples to perform a new language task. This was the goal behind GPT-3: to improve the task-agnostic characteristic of language models. An illustrative few-shot prompt follows the links below.

* Short Video Explanation:

[![Watch the video](https://imgur.com/Cqm6FIs.png)](https://youtu.be/gDDnTZchKec)
* [Can GPT-3 Really Help You and Your Company?](https://medium.com/towards-artificial-intelligence/can-gpt-3-really-help-you-and-your-company-84dac3c5b58a) - Short Read
* [Language Models are Few-Shot Learners](https://arxiv.org/pdf/2005.14165.pdf) - The Paper
* [Click here for GPT-3's GitHub page](https://github.com/openai/gpt-3) - The GitHub
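To see what "a few examples" means in practice, here is the few-shot prompt format the paper describes: the task is specified entirely in the model's context window, with no fine-tuning or gradient updates. The translation pairs below mirror the paper's own English-to-French figure:

```python
# Few-shot prompting as described in the GPT-3 paper: a handful of in-context
# examples, then an unfinished item for the model to complete.
prompt = """Translate English to French:

sea otter => loutre de mer
peppermint => menthe poivrée
cheese =>"""
# A capable few-shot learner should continue with "fromage" from the
# in-context examples alone, with no task-specific training.
```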
## Learning Joint Spatial-Temporal Transformations for Video Inpainting [10]<a name="10"></a>
This AI can fill in the missing pixels behind a removed moving object and reconstruct the whole video with far more accuracy and less blurriness than current state-of-the-art approaches!

* Short Video Explanation:

[![Watch the video](https://imgur.com/pKAD8Zu.png)](https://youtu.be/MAxMYGoN5U0)
* [This AI takes a video and fills the missing pixels behind an object!](https://medium.com/towards-artificial-intelligence/this-ai-takes-a-video-and-fills-the-missing-pixels-behind-an-object-video-inpainting-9be38e141f46) - Short Read
* [Learning Joint Spatial-Temporal Transformations for Video Inpainting](https://arxiv.org/abs/2007.10247) - The Paper
* [Click here for this Video Inpainting code](https://github.com/researchmm/STTN?utm_source=catalyzex.com) - The Code


## Image GPT - Generative Pretraining from Pixels [11]<a name="11"></a>
A good AI, like the one used in Gmail, can generate coherent text and finish your phrase. This one uses the same principle to complete an image! All done with unsupervised training, with no labels required at all!

* Short Video Explanation:

[![Watch the video](https://imgur.com/4RKhkL0.png)](https://youtu.be/FwXQ568_io0)
* [This AI Can Generate the Other Half of a Picture Using a GPT Model](https://medium.com/towards-artificial-intelligence/this-ai-can-generate-the-pixels-of-half-of-a-picture-from-nothing-using-a-nlp-model-7d7ba14b5522) - Short Read
* [Image GPT - Generative Pretraining from Pixels](https://openai.com/blog/image-gpt/) - The Paper
* [Click here for OpenAI's Image GPT code](https://github.com/openai/image-gpt) - The Code
Paper Introduction & Results examples](https:\u002F\u002Fmedium.com\u002Fwhat-is-artificial-intelligence\u002Fthis-ai-can-cartoonize-any-picture-or-video-you-feed-it-paper-introduction-results-examples-d7e400d8c3e8) - Short Read\n* [Learning to Cartoonize Using White-box Cartoon Representations](https:\u002F\u002Fsystemerrorwang.github.io\u002FWhite-box-Cartoonization\u002Fpaper\u002F06791.pdf) - The Paper\n* [Click here for the Cartoonize code](https:\u002F\u002Fgithub.com\u002FSystemErrorWang\u002FWhite-box-Cartoonization) - The Code\n\n\n## FreezeG: Freeze the Discriminator: a Simple Baseline for Fine-Tuning GANs [13]\u003Ca name=\"13\">\u003C\u002Fa>\nThis face generating model is able to transfer normal face photographs into distinctive styles such as Lee Mal-Nyeon's cartoon style, the Simpsons, arts, and even dogs! The best thing about this new technique is that it's super simple and significantly outperforms previous techniques used in GANs.\n\n* Short Video Explanation:\n\n[![Watch the video](https:\u002F\u002Fimgur.com\u002FgjSAMI7.png)](https:\u002F\u002Fyoutu.be\u002FRvPUVniQiuw)\n* [This Face Generating Model Transfers Real Face Photographs Into Distinctive Cartoon Styles](https:\u002F\u002Fmedium.com\u002Fwhat-is-artificial-intelligence\u002Fthis-face-generating-model-transfers-real-face-photographs-into-distinctive-cartoon-styles-33dde907737a) - Short Read\n* [Freeze the Discriminator: a Simple Baseline for Fine-Tuning GANs](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2002.10964.pdf) - The Paper\n* [Click here for the FreezeG code](https:\u002F\u002Fgithub.com\u002Fsangwoomo\u002FfreezeD?utm_source=catalyzex.com) - The Code\n\n\n## Neural Re-Rendering of Humans from a Single Image [14]\u003Ca name=\"14\">\u003C\u002Fa>\nThe algorithm represents body pose and shape as a parametric mesh which can be reconstructed from a single image and easily reposed. Given an image of a person, they are able to create synthetic images of the person in different poses or with different clothing obtained from another input image.\n\n* Short Video Explanation:\n\n[![Watch the video](https:\u002F\u002Fimgur.com\u002FpsuEw4g.png)](https:\u002F\u002Fyoutu.be\u002FE7fGsSNKMc4)\n* [Transfer clothes between photos using AI. From a single image!](https:\u002F\u002Fmedium.com\u002Fdataseries\u002Ftransfer-clothes-between-photos-using-ai-from-a-single-image-4430a291afd7) - Short Read\n* [Neural Re-Rendering of Humans from a Single Image](http:\u002F\u002Fgvv.mpi-inf.mpg.de\u002Fprojects\u002FNHRR\u002Fdata\u002F1415.pdf) - The Paper\n\n\n\n## I2L-MeshNet: Image-to-Lixel Prediction Network for Accurate 3D Human Pose and Mesh Estimation from a Single RGB Image [15]\u003Ca name=\"15\">\u003C\u002Fa>\nTheir goal was to propose a new technique for 3D Human Pose and Mesh Estimation from a single RGB image. They called it I2L-MeshNet. Where I2L stands for Image-to-Lixel. Just like a voxel, volume + pixel, is a quantized cell in three-dimensional space, they defined lixel, a line, and pixel, as a quantized cell in one-dimensional space. Their method outperforms previous methods and the code is publicly available!\n\n* Short Video Explanation:\n\n[![Watch the video](https:\u002F\u002Fimgur.com\u002FiEd7FeO.png)](https:\u002F\u002Fyoutu.be\u002FtDz2wTixcrI)\n* [Accurate 3D Human Pose and Mesh Estimation from a Single RGB Image! 
## Beyond the Nav-Graph: Vision-and-Language Navigation in Continuous Environments [16]<a name="16"></a>
Language-guided navigation is a widely studied and very complex field. Indeed, it may seem simple for a human to walk through a house and fetch the coffee you left on the nightstand to the left of your bed, but it is a whole other story for an agent, an autonomous AI-driven system that uses deep learning to perform tasks.

* Short Video Explanation:

[![Watch the video](https://imgur.com/zsRG2lb.png)](https://youtu.be/Fw_RUlUjuN4)
* [Language-Guided Navigation in a 3D Environment](https://becominghuman.ai/language-guided-navigation-in-a-3d-environment-e3cf4102fb89) - Short Read
* [Beyond the Nav-Graph: Vision-and-Language Navigation in Continuous Environments](https://arxiv.org/pdf/2004.02857.pdf) - The Paper
* [Click here for the VLN-CE code](https://github.com/jacobkrantz/VLN-CE) - The Code


## RAFT: Recurrent All-Pairs Field Transforms for Optical Flow [17]<a name="17"></a>
The ECCV 2020 Best Paper Award went to a Princeton team. They developed a new end-to-end trainable model for optical flow. Their method beats the accuracy of state-of-the-art architectures across multiple datasets while being far more efficient. They even made the code available for everyone on their GitHub!

* Short Video Explanation:

[![Watch the video](https://imgur.com/VdcyRAE.png)](https://youtu.be/OSEuYBwOSGI)
* [ECCV 2020 Best Paper Award | A New Architecture For Optical Flow](https://medium.com/towards-artificial-intelligence/eccv-2020-best-paper-award-a-new-architecture-for-optical-flow-3298c8a40dc7) - Short Read
* [RAFT: Recurrent All-Pairs Field Transforms for Optical Flow](https://arxiv.org/pdf/2003.12039.pdf) - The Paper
* [Click here for the RAFT code](https://github.com/princeton-vl/RAFT) - The Code


## Crowdsampling the Plenoptic Function [18]<a name="18"></a>
Using tourists' public photos from the internet, they were able to reconstruct multiple viewpoints of a scene while preserving realistic shadows and lighting!
This is a huge advancement over state-of-the-art techniques for photorealistic scene rendering, and their results are simply amazing.

* Short Video Explanation:

[![Watch the video](https://imgur.com/Hk8XiOS.png)](https://youtu.be/F_JqJNBvJ64)
* [Reconstruct Photorealistic Scenes from Tourists' Public Photos on the Internet!](https://medium.com/towards-artificial-intelligence/reconstruct-photorealistic-scenes-from-tourists-public-photos-on-the-internet-bb9ad39c96f3) - Short Read
* [Crowdsampling the Plenoptic Function](https://research.cs.cornell.edu/crowdplenoptic/) - The Paper
* [Click here for the Crowdsampling code](https://github.com/zhengqili/Crowdsampling-the-Plenoptic-Function) - The Code


## Old Photo Restoration via Deep Latent Space Translation [19]<a name="19"></a>
Imagine having the old, folded, and even torn pictures of your grandmother when she was 18 years old, in high definition with zero artifacts. This is called old photo restoration, and this paper just opened a whole new avenue for addressing this problem with a deep learning approach.

* Short Video Explanation:

[![Watch the video](https://imgur.com/cnds8hi.png)](https://youtu.be/QUmrIpl0afQ)
* [Old Photo Restoration using Deep Learning](https://medium.com/towards-artificial-intelligence/old-photo-restoration-using-deep-learning-47d4ab1bdc4d) - Short Read
* [Old Photo Restoration via Deep Latent Space Translation](https://arxiv.org/pdf/2009.07047.pdf) - The Paper
* [Click here for the Old Photo Restoration code](https://github.com/microsoft/Bringing-Old-Photos-Back-to-Life?utm_source=catalyzex.com) - The Code


## Neural circuit policies enabling auditable autonomy [20]<a name="20"></a>
Researchers from IST Austria and MIT have successfully trained a self-driving car using a new artificial intelligence system based on the brains of tiny animals, such as threadworms. They achieved this with only a few neurons controlling the self-driving car, compared to the millions of neurons needed by popular deep neural networks such as Inception, ResNet, or VGG. Their network was able to completely control a car using only 75,000 parameters, composed of 19 control neurons, rather than millions! A small wiring sketch follows the links below.

* Short Video Explanation:

[![Watch the video](https://imgur.com/86EtfbT.png)](https://youtu.be/wAa358pNDkQ)
* [A New Brain-inspired Intelligent System Drives a Car Using Only 19 Control Neurons!](https://medium.com/towards-artificial-intelligence/a-new-brain-inspired-intelligent-system-drives-a-car-using-only-19-control-neurons-1ed127107db9) - Short Read
* [Neural circuit policies enabling auditable autonomy](https://www.nature.com/articles/s42256-020-00237-3.epdf?sharing_token=xHsXBg2SoR9l8XdbXeGSqtRgN0jAjWel9jnR3ZoTv0PbS_e49wmlSXvnXIRQ7wyir5MOFK7XBfQ8sxCtVjc7zD1lWeQB5kHoRr4BAmDEU0_1-UN5qHD5nXYVQyq5BrRV_tFa3_FZjs4LBHt-yebsG4eQcOnNsG4BenK3CmBRFLk%3D) - The Paper
* [Click here for the NCP code](https://github.com/mlech26l/keras-ncp) - The Code
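For a feel of what such a sparse wiring looks like in code, here is a sketch adapted from the keras-ncp repo's README example. Neuron and fan-out counts are illustrative, and the project has since been renamed (`ncps`), so check the repo for the current API:

```python
# Sketch of a sparse NCP wiring with a liquid time-constant (LTC) cell,
# adapted from the keras-ncp README's example pattern. Counts are illustrative.
import tensorflow as tf
from kerasncp import wirings
from kerasncp.tf import LTCCell

wiring = wirings.NCP(
    inter_neurons=12,              # interneuron layer
    command_neurons=8,             # command layer
    motor_neurons=1,               # one output, e.g. steering
    sensory_fanout=4,              # outgoing synapses per sensory neuron
    inter_fanout=4,
    recurrent_command_synapses=4,
    motor_fanin=6,
)
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(None, 2)),   # (time, features)
    tf.keras.layers.RNN(LTCCell(wiring), return_sequences=True),
])
model.compile(optimizer="adam", loss="mse")
```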
## Lifespan Age Transformation Synthesis [21]<a name="21"></a>
A team of researchers from Adobe Research developed a new technique for age transformation synthesis based on a single picture of a person. It can generate lifespan pictures from any photo you send it.

* Short Video Explanation:

[![Watch the video](https://imgur.com/UW1wTRY.png)](https://youtu.be/xA-3cWJ4Y9Q)
* [Generate Younger & Older Versions of Yourself!](https://medium.com/towards-artificial-intelligence/generate-younger-older-versions-of-yourself-1a87f970f3da) - Short Read
* [Lifespan Age Transformation Synthesis](https://arxiv.org/pdf/2003.09764.pdf) - The Paper
* [Click here for the Lifespan Age Transformation Synthesis code](https://github.com/royorel/Lifespan_Age_Transformation_Synthesis) - The Code


## DeOldify [22]<a name="22"></a>
DeOldify is a technique to colorize and restore old black-and-white images and even film footage. It was developed, and is still being updated, by a single person, Jason Antic. It is now the state-of-the-art way to colorize black-and-white images, and everything is open-sourced.

* Short Video Explanation:

[![Watch the video](https://imgur.com/jGOxFl1.png)](https://youtu.be/1EP_Lq04h4M)
* [This AI can Colorize your Black & White Photos with Full Photorealistic Renders! (DeOldify)](https://medium.com/towards-artificial-intelligence/this-ai-can-colorize-your-black-white-photos-with-full-photorealistic-renders-deoldify-bf1eed5cb02a) - Short Read
* [Click here for the DeOldify code](https://github.com/jantic/DeOldify) - The Code


## COOT: Cooperative Hierarchical Transformer for Video-Text Representation Learning [23]<a name="23"></a>
As the name states, it uses transformers to generate accurate text descriptions for each sequence of a video, using both the video and a general description of it as inputs.

* Short Video Explanation:

[![Watch the video](https://imgur.com/qZDiMZh.png)](https://youtu.be/5TRp5SuEtoY)
* [Video to Text Description Using Deep Learning and Transformers | COOT](https://medium.com/towards-artificial-intelligence/video-to-text-description-using-deep-learning-and-transformers-coot-e05b8d0db110) - Short Read
* [COOT: Cooperative Hierarchical Transformer for Video-Text Representation Learning](https://arxiv.org/pdf/2011.00597.pdf) - The Paper
* [Click here for the COOT code](https://github.com/gingsi/coot-videotext) - The Code


## Stylized Neural Painting [24]<a name="24"></a>
This image-to-painting translation method simulates a real painter in multiple styles, using a novel approach that does not involve any GAN architecture, unlike all current state-of-the-art approaches!

* Short Video Explanation:

[![Watch the video](https://imgur.com/6Bespnd.png)](https://youtu.be/dzJStceOaQs)
* [Image-to-Painting Translation With Style Transfer](https://medium.com/towards-artificial-intelligence/image-to-painting-translation-with-style-transfer-508618596409) - Short Read
* [Stylized Neural Painting](https://arxiv.org/abs/2011.08114) - The Paper
* [Click here for the Stylized Neural Painting code](https://github.com/jiupinjia/stylized-neural-painting) - The Code
## Is a Green Screen Really Necessary for Real-Time Portrait Matting? [25]<a name="25"></a>
Human matting is an extremely interesting task where the goal is to find any human in a picture and remove the background from it. It is really hard to achieve due to the complexity of the task, since it requires finding the person or people with a perfect contour. In this post, I review the best techniques used over the years and a novel approach published on November 29th, 2020. Many techniques use basic computer vision algorithms, such as GrabCut, which is extremely fast but not very precise.

* Short Video Explanation:

[![Watch the video](https://imgur.com/EXMOzI9.png)](https://youtu.be/rUo0wuVyefU)
* [High-Quality Background Removal Without Green Screens](https://medium.com/datadriveninvestor/high-quality-background-removal-without-green-screens-8e61c69de63) - Short Read
* [Is a Green Screen Really Necessary for Real-Time Portrait Matting?](https://arxiv.org/pdf/2011.11961.pdf) - The Paper
* [Click here for the MODNet code](https://github.com/ZHKKKe/MODNet) - The Code


## ADA: Training Generative Adversarial Networks with Limited Data [26]<a name="26"></a>
With this new training method developed by NVIDIA, you can train a powerful generative model with one-tenth of the images, making possible many applications that do not have access to that much data! A schematic training-loop sketch follows the links below.

* Short Video Explanation:

[![Watch the video](https://imgur.com/d9x33BO.png)](https://youtu.be/9fVNtVr_luc)
* [GAN Training Breakthrough for Limited Data Applications & New NVIDIA Program! NVIDIA Research](https://medium.com/towards-artificial-intelligence/gan-training-breakthrough-for-limited-data-applications-new-nvidia-program-nvidia-research-3652c4c172e6) - Short Read
* [Training Generative Adversarial Networks with Limited Data](https://arxiv.org/abs/2006.06676) - The Paper
* [Click here for the ADA code](https://github.com/NVlabs/stylegan2-ada) - The Code
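The core trick, as I understand the paper, is adaptive discriminator augmentation: every image the discriminator sees, real or generated, is augmented with some probability p, and p is adjusted online from an overfitting heuristic. A schematic sketch with stand-in models (the stub networks, data, and single flip augmentation are mine; the paper uses a large augmentation pipeline):

```python
# Schematic ADA loop: augment both real and fake inputs to D with probability
# p; nudge p based on how confidently D separates reals (overfitting signal).
import torch

G = torch.nn.Sequential(torch.nn.Linear(64, 3 * 32 * 32), torch.nn.Tanh())
D = torch.nn.Sequential(torch.nn.Flatten(), torch.nn.Linear(3 * 32 * 32, 1))

def augment(x, p):
    # Stand-in augmentation: horizontally flip a random p-fraction of the batch.
    mask = (torch.rand(x.size(0), 1, 1, 1) < p).float()
    return mask * torch.flip(x, dims=[3]) + (1 - mask) * x

p = 0.0
for step in range(100):
    real = torch.rand(16, 3, 32, 32)                   # stand-in data batch
    fake = G(torch.randn(16, 64)).view(16, 3, 32, 32)
    d_real = D(augment(real, p))
    d_fake = D(augment(fake.detach(), p))              # same augmentation for both
    # ... the usual GAN losses and optimizer updates would go here ...
    r_t = d_real.sign().mean().item()                  # overfitting heuristic
    p = min(1.0, max(0.0, p + 0.005 * (1.0 if r_t > 0.6 else -1.0)))
```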
## Improving Data‐Driven Global Weather Prediction Using Deep Convolutional Neural Networks on a Cubed Sphere [27]<a name="27"></a>
This approach maps global weather data onto a cubed-sphere grid and trains deep convolutional neural networks on it, producing data-driven global forecasts that run far faster than conventional numerical weather prediction models while improving on earlier data-driven approaches.

* Short Video Explanation:

[![Watch the video](https://imgur.com/02FfFOg.png)](https://youtu.be/C7dNU298A0A)
* [AI is Predicting Faster and More Accurate Weather Forecasts](https://medium.com/towards-artificial-intelligence/ai-is-predicting-faster-and-more-accurate-weather-forecasts-5d99a1d9c4f) - Short Read
* [Improving Data‐Driven Global Weather Prediction Using Deep Convolutional Neural Networks on a Cubed Sphere](https://agupubs.onlinelibrary.wiley.com/doi/10.1029/2020MS002109) - The Paper
* [Click here for the weather forecasting code](https://github.com/jweyn/DLWP-CS) - The Code


## NeRV: Neural Reflectance and Visibility Fields for Relighting and View Synthesis [28]<a name="28"></a>
This new method is able to generate a complete 3-dimensional scene and has the ability to decide the lighting of the scene. All this with very limited computation costs and amazing results compared to previous approaches.

* Short Video Explanation:

[![Watch the video](https://imgur.com/w3QnN6g.png)](https://youtu.be/ZkaTyBvS2w4)
* [Generate a Complete 3D Scene Under Arbitrary Lighting Conditions from a Set of Input Images](https://medium.com/what-is-artificial-intelligence/generate-a-complete-3d-scene-under-arbitrary-lighting-conditions-from-a-set-of-input-images-9d2fbce63243) - Short Read
* [NeRV: Neural Reflectance and Visibility Fields for Relighting and View Synthesis](https://arxiv.org/abs/2012.03927) - The Paper
* [Click here for the NeRV code *(coming soon)*](https://people.eecs.berkeley.edu/~pratul/nerv/) - The Code

---

🆕 Check out [the 2021 repo](https://github.com/louisfb01/best_AI_papers_2021)!

***Tag me on Twitter [@Whats_AI](https://twitter.com/Whats_AI) or LinkedIn [@Louis (What's AI) Bouchard](https://www.linkedin.com/in/whats-ai/) if you share the list!***

---

## Paper references<a name="references"></a>

[1] A. Bochkovskiy, C.-Y. Wang, and H.-Y. M. Liao, Yolov4: Optimal speed and accuracy of object detection, 2020. arXiv:2004.10934 [cs.CV].

[2] S.-Y. Chen, W. Su, L. Gao, S. Xia, and H. Fu, "DeepFaceDrawing: Deep generation of face images from sketches," ACM Transactions on Graphics (Proceedings of ACM SIGGRAPH 2020), vol. 39, no. 4, 72:1–72:16, 2020.

[3] S. W. Kim, Y. Zhou, J. Philion, A. Torralba, and S. Fidler, "Learning to Simulate Dynamic Environments with GameGAN," in IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Jun. 2020.

[4] S. Menon, A. Damian, S. Hu, N. Ravi, and C. Rudin, Pulse: Self-supervised photo upsampling via latent space exploration of generative models, 2020. arXiv:2003.03808 [cs.CV].

[5] M.-A. Lachaux, B. Roziere, L. Chanussot, and G. Lample, Unsupervised translation of programming languages, 2020. arXiv:2006.03511 [cs.CL].

[6] S. Saito, T. Simon, J. Saragih, and H. Joo, Pifuhd: Multi-level pixel-aligned implicit function for high-resolution 3d human digitization, 2020. arXiv:2004.00452 [cs.CV].

[7] J. Naruniec, L. Helminger, C. Schroers, and R. Weber, "High-resolution neural face swapping for visual effects," Computer Graphics Forum, vol. 39, pp. 173–184, Jul. 2020. doi:10.1111/cgf.14062.

[8] T. Park, J.-Y. Zhu, O. Wang, J. Lu, E. Shechtman, A. A. Efros, and R. Zhang, Swapping autoencoder for deep image manipulation, 2020. arXiv:2007.00653 [cs.CV].

[9] T. B. Brown, B. Mann, N. Ryder, M. Subbiah, J. Kaplan, P. Dhariwal, A. Neelakantan, P. Shyam, G. Sastry, A. Askell, S. Agarwal, A. Herbert-Voss, G. Krueger, T. Henighan, R. Child, A. Ramesh, D. M. Ziegler, J. Wu, C. Winter, C. Hesse, M. Chen, E. Sigler, M. Litwin, S. Gray, B. Chess, J. Clark, C. Berner, S. McCandlish, A. Radford, I. Sutskever, and D. Amodei, "Language models are few-shot learners," 2020. arXiv:2005.14165 [cs.CL].

[10] Y. Zeng, J. Fu, and H. Chao, Learning joint spatial-temporal transformations for video inpainting, 2020. arXiv:2007.10247 [cs.CV].
[11] M. Chen, A. Radford, R. Child, J. Wu, H. Jun, D. Luan, and I. Sutskever, "Generative pretraining from pixels," in Proceedings of the 37th International Conference on Machine Learning, H. D. III and A. Singh, Eds., ser. Proceedings of Machine Learning Research, vol. 119, Virtual: PMLR, 13–18 Jul 2020, pp. 1691–1703. [Online]. Available: http://proceedings.mlr.press/v119/chen20s.html.

[12] Xinrui Wang and Jinze Yu, "Learning to Cartoonize Using White-box Cartoon Representations," IEEE Conference on Computer Vision and Pattern Recognition, June 2020.

[13] S. Mo, M. Cho, and J. Shin, Freeze the discriminator: A simple baseline for fine-tuning gans, 2020. arXiv:2002.10964 [cs.CV].

[14] K. Sarkar, D. Mehta, W. Xu, V. Golyanik, and C. Theobalt, "Neural re-rendering of humans from a single image," in European Conference on Computer Vision (ECCV), 2020.

[15] G. Moon and K. M. Lee, "I2l-meshnet: Image-to-lixel prediction network for accurate 3d human pose and mesh estimation from a single rgb image," in European Conference on Computer Vision (ECCV), 2020.

[16] J. Krantz, E. Wijmans, A. Majumdar, D. Batra, and S. Lee, "Beyond the nav-graph: Vision-and-language navigation in continuous environments," 2020. arXiv:2004.02857 [cs.CV].

[17] Z. Teed and J. Deng, Raft: Recurrent all-pairs field transforms for optical flow, 2020. arXiv:2003.12039 [cs.CV].

[18] Z. Li, W. Xian, A. Davis, and N. Snavely, "Crowdsampling the plenoptic function," in Proc. European Conference on Computer Vision (ECCV), 2020.

[19] Z. Wan, B. Zhang, D. Chen, P. Zhang, D. Chen, J. Liao, and F. Wen, Old photo restoration via deep latent space translation, 2020. arXiv:2009.07047 [cs.CV].

[20] M. Lechner, R. Hasani, A. Amini, et al., "Neural circuit policies enabling auditable autonomy," Nature Machine Intelligence, vol. 2, pp. 642–652, 2020. https://doi.org/10.1038/s42256-020-00237-3

[21] R. Or-El, S. Sengupta, O. Fried, E. Shechtman, and I. Kemelmacher-Shlizerman, "Lifespan age transformation synthesis," in Proceedings of the European Conference on Computer Vision (ECCV), 2020.

[22] Jason Antic, Creator of DeOldify, https://github.com/jantic/DeOldify

[23] S. Ging, M. Zolfaghari, H. Pirsiavash, and T. Brox, "Coot: Cooperative hierarchical transformer for video-text representation learning," in Conference on Neural Information Processing Systems, 2020.

[24] Z. Zou, T. Shi, S. Qiu, Y. Yuan, and Z. Shi, Stylized neural painting, 2020. arXiv:2011.08114 [cs.CV].

[25] Z. Ke, K. Li, Y. Zhou, Q. Wu, X. Mao, Q. Yan, and R. W. Lau, "Is a green screen really necessary for real-time portrait matting?" ArXiv, vol. abs/2011.11961, 2020.

[26] T. Karras, M. Aittala, J. Hellsten, S. Laine, J. Lehtinen, and T. Aila, Training generative adversarial networks with limited data, 2020. arXiv:2006.06676 [cs.CV].

[27] J. A. Weyn, D. R. Durran, and R. Caruana, "Improving data-driven global weather prediction using deep convolutional neural networks on a cubed sphere," Journal of Advances in Modeling Earth Systems, vol. 12, no. 9, Sep. 2020, issn: 1942–2466. doi:10.1029/2020ms002109.

[28] P. P. Srinivasan, B. Deng, X. Zhang, M. Tancik, B. Mildenhall, and J. T. Barron, "Nerv: Neural reflectance and visibility fields for relighting and view synthesis," arXiv, 2020.
Barron, \"Nerv: Neural reflectance and visibility fields for relighting and view synthesis,\" in arXiv, 2020.\n\n\n\n","# 2020：充满惊人AI论文的一年——综述\n## 按发布日期精选的最新AI突破列表，附清晰视频讲解、深度文章链接及代码\n\n尽管今年全球发生了诸多大事，我们依然见证了许多令人瞩目的研究成果涌现，尤其是在人工智能领域。今年尤其突出了许多重要议题，比如伦理考量、关键偏见等。人工智能与我们对人类大脑的理解及其与AI的关联正在不断演进，并有望在不久的将来实现极具前景的应用。\n\n以下列出了今年最值得关注的研究论文，以防你错过了其中任何一篇。简而言之，这是一份按发表日期排序的最新AI和数据科学突破精选清单，每篇都配有清晰的视频讲解、深入解读的文章链接，以及（如适用）代码。希望你喜欢这篇阅读！\n\n**本仓库末尾列出了每篇论文的完整引用信息。**\n\n维护者 - [louisfb01](https:\u002F\u002Fgithub.com\u002Flouisfb01)\n\n订阅我的[新闻通讯](http:\u002F\u002Feepurl.com\u002FhuGLT5)——每周为你带来AI领域的最新动态解析。\n\n🆕 查看[2021年的版本](https:\u002F\u002Fgithub.com\u002Flouisfb01\u002Fbest_AI_papers_2021)！\n\n*如果你发现有遗漏但非常优秀的论文，请随时通过bouchard.lf@gmail.com告知我，以便添加到此仓库中*\n\n***如果你分享这份列表，请在Twitter上@Whats_AI或LinkedIn上@Louis (What's AI) Bouchard标记我！***\n\n### 观看15分钟内的2020年回顾视频\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002FxzZT1ll.png)](https:\u002F\u002Fyoutu.be\u002FDHBclF-8KwE)\n\n---\n\n### 如果你对计算机视觉研究感兴趣，这里还有另一个很棒的仓库推荐：\n2020年十大计算机视觉论文，附视频演示、文章、代码及论文引用。\n\n[2020年十大计算机视觉论文](https:\u002F\u002Fgithub.com\u002Flouisfb01\u002FTop-10-Computer-Vision-Papers-2020)\n\n----\n\n👀 **如果你想支持我的工作**，并使用Weights & Biases（免费）来跟踪你的机器学习实验、提高工作的可重复性或与团队协作，可以按照[这篇指南](https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002Flouisfb01\u002Fexamples\u002Fblob\u002Fmaster\u002Fcolabs\u002Fpytorch\u002FSimple_PyTorch_Integration.ipynb)试用一下！由于这里的大部分代码基于PyTorch，我们认为分享一份关于如何在PyTorch中使用W&B的[快速入门指南](https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002Flouisfb01\u002Fexamples\u002Fblob\u002Fmaster\u002Fcolabs\u002Fpytorch\u002FSimple_PyTorch_Integration.ipynb)会非常有趣。\n\n👉按照[这个快速指南](https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002Flouisfb01\u002Fexamples\u002Fblob\u002Fmaster\u002Fcolabs\u002Fpytorch\u002FSimple_PyTorch_Integration.ipynb)，将相同的W&B代码行加入你的项目或下方的任意一个仓库中，你的所有实验就会自动被记录到你的W&B账户中！设置过程不超过5分钟，它将彻底改变你的工作方式，就像对我一样！如果感兴趣，还可以参考[这篇进阶指南](https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002Flouisfb01\u002Fexamples\u002Fblob\u002Fmaster\u002Fcolabs\u002Fpytorch\u002FOrganizing_Hyperparameter_Sweeps_in_PyTorch_with_W%26B.ipynb)，了解如何使用超参数搜索 :)\n\n🙌 感谢[Weights & Biases](https:\u002F\u002Fwandb.ai\u002F)对本仓库及我所做工作的赞助，也感谢每一位通过此链接尝试W&B的朋友们！\n\n[![在Colab中打开](https:\u002F\u002Fcolab.research.google.com\u002Fassets\u002Fcolab-badge.svg)](https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002Flouisfb01\u002Fexamples\u002Fblob\u002Fmaster\u002Fcolabs\u002Fpytorch\u002FSimple_PyTorch_Integration.ipynb)\n\n----\n\n## 完整列表\n- [YOLOv4：目标检测的速度与精度最优解 [1]](#1)\n- [DeepFaceDrawing：基于草图的深度人脸图像生成 [2]](#2)\n- [利用GameGAN学习模拟动态环境 [3]](#3)\n- [PULSE：基于生成模型潜在空间探索的自监督照片超分辨率 [4]](#4)\n- [无监督编程语言翻译 [5]](#5)\n- [PIFuHD：用于高分辨率人体数字化的多层级像素对齐隐式函数 [6]](#6)\n- [面向视觉特效的高分辨率神经人脸交换 [7]](#7)\n- [用于深度图像处理的交换自编码器 [8]](#8)\n- [GPT-3：语言模型是少样本学习者 [9]](#9)\n- [用于视频修复的联合时空变换学习 [10]](#10)\n- [Image GPT——从像素出发的生成式预训练 [11]](#11)\n- [利用白盒卡通表示进行卡通化学习 [12]](#12)\n- [FreezeG：冻结判别器——微调GAN的简单基线 [13]](#13)\n- [单张图像驱动的人体神经重渲染 [14]](#14)\n- [I2L-MeshNet：基于单张RGB图像的精确人体姿态与网格估计的图像到体素预测网络 [15]](#15)\n- [超越导航图：连续环境中的视觉-语言导航 [16]](#16)\n- [RAFT：用于光流计算的循环全对场变换 [17]](#17)\n- [全景函数的众包采样 [18]](#18)\n- [通过深度潜在空间翻译恢复老照片 [19]](#19)\n- [可审计自主性的神经回路策略 [20]](#20)\n- [生命周期年龄转换合成 [21]](#21)\n- [DeOldify [22]](#22)\n- [COOT：用于视频-文本表征学习的合作式分层Transformer [23]](#23)\n- [风格化的神经绘画 [24]](#24)\n- [实时人像抠图真的需要绿幕吗？ [25]](#25)\n- [ADA：有限数据下的生成对抗网络训练 [26]](#26)\n- [利用立方球面上的深度卷积神经网络改进数据驱动的全球天气预报 [27]](#27)\n- [NeRV：用于重新打光和视图合成的神经反射率与可见性场 [28]](#28)\n- [论文引用](#references)\n\n---\n\n## YOLOv4：目标检测的速度与精度最优解 [1]\u003Ca 
name=\"1\">\u003C\u002Fa>\n这一第4版由Alexey Bochkovsky等人于2020年4月在论文《YOLOv4：目标检测的速度与精度最优解》中首次提出。该算法的主要目标是打造一款兼具超高速度和高质量准确率的目标检测器。\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002FShqhwQl.png)](https:\u002F\u002Fyoutu.be\u002FCtjZFkO5RPw)\n* [YOLOv4算法 | 一探究竟：You Only Look Once 第4版 | 实时目标检测](https:\u002F\u002Fmedium.com\u002Fwhat-is-artificial-intelligence\u002Fthe-yolov4-algorithm-introduction-to-you-only-look-once-version-4-real-time-object-detection-5fd8a608b0fa) —— 简短阅读\n* [YOLOv4：目标检测的速度与精度最优解](https:\u002F\u002Farxiv.org\u002Fabs\u002F2004.10934) —— 原文\n* [点击此处获取Yolo v4代码](https:\u002F\u002Fgithub.com\u002FAlexeyAB\u002Fdarknet) —— 代码\n\n## DeepFaceDrawing：基于草图的深度人脸图像生成 [2]\u003Ca name=\"2\">\u003C\u002Fa>\n现在，借助这项全新的图像到图像转换技术，即使你完全没有绘画技巧，也能仅凭粗糙甚至不完整的草图生成高质量的人脸图像！如果你的绘画水平像我一样糟糕，还可以调整眼睛、嘴巴和鼻子对最终图像的影响程度。让我们来看看它是否真的有效，以及他们是如何实现的。\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002FNJiJ3ny.png)](https:\u002F\u002Fyoutu.be\u002FdjXdgCVB0oM)\n* [AI根据草图生成真实人脸！](https:\u002F\u002Fmedium.com\u002Fwhat-is-artificial-intelligence\u002Fai-generates-real-faces-from-sketches-8ccbac5d2b2e) - 简短文章\n* [DeepFaceDrawing：基于草图的深度人脸图像生成](http:\u002F\u002Fgeometrylearning.com\u002Fpaper\u002FDeepFaceDrawing.pdf) - 论文\n* [点击此处获取DeepFaceDrawing代码](https:\u002F\u002Fgithub.com\u002FIGLICT\u002FDeepFaceDrawing-Jittor) - 代码\n\n\n## 使用GameGAN学习模拟动态环境 [3]\u003Ca name=\"3\">\u003C\u002Fa>\nGameGAN是一种生成对抗网络，它通过训练5万局吃豆人游戏，无需任何底层游戏引擎，便能生成一个功能完备的经典吃豆人版本。\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002Fbs3HPrm.png)](https:\u002F\u002Fyoutu.be\u002FRzFxhSfTww4)\n* [历经40年，NVIDIA研究人员用AI重现吃豆人](https:\u002F\u002Fblogs.nvidia.com\u002Fblog\u002F2020\u002F05\u002F22\u002Fgamegan-research-pacman-anniversary\u002F) - 简短文章\n* [使用GameGAN学习模拟动态环境](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2005.12126.pdf) - 论文\n* [点击此处获取GameGAN代码](https:\u002F\u002Fgithub.com\u002Fnv-tlabs\u002FGameGAN_code) - 代码\n\n\n## PULSE：基于生成模型潜在空间探索的自监督照片超分辨率 [4]\u003Ca name=\"4\">\u003C\u002Fa>\n这一新算法可以将模糊图像转化为高分辨率图像！它甚至能够把一张超低分辨率的16×16像素图像变成1080p高清的人脸！不信？那就跟我一样，花不到一分钟亲自试试吧！不过在那之前，我们先来看看他们是怎么做到的。\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002F2R9Yhgk.png)](https:\u002F\u002Fyoutu.be\u002FcgakyOI9r8M)\n* [这款AI让模糊人脸清晰度提升60倍](https:\u002F\u002Fmedium.com\u002Fwhat-is-artificial-intelligence\u002Fthis-ai-makes-blurry-faces-look-60-times-sharper-7fcd3b820910) - 简短文章\n* [PULSE：基于生成模型潜在空间探索的自监督照片超分辨率](https:\u002F\u002Farxiv.org\u002Fabs\u002F2003.03808) - 论文\n* [点击此处获取PULSE代码](https:\u002F\u002Fgithub.com\u002Fadamian98\u002Fpulse) - 代码\n\n\n## 无监督编程语言翻译 [5]\u003Ca name=\"5\">\u003C\u002Fa>\n这个新模型可以在没有任何监督的情况下，将一种编程语言的代码转换为另一种！它可以将Python函数翻译成C++函数，反之亦然，而且无需任何示例！它能够理解每种语言的语法，因此可以推广到任何编程语言。让我们看看它是如何做到的。\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002FU56grxy.png)](https:\u002F\u002Fyoutu.be\u002Fu6kM2lkrGQk)\n* [这款AI可将一种编程语言的代码翻译成另一种 | Facebook TransCoder详解](https:\u002F\u002Fmedium.com\u002Fwhat-is-artificial-intelligence\u002Fthis-ai-translates-code-from-a-programming-language-to-another-facebook-transcoder-explained-3017d052f4fd) - 简短文章\n* [无监督编程语言翻译](https:\u002F\u002Farxiv.org\u002Fabs\u002F2006.03511) - 论文\n* [点击此处获取TransCoder代码](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002FTransCoder?utm_source=catalyzex.com) - 代码\n\n\n## PIFuHD：用于高分辨率人体三维数字化的多级像素对齐隐式函数 [6]\u003Ca name=\"6\">\u003C\u002Fa>\n这款AI可以根据2D图像生成人物的高分辨率3D重建！它只需要一张你的单张照片，就能生成一个与你本人几乎一模一样的3D虚拟形象，甚至连背面都栩栩如生！\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002Fbtyo32w.png)](https:\u002F\u002Fyoutu.be\u002FajWtdm05-6g)\n* [AI根据2D图像生成人物高分辨率3D重建 | 
PIFuHD简介](https:\u002F\u002Fmedium.com\u002Ftowards-artificial-intelligence\u002Fai-generates-3d-high-resolution-reconstructions-of-people-from-2d-images-introduction-to-pifuhd-d4aa515a482a) - 简短文章\n* [PIFuHD：用于高分辨率人体三维数字化的多级像素对齐隐式函数](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2004.00452.pdf) - 论文\n* [点击此处获取PiFuHD代码](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fpifuhd) - 代码\n\n\n## 用于视觉特效的高分辨率神经面部替换 [7]\u003Ca name=\"7\">\u003C\u002Fa>\n迪士尼的研究人员在同名论文中开发了一种用于视觉特效的新型高分辨率面部替换算法。该算法能够在百万像素级别上渲染出照片般逼真的效果。作为迪士尼的团队，他们无疑是从事这项工作的最佳人选。他们的目标是在保持演员表演风格的同时，将目标演员的脸部替换成源演员的脸部。这极具挑战性，但在许多情况下都非常有用，比如改变角色年龄、演员无法到场，甚至是涉及主演员执行起来过于危险的特技场景时。而现有的方法通常需要专业人员逐帧进行动画制作和后期处理。\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002FGFTQVfY.png)](https:\u002F\u002Fyoutu.be\u002FEzyhA46DQWA)\n* [迪士尼新款高分辨率面部替换算法 | 新型2020年面部替换技术解析](https:\u002F\u002Fmedium.com\u002Fwhat-is-artificial-intelligence\u002Fdisneys-new-high-resolution-face-swapping-algorithm-new-2020-face-swap-technology-explained-da7dc8caa2f2) - 简短文章\n* [用于视觉特效的高分辨率神经面部替换](https:\u002F\u002Fstudios.disneyresearch.com\u002F2020\u002F06\u002F29\u002Fhigh-resolution-neural-face-swapping-for-visual-effects\u002F) - 论文\n\n\n## 用于深度图像操纵的交换自编码器 [8]\u003Ca name=\"8\">\u003C\u002Fa>\n这项新技术可以在完全无监督训练的情况下，以逼真的方式改变任何图片的纹理！其效果甚至优于GANs，同时速度也快得多！它甚至可以用来制作深度伪造内容！\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002F1RLjXVm.png)](https:\u002F\u002Fyoutu.be\u002FhPR4cRzQY0s)\n* [纹理交换AI在图像操纵方面超越GANs！](https:\u002F\u002Fmedium.com\u002Fwhat-is-artificial-intelligence\u002Ftexture-swapping-ai-beats-gans-for-image-manipulation-e05700782183) - 简短文章\n* [用于深度图像操纵的交换自编码器](https:\u002F\u002Farxiv.org\u002Fabs\u002F2007.00653) - 论文\n* [点击此处获取交换自编码器代码](https:\u002F\u002Fgithub.com\u002Frosinality\u002Fswapping-autoencoder-pytorch?utm_source=catalyzex.com) - 代码\n\n## GPT-3：语言模型是少样本学习者 [9]\u003Ca name=\"9\">\u003C\u002Fa>\n当前最先进的自然语言处理系统在跨任务泛化方面存在困难。它们通常需要在包含数千个示例的数据集上进行微调，而人类只需看到几个例子就能完成新的语言任务。GPT-3 的设计目标正是为了提升语言模型的任务无关性。\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002FCqm6FIs.png)](https:\u002F\u002Fyoutu.be\u002FgDDnTZchKec)\n* [GPT-3 真的能帮助你和你的公司吗？](https:\u002F\u002Fmedium.com\u002Ftowards-artificial-intelligence\u002Fcan-gpt-3-really-help-you-and-your-company-84dac3c5b58a) - 简短阅读\n* [语言模型是少样本学习者](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2005.14165.pdf) - 论文原文\n* [点击此处前往 GPT-3 的 GitHub 页面](https:\u002F\u002Fgithub.com\u002Fopenai\u002Fgpt-3) - GitHub 仓库\n\n\n## 针对视频修复的学习联合时空变换 [10]\u003Ca name=\"10\">\u003C\u002Fa>\n这款 AI 能够填补被移除的运动物体背后的缺失像素，并以远超现有最先进方法的精度和更低的模糊度重建整段视频！\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002FpKAD8Zu.png)](https:\u002F\u002Fyoutu.be\u002FMAxMYGoN5U0)\n* [这款 AI 可以处理视频并填补物体背后的缺失像素！](https:\u002F\u002Fmedium.com\u002Ftowards-artificial-intelligence\u002Fthis-ai-takes-a-video-and-fills-the-missing-pixels-behind-an-object-video-inpainting-9be38e141f46) - 简短阅读\n* [针对视频修复的学习联合时空变换](https:\u002F\u002Farxiv.org\u002Fabs\u002F2007.10247) - 论文原文\n* [点击此处获取该视频修复代码](https:\u002F\u002Fgithub.com\u002Fresearchmm\u002FSTTN?utm_source=catalyzex.com) - 代码\n\n\n## Image GPT — 基于像素的生成式预训练 [11]\u003Ca name=\"11\">\u003C\u002Fa>\n像 Gmail 中使用的优秀 AI 一样，能够生成连贯的文本并补全你的语句。这款 AI 则运用相同原理来完成图像的补全！整个过程采用无监督训练，完全无需标注数据！\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002F4RKhkL0.png)](https:\u002F\u002Fyoutu.be\u002FFwXQ568_io0)\n* [这款 AI 可以利用 GPT 模型生成图片的另一半内容](https:\u002F\u002Fmedium.com\u002Ftowards-artificial-intelligence\u002Fthis-ai-can-generate-the-pixels-of-half-of-a-picture-from-nothing-using-a-nlp-model-7d7ba14b5522) - 简短阅读\n* [Image GPT — 
## 基于白盒卡通表示的学习卡通化 [12]\u003Ca name=\"12\">\u003C\u002Fa>\n这款 AI 可以将你输入的任何图片或视频按照你想要的卡通风格进行卡通化处理！让我们来看看它是如何做到的，以及一些令人惊叹的案例。你甚至可以像我一样，在他们搭建的网站上亲自试一试！\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002FSTy8f51.png)](https:\u002F\u002Fyoutu.be\u002FGZVsONq3qtg)\n* [这款 AI 可以将你输入的任何图片或视频进行卡通化！论文介绍及结果示例](https:\u002F\u002Fmedium.com\u002Fwhat-is-artificial-intelligence\u002Fthis-ai-can-cartoonize-any-picture-or-video-you-feed-it-paper-introduction-results-examples-d7e400d8c3e8) - 简短阅读\n* [基于白盒卡通表示的学习卡通化](https:\u002F\u002Fsystemerrorwang.github.io\u002FWhite-box-Cartoonization\u002Fpaper\u002F06791.pdf) - 论文\n* [点击此处获取卡通化代码](https:\u002F\u002Fgithub.com\u002FSystemErrorWang\u002FWhite-box-Cartoonization) - 代码\n\n\n## FreezeD：冻结判别器——一种用于微调 GAN 的简单基线 [13]\u003Ca name=\"13\">\u003C\u002Fa>\n这款人脸生成模型能够将普通的人脸照片转换为独特的风格，例如韩国漫画家李末年（Lee Mal-nyeon）的卡通风格、辛普森一家、艺术风格，甚至是狗狗的形象！这项新技术的最大优点在于其极其简单，同时显著优于以往用于微调 GAN 的技术。\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002FgjSAMI7.png)](https:\u002F\u002Fyoutu.be\u002FRvPUVniQiuw)\n* [这款人脸生成模型可将真实人脸照片转换为独特的卡通风格](https:\u002F\u002Fmedium.com\u002Fwhat-is-artificial-intelligence\u002Fthis-face-generating-model-transfers-real-face-photographs-into-distinctive-cartoon-styles-33dde907737a) - 简短阅读\n* [冻结判别器：一种用于微调 GAN 的简单基线](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2002.10964.pdf) - 论文\n* [点击此处获取 FreezeD 代码](https:\u002F\u002Fgithub.com\u002Fsangwoomo\u002FfreezeD?utm_source=catalyzex.com) - 代码\n\n\n## 基于单张图像的人体神经网络重渲染 [14]\u003Ca name=\"14\">\u003C\u002Fa>\n该算法将人体姿态和形状表示为参数化网格，仅需一张图像即可重建，并能轻松调整姿势。给定一张人物图像，它能够根据另一张输入图像中的服装信息，生成该人物在不同姿势下或穿着不同服饰的合成图像。\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002FpsuEw4g.png)](https:\u002F\u002Fyoutu.be\u002FE7fGsSNKMc4)\n* [利用 AI 在照片之间转移服装！仅需一张图像！](https:\u002F\u002Fmedium.com\u002Fdataseries\u002Ftransfer-clothes-between-photos-using-ai-from-a-single-image-4430a291afd7) - 简短阅读\n* [基于单张图像的人体神经网络重渲染](http:\u002F\u002Fgvv.mpi-inf.mpg.de\u002Fprojects\u002FNHRR\u002Fdata\u002F1415.pdf) - 论文\n\n\n## I2L-MeshNet：从单张 RGB 图像中准确估计 3D 人体姿态与网格的图像到 Lixel 预测网络 [15]\u003Ca name=\"15\">\u003C\u002Fa>\n他们的目标是提出一种全新的技术，用于从单张 RGB 图像中估计 3D 人体姿态与网格，并将其命名为 I2L-MeshNet。其中，I2L 代表“图像到 Lixel”（image-to-lixel）。正如体素（voxel）是由体积（volume）和像素（pixel）组合而成的三维空间量化单元一样，他们定义了 lixel，即由线（line）和像素组成的一维空间量化单元。该方法性能优于先前的方法，且代码已公开可用！\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002FiEd7FeO.png)](https:\u002F\u002Fyoutu.be\u002FtDz2wTixcrI)\n* [从单张 RGB 图像中准确估计 3D 人体姿态与网格！代码公开可用！](https:\u002F\u002Fmedium.com\u002Fdataseries\u002Faccurate-3d-human-pose-and-mesh-estimation-from-a-single-rgb-image-with-code-publicly-avaibable-b7cc995bcf2a) - 简短阅读\n* [I2L-MeshNet：从单张 RGB 图像中准确估计 3D 人体姿态与网格的图像到 Lixel 预测网络](https:\u002F\u002Fwww.catalyzex.com\u002Fpaper\u002Farxiv:2008.03713?fbclid=IwAR1pQGBhIwO4gW4mVZm1UEtyPLyZInsLZMyq3EoANaWxGO0CZ00Sj3ViM7I) - 论文\n* [点击此处获取 I2L-MeshNet 代码](https:\u002F\u002Fgithub.com\u002Fmks0601\u002FI2L-MeshNet_RELEASE) - 代码\n\n## 超越导航图：连续环境中的视觉-语言导航 [16]\u003Ca name=\"16\">\u003C\u002Fa>\n语言引导的导航是一个被广泛研究且非常复杂的领域。对人类来说，穿过房间去拿放在床边梳妆台上的咖啡似乎很简单；然而，对于一个依靠深度学习执行任务的自主人工智能代理而言，这却是一项完全不同的挑战。\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002FzsRG2lb.png)](https:\u002F\u002Fyoutu.be\u002FFw_RUlUjuN4)\n* [3D环境中的语言引导导航](https:\u002F\u002Fbecominghuman.ai\u002Flanguage-guided-navigation-in-a-3d-environment-e3cf4102fb89) - 
简短阅读\n* [超越导航图：连续环境中的视觉-语言导航](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2004.02857.pdf) - 论文\n* [点击此处获取VLN-CE代码](https:\u002F\u002Fgithub.com\u002Fjacobkrantz\u002FVLN-CE) - 代码\n\n\n## RAFT：用于光流的循环全对场变换 [17]\u003Ca name=\"17\">\u003C\u002Fa>\n普林斯顿大学团队荣获ECCV 2020最佳论文奖。他们开发了一种全新的端到端可训练光流模型。该方法在多个数据集上均超越了当前最先进的架构精度，同时效率更高。他们甚至将其代码公开在GitHub上供所有人使用！\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002FVdcyRAE.png)](https:\u002F\u002Fyoutu.be\u002FOSEuYBwOSGI)\n* [ECCV 2020最佳论文奖 | 光流的新架构](https:\u002F\u002Fmedium.com\u002Ftowards-artificial-intelligence\u002Feccv-2020-best-paper-award-a-new-architecture-for-optical-flow-3298c8a40dc7) - 简短阅读\n* [RAFT：用于光流的循环全对场变换](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2003.12039.pdf) - 论文\n* [点击此处获取RAFT代码](https:\u002F\u002Fgithub.com\u002Fprinceton-vl\u002FRAFT) - 代码\n\n\n## 群众采样全景光函数 [18]\u003Ca name=\"18\">\u003C\u002Fa>\n他们利用互联网上游客的公开照片，成功重建了场景的多视角图像，并保留了逼真的阴影和光照效果！这一成果极大地推动了基于照片的真实感场景渲染技术的发展，其结果令人惊叹。\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002FHk8XiOS.png)](https:\u002F\u002Fyoutu.be\u002FF_JqJNBvJ64)\n* [用互联网上的游客公开照片重建逼真场景！](https:\u002F\u002Fmedium.com\u002Ftowards-artificial-intelligence\u002Freconstruct-photorealistic-scenes-from-tourists-public-photos-on-the-internet-bb9ad39c96f3) - 简短阅读\n* [群众采样全景光函数](https:\u002F\u002Fresearch.cs.cornell.edu\u002Fcrowdplenoptic\u002F) - 论文\n* [点击此处获取Crowdsampling代码](https:\u002F\u002Fgithub.com\u002Fzhengqili\u002FCrowdsampling-the-Plenoptic-Function) - 代码\n\n\n## 基于深度潜在空间翻译的老照片修复 [19]\u003Ca name=\"19\">\u003C\u002Fa>\n想象一下，你祖母18岁时的那些泛黄、褶皱甚至破损的照片，如今能以高清画质呈现，且没有任何瑕疵。这就是老照片修复技术。而这篇论文则开创了一种全新的深度学习方法来解决这一问题。\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002Fcnds8hi.png)](https:\u002F\u002Fyoutu.be\u002FQUmrIpl0afQ)\n* [利用深度学习进行老照片修复](https:\u002F\u002Fmedium.com\u002Ftowards-artificial-intelligence\u002Fold-photo-restoration-using-deep-learning-47d4ab1bdc4d) - 简短阅读\n* [基于深度潜在空间翻译的老照片修复](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2009.07047.pdf) - 论文\n* [点击此处获取老照片修复代码](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FBringing-Old-Photos-Back-to-Life?utm_source=catalyzex.com) - 代码\n\n\n## 可审计自主性的神经回路策略 [20]\u003Ca name=\"20\">\u003C\u002Fa>\n来自奥地利科学技术研究所和麻省理工学院的研究人员成功地使用一种基于微小动物（如线虫）大脑的人工智能系统训练了一辆自动驾驶汽车。与Inception、ResNet或VGG等流行的深度神经网络所需的数百万个神经元相比，他们的系统仅用少数几个神经元就能控制车辆。这个网络仅需19个控制神经元、总计75,000个参数，便能完全操控一辆汽车，而无需数百万个参数！\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002F86EtfbT.png)](https:\u002F\u002Fyoutu.be\u002FwAa358pNDkQ)\n* [新型仿脑智能系统仅用19个控制神经元即可驾驶汽车！](https:\u002F\u002Fmedium.com\u002Ftowards-artificial-intelligence\u002Fa-new-brain-inspired-intelligent-system-drives-a-car-using-only-19-control-neurons-1ed127107db9) - 简短阅读\n* [可审计自主性的神经回路策略](https:\u002F\u002Fwww.nature.com\u002Farticles\u002Fs42256-020-00237-3.epdf?sharing_token=xHsXBg2SoR9l8XdbXeGSqtRgN0jAjWel9jnR3ZoTv0PbS_e49wmlSXvnXIRQ7wyir5MOFK7XBfQ8sxCtVjc7zD1lWeQB5kHoRr4BAmDEU0_1-UN5qHD5nXYVQyq5BrRV_tFa3_FZjs4LBHt-yebsG4eQcOnNsG4BenK3CmBRFLk%3D) - 论文\n* [点击此处获取NCP代码](https:\u002F\u002Fgithub.com\u002Fmlech26l\u002Fkeras-ncp) - 代码\n\n\n## 生命周期年龄转换合成 [21]\u003Ca name=\"21\">\u003C\u002Fa>\nAdobe Research的研究团队开发了一项基于单张照片的全新年龄转换合成技术。只需提供一张照片，该技术便可生成从幼年到老年的多张不同年龄段的图像。\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002FUW1wTRY.png)](https:\u002F\u002Fyoutu.be\u002FxA-3cWJ4Y9Q)\n* [生成自己年轻和年老时的模样！](https:\u002F\u002Fmedium.com\u002Ftowards-artificial-intelligence\u002Fgenerate-younger-older-versions-of-yourself-1a87f970f3da) - 简短阅读\n* [生命周期年龄转换合成](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2003.09764.pdf) - 论文\n* 
[点击此处获取生命周期年龄转换合成代码](https:\u002F\u002Fgithub.com\u002Froyorel\u002FLifespan_Age_Transformation_Synthesis) - 代码\n\n\n## DeOldify [22]\u003Ca name=\"22\">\u003C\u002Fa>\nDeOldify是一种为老旧黑白照片甚至电影胶片着色并修复的技术。它由Jason Antic一人开发，至今仍在持续更新。目前，它是黑白图像着色领域的最先进技术，所有代码均为开源。我们稍后会再详细讨论这一点。\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002FjGOxFl1.png)](https:\u002F\u002Fyoutu.be\u002F1EP_Lq04h4M)\n* [这款AI可将你的黑白照片完整地转化为逼真彩色图像！（DeOldify）](https:\u002F\u002Fmedium.com\u002Ftowards-artificial-intelligence\u002Fthis-ai-can-colorize-your-black-white-photos-with-full-photorealistic-renders-deoldify-bf1eed5cb02a) - 简短阅读\n* [点击此处获取DeOldify代码](https:\u002F\u002Fgithub.com\u002Fjantic\u002FDeOldify) - 代码\n\n## COOT：用于视频-文本表示学习的协作式分层Transformer [23]\u003Ca name=\"23\">\u003C\u002Fa>\n顾名思义，该方法利用Transformer架构，以视频及其通用描述作为输入，为视频中的每一帧序列生成准确的文本描述。\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002FqZDiMZh.png)](https:\u002F\u002Fyoutu.be\u002F5TRp5SuEtoY)\n* [使用深度学习和Transformer将视频转换为文本描述 | COOT](https:\u002F\u002Fmedium.com\u002Ftowards-artificial-intelligence\u002Fvideo-to-text-description-using-deep-learning-and-transformers-coot-e05b8d0db110) - 简短阅读\n* [COOT：用于视频-文本表示学习的协作式分层Transformer](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2011.00597.pdf) - 论文\n* [点击此处获取COOT代码](https:\u002F\u002Fgithub.com\u002Fgingsi\u002Fcoot-videotext) - 代码\n\n\n## 风格化神经绘画 [24]\u003Ca name=\"24\">\u003C\u002Fa>\n这种图像到绘画的转换方法采用一种新颖的方式，在不依赖任何GAN架构的情况下，模拟真实画家创作多种风格的作品，这与当前所有最先进方法的做法截然不同！\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002F6Bespnd.png)](https:\u002F\u002Fyoutu.be\u002FdzJStceOaQs)\n* [基于风格迁移的图像到绘画转换](https:\u002F\u002Fmedium.com\u002Ftowards-artificial-intelligence\u002Fimage-to-painting-translation-with-style-transfer-508618596409) - 简短阅读\n* [风格化神经绘画](https:\u002F\u002Farxiv.org\u002Fabs\u002F2011.08114) - 论文\n* [点击此处获取风格化神经绘画代码](https:\u002F\u002Fgithub.com\u002Fjiupinjia\u002Fstylized-neural-painting) - 代码\n\n\n## 实时人像抠图真的需要绿幕吗？ [25]\u003Ca name=\"25\">\u003C\u002Fa>\n人像抠图是一项极其有趣的任务，其目标是从图片中精确地提取出人物并去除背景。由于需要准确识别出人物轮廓，这项任务非常具有挑战性。在这篇文章中，我回顾了多年来使用的最佳技术，并介绍了一种于2020年11月29日发表的新方法。许多现有技术都基于基础的计算机视觉算法来实现这一目标，例如GrabCut算法，它速度快但精度不足。\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002FEXMOzI9.png)](https:\u002F\u002Fyoutu.be\u002FrUo0wuVyefU)\n* [无需绿幕的高质量背景去除](https:\u002F\u002Fmedium.com\u002Fdatadriveninvestor\u002Fhigh-quality-background-removal-without-green-screens-8e61c69de63) - 简短阅读\n* [实时人像抠图真的需要绿幕吗？](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2011.11961.pdf) - 论文\n* [点击此处获取MODNet代码](https:\u002F\u002Fgithub.com\u002FZHKKKe\u002FMODNet) - 代码\n\n\n## ADA：用有限数据训练生成对抗网络 [26]\u003Ca name=\"26\">\u003C\u002Fa>\n借助NVIDIA开发的这一全新训练方法，仅需十分之一数量的图像即可训练出强大的生成模型！这使得许多无法获取大量图像的应用成为可能！\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002Fd9x33BO.png)](https:\u002F\u002Fyoutu.be\u002F9fVNtVr_luc)\n* [针对有限数据应用的GAN训练突破及NVIDIA新计划！NVIDIA研究](https:\u002F\u002Fmedium.com\u002Ftowards-artificial-intelligence\u002Fgan-training-breakthrough-for-limited-data-applications-new-nvidia-program-nvidia-research-3652c4c172e6) - 简短阅读\n* [用有限数据训练生成对抗网络](https:\u002F\u002Farxiv.org\u002Fabs\u002F2006.06676) - 论文\n* [点击此处获取ADA代码](https:\u002F\u002Fgithub.com\u002FNVlabs\u002Fstylegan2-ada) - 代码
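ADA 的核心是在训练中根据判别器的过拟合迹象自适应调节数据增广概率 p。下面是按论文思路写的一个可运行的极简控制器草图：论文用 r_t = E[sign(D(真实样本))] 作为过拟合启发量，目标值约 0.6；变量名与步长数值仅为示意，并非官方实现。

```python
# 极简示意（非官方实现）：ADA 式自适应增广概率控制器
p = 0.0            # 当前增广概率，训练开始时为 0
TARGET_RT = 0.6    # 论文建议的过拟合启发量目标值
STEP = 0.01        # 每次调整的步长（示意值）

def update_augment_p(d_real_outputs):
    """d_real_outputs：判别器在一批真实样本上的原始输出。"""
    global p
    # r_t = E[sign(D(x_real))]：越接近 1，说明判别器对真实样本过于自信，
    # 出现过拟合迹象，应加大增广力度；反之则减小
    r_t = sum(1 if v > 0 else -1 for v in d_real_outputs) / len(d_real_outputs)
    p += STEP if r_t > TARGET_RT else -STEP
    p = min(max(p, 0.0), 1.0)              # 始终限制在 [0, 1] 区间
    return p

# 示例：判别器输出全为正，r_t = 1.0 > 0.6，增广概率由 0 提升到 0.01
print(update_augment_p([0.8, 0.5, 0.9, 0.1]))
```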
## 利用立方体球面上的深度卷积神经网络改进数据驱动的全球天气预报 [27]\u003Ca name=\"27\">\u003C\u002Fa>\n该研究将深度卷积神经网络应用于映射到立方体球面网格的全球气象数据，以纯数据驱动的方式进行全球天气预报。它给出预测的速度远快于传统数值天气预报模型，准确率也优于此前的数据驱动方法！\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002F02FfFOg.png)](https:\u002F\u002Fyoutu.be\u002FC7dNU298A0A)\n* [人工智能正在预测更快速、更准确的天气预报](https:\u002F\u002Fmedium.com\u002Ftowards-artificial-intelligence\u002Fai-is-predicting-faster-and-more-accurate-weather-forecasts-5d99a1d9c4f) - 简短阅读\n* [利用立方体球面上的深度卷积神经网络改进数据驱动的全球天气预报](https:\u002F\u002Fagupubs.onlinelibrary.wiley.com\u002Fdoi\u002F10.1029\u002F2020MS002109) - 论文\n* [点击此处获取天气预报代码](https:\u002F\u002Fgithub.com\u002Fjweyn\u002FDLWP-CS) - 代码\n\n\n## NeRV：用于重照明和视图合成的神经反射率与可见性场 [28]\u003Ca name=\"28\">\u003C\u002Fa>\n这一新方法能够生成完整的三维场景，并可自由控制场景的光照条件。同时，它的计算开销极低，效果却远超以往的方法。\n\n* 简短视频讲解：\n\n[![观看视频](https:\u002F\u002Fimgur.com\u002Fw3QnN6g.png)](https:\u002F\u002Fyoutu.be\u002FZkaTyBvS2w4)\n* [从一组输入图像生成任意光照条件下的完整3D场景](https:\u002F\u002Fmedium.com\u002Fwhat-is-artificial-intelligence\u002Fgenerate-a-complete-3d-scene-under-arbitrary-lighting-conditions-from-a-set-of-input-images-9d2fbce63243) - 简短阅读\n* [NeRV：用于重照明和视图合成的神经反射率与可见性场](https:\u002F\u002Farxiv.org\u002Fabs\u002F2012.03927) - 论文\n* [点击此处获取NeRV代码 *(即将发布)*](https:\u002F\u002Fpeople.eecs.berkeley.edu\u002F~pratul\u002Fnerv\u002F) - 代码\n\n---\n\n🆕 查看 [2021年的项目合集](https:\u002F\u002Fgithub.com\u002Flouisfb01\u002Fbest_AI_papers_2021)！\n\n***如果你分享这份列表，请在Twitter上@Whats_AI或LinkedIn上@Louis (What's AI) Bouchard标记我！***\n\n---\n\n## 论文参考文献\u003Ca name=\"references\">\u003C\u002Fa>\n\n[1] A. Bochkovskiy、C.-Y. Wang 和 H.-Y. M. Liao，《YOLOv4：目标检测的速度与精度最优解》，2020 年。arXiv:2004.10934 [cs.CV]。\n\n[2] S.-Y. Chen、W. Su、L. Gao、S. Xia 和 H. Fu，《DeepFaceDrawing：基于草图的深度人脸图像生成》，ACM 图形学汇刊（ACM SIGGRAPH 2020 会议论文集），第 39 卷，第 4 期，72:1–72:16，2020 年。\n\n[3] S. W. Kim、Y. Zhou、J. Philion、A. Torralba 和 S. Fidler，《利用 GameGAN 学习模拟动态环境》，IEEE 计算机视觉与模式识别会议（CVPR），2020 年 6 月。\n\n[4] S. Menon、A. Damian、S. Hu、N. Ravi 和 C. Rudin，《PULSE：基于生成模型潜在空间探索的自监督照片超分辨率》，2020 年。arXiv:2003.03808 [cs.CV]。\n\n[5] M.-A. Lachaux、B. Roziere、L. Chanussot 和 G. Lample，《编程语言的无监督翻译》，2020 年。arXiv:2006.03511 [cs.CL]。\n\n[6] S. Saito、T. Simon、J. Saragih 和 H. Joo，《PIFuHD：用于高分辨率人体三维数字化的多级像素对齐隐式函数》，2020 年。arXiv:2004.00452 [cs.CV]。\n\n[7] J. Naruniec、L. Helminger、C. Schroers 和 R. Weber，《用于视觉特效的高分辨率神经面部替换》，计算机图形学论坛，第 39 卷，第 173–184 页，2020 年 7 月。doi:10.1111\u002Fcgf.14062。\n\n[8] T. Park、J.-Y. Zhu、O. Wang、J. Lu、E. Shechtman、A. A. Efros 和 R. Zhang，《用于深度图像操纵的交换自编码器》，2020 年。arXiv:2007.00653 [cs.CV]。\n\n[9] T. B. Brown、B. Mann、N. Ryder、M. Subbiah、J. Kaplan、P. Dhariwal、A. Neelakantan、P. Shyam、G. Sastry、A. Askell、S. Agarwal、A. Herbert-Voss、G. Krueger、T. Henighan、R. Child、A. Ramesh、D. M. Ziegler、J. Wu、C. Winter、C. Hesse、M. Chen、E. Sigler、M. Litwin、S. Gray、B. Chess、J. Clark、C. Berner、S. McCandlish、A. Radford、I. Sutskever 和 D. Amodei，《语言模型是少样本学习者》，2020 年。arXiv:2005.14165 [cs.CL]。\n\n[10] Y. Zeng、J. Fu 和 H. Chao，《用于视频修复的联合时空变换学习》，2020 年。arXiv:2007.10247 [cs.CV]。\n\n[11] M. Chen、A. Radford、R. Child、J. Wu、H. Jun、D. Luan 和 I. Sutskever，《从像素进行生成式预训练》，第 37 届国际机器学习大会论文集，H. D. III 和 A. Singh 主编，机器学习研究论文集系列，第 119 卷，虚拟会议：PMLR，2020 年 7 月 13–18 日，第 1691–1703 页。[在线]。网址：http:\u002F\u002Fproceedings.mlr.press\u002Fv119\u002Fchen20s.html。\n\n[12] X. Wang 和 J. Yu，《使用白盒卡通表示学习卡通化》，IEEE 计算机视觉与模式识别会议（CVPR），2020 年 6 月。\n\n[13] S. Mo、M. Cho 和 J. Shin，《冻结判别器：微调 GAN 的简单基线》，2020 年。arXiv:2002.10964 [cs.CV]。\n\n[14] K. Sarkar、D. Mehta、W. Xu、V. Golyanik 和 C. Theobalt，《单张图像中的人体神经重渲染》，欧洲计算机视觉会议（ECCV），2020 年。\n\n[15] G. Moon 和 K. M. Lee，《I2L-MeshNet：用于从单张 RGB 图像准确估计 3D 人体姿态和网格的图像到 Lixel 预测网络》，欧洲计算机视觉会议（ECCV），2020 年。\n\n[16] J. Krantz、E. Wijmans、A. Majumdar、D. Batra 和 S. Lee，《超越导航图：连续环境中的视觉-语言导航》，2020 年。arXiv:2004.02857 [cs.CV]。\n\n[17] Z. Teed 和 J. Deng，《RAFT：用于光流的循环全对场变换》，2020 年。arXiv:2003.12039 [cs.CV]。\n\n[18] Z. 
Li、W. Xian、A. Davis 和 N. Snavely，《全景光场函数的众包采样》，欧洲计算机视觉会议（ECCV）论文集，2020 年。\n\n[19] Z. Wan、B. Zhang、D. Chen、P. Zhang、D. Chen、J. Liao 和 F. Wen，《基于深度潜在空间转换的老照片修复》，2020 年。arXiv:2009.07047 [cs.CV]。\n\n[20] Lechner, M., Hasani, R., Amini, A. 等人。《可审计自主性的神经回路策略》。自然机器智能，第 2 卷，第 642–652 页（2020 年）。https:\u002F\u002Fdoi.org\u002F10.1038\u002Fs42256-020-00237-3。\n\n[21] R. Or-El、S. Sengupta、O. Fried、E. Shechtman 和 I. Kemelmacher-Shlizerman，《寿命年龄变换合成》，欧洲计算机视觉会议（ECCV）论文集，2020 年。\n\n[22] Jason Antic，DeOldify 的创建者，https:\u002F\u002Fgithub.com\u002Fjantic\u002FDeOldify。\n\n[23] S. Ging、M. Zolfaghari、H. Pirsiavash 和 T. Brox，《COOT：用于视频文本表示学习的协作式层次化变换器》，神经信息处理系统会议，2020 年。\n\n[24] Z. Zou、T. Shi、S. Qiu、Y. Yuan 和 Z. Shi，《风格化的神经绘画》，2020 年。arXiv:2011.08114 [cs.CV]。\n\n[25] Z. Ke、K. Li、Y. Zhou、Q. Wu、X. Mao、Q. Yan 和 R. W. Lau，《实时人像抠图真的需要绿幕吗？》ArXiv，卷 abs\u002F2011.11961，2020 年。\n\n[26] T. Karras、M. Aittala、J. Hellsten、S. Laine、J. Lehtinen 和 T. Aila，《有限数据下的生成对抗网络训练》，2020 年。arXiv:2006.06676 [cs.CV]。\n\n[27] J. A. Weyn、D. R. Durran 和 R. Caruana，《利用立方球面上的深度卷积神经网络改进数据驱动的全球天气预报》，地球系统建模进展期刊，第 12 卷，第 9 期，2020 年 9 月，issn：1942–2466。doi：10.1029\u002F2020ms002109。\n\n[28] P. P. Srinivasan、B. Deng、X. Zhang、M. Tancik、B. Mildenhall 和 J. T. Barron，《NERV：用于重新打光和视图合成的神经反射率与可见性场》，2020 年发表于 arXiv。","# Best_AI_paper_2020 快速上手指南\n\n**项目简介**：`Best_AI_paper_2020` 并非一个单一的 AI 模型或工具库，而是一个由社区维护的**2020 年度突破性 AI 论文精选清单**。它汇集了当年最重要的研究成果（如 YOLOv4, GPT-3, GameGAN 等），并为每篇论文提供了视频解读、深度文章链接以及对应的开源代码仓库地址。\n\n本指南将指导你如何利用该清单查找资源，并快速运行其中提到的具体算法代码。\n\n## 1. 环境准备\n\n由于清单中包含多个不同的独立项目（涵盖计算机视觉、自然语言处理、强化学习等领域），没有统一的“一键安装”包。你需要根据感兴趣的具体论文准备相应的环境。\n\n### 系统要求\n*   **操作系统**: Linux (推荐 Ubuntu 18.04\u002F20.04), macOS, 或 Windows (建议使用 WSL2)\n*   **硬件**: 大多数深度学习模型需要 NVIDIA GPU (建议显存 8GB 以上，部分大模型如 GPT-3 复现版需要更高配置)\n*   **软件依赖**:\n    *   Python 3.6+ (具体版本视各子项目而定)\n    *   Git\n    *   CUDA Toolkit (版本需与各项目要求的 PyTorch\u002FTensorFlow 版本匹配)\n\n### 前置依赖\n在克隆具体代码前，建议安装通用的基础包管理工具。国内开发者推荐使用清华源或阿里源加速。\n\n```bash\n# 配置 pip 使用清华大学镜像源 (永久生效)\npip config set global.index-url https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n\n# 安装基础依赖 (示例，具体以各子项目 requirements.txt 为准)\npip install torch torchvision torchaudio --index-url https:\u002F\u002Fdownload.pytorch.org\u002Fwhl\u002Fcu118\npip install gitpython matplotlib opencv-python\n```\n\n## 2. 安装步骤\n\n本项目本身是一个文档索引，**安装步骤针对的是清单中的具体算法**。以下以清单中热门的 **YOLOv4** 和 **PULSE** 为例演示如何获取代码。\n\n### 步骤一：浏览清单选择目标\n访问项目主页或查看 README 中的 \"The Full List\"，找到你感兴趣的论文标题及对应的 \"The Code\" 链接。\n\n### 步骤二：克隆具体代码仓库\n不要克隆 `Best_AI_paper_2020` 主仓库来运行模型，而是直接克隆对应论文的官方实现仓库。\n\n**示例 A: 安装 YOLOv4 (目标检测)**\n```bash\n# 克隆 YOLOv4 官方代码库\ngit clone https:\u002F\u002Fgithub.com\u002FAlexeyAB\u002Fdarknet.git\ncd darknet\n```\n\n**示例 B: 安装 PULSE (图像超分辨率)**\n```bash\n# 克隆 PULSE 官方代码库\ngit clone https:\u002F\u002Fgithub.com\u002Fadamian98\u002Fpulse.git\ncd pulse\n```\n\n### 步骤三：安装项目特定依赖\n进入目录后，通常需要根据该项目提供的 `requirements.txt` 安装依赖。\n\n```bash\n# 以 PULSE 为例\npip install -r requirements.txt\n```\n*注意：部分老旧项目可能需要特定的 CUDA 版本或手动编译算子，请参照各子仓库的 README 说明。*\n\n## 3. 基本使用\n\n使用方法完全取决于你选择的具体论文项目。以下是基于上述两个示例的最简运行方式。\n\n### 场景 A: 运行 YOLOv4 进行物体检测\nYOLOv4 (Darknet 版本) 通常需要编译后使用命令行调用。\n\n```bash\n# 1. 修改 Makefile 启用 GPU (如果拥有 NVIDIA 显卡)\n# 编辑 Makefile，将 GPU=0 改为 GPU=1，CUDNN=0 改为 CUDNN=1\nsed -i 's\u002FGPU=0\u002FGPU=1\u002F' Makefile\nsed -i 's\u002FCUDNN=0\u002FCUDNN=1\u002F' Makefile\n\n# 2. 编译项目\nmake -j4\n\n# 3. 下载权重文件 (需手动从项目说明中获取 .weights 文件)\n# wget [权重文件链接]\n\n# 4. 
执行检测 (示例命令)\n.\u002Fdarknet detector test data\u002Fcoco.data cfg\u002Fyolov4.cfg yolov4.weights data\u002Fdog.jpg\n```\n\n### 场景 B: 运行 PULSE 进行人脸超分\nPULSE 通常提供 Python 脚本直接运行。\n\n```bash\n# 确保已下载预训练模型并放入指定文件夹 (参考子仓库 README)\n\n# 运行超分脚本 (将低清图 upsampling.png 转换为高清输出)\npython run_pulse.py --input_path .\u002Finputs\u002Fupsampling.png --output_dir .\u002Foutputs\n```\n\n### 通用建议\n对于清单中的其他项目（如 `GameGAN`, `DeepFaceDrawing`, `TransCoder` 等）：\n1. 点击 README 中对应的 **\"Click here for the ... code\"** 链接。\n2. 进入该子项目的 GitHub 页面。\n3. 严格遵循该子项目 `README.md` 中的 \"Usage\" 或 \"Quick Start\" 章节。\n4. 许多项目提供了 **Google Colab** 链接，国内用户若无法访问，可尝试将 `.ipynb` 文件下载后在本地 Jupyter Lab 或 Kaggle Kernel 中运行。","某计算机视觉团队的算法工程师正致力于提升人像修复系统的超分辨率效果，急需寻找 2020 年最具突破性的生成模型作为技术基线。\n\n### 没有 Best_AI_paper_2020 时\n- **检索效率低下**：需要在 arXiv、GitHub 和各类博客间反复切换搜索，耗费数天才能拼凑出年度重要论文清单，极易遗漏关键成果如 PULSE 或 DeepFaceDrawing。\n- **理解门槛过高**：面对晦涩的学术摘要和复杂的数学公式，难以快速判断论文的核心创新点是否匹配当前业务需求，阅读成本极高。\n- **复现周期漫长**：即使找到了论文，往往需要花费大量时间寻找官方代码或等待第三方实现，缺乏直接可运行的代码链接导致验证想法的周期被大幅拉长。\n- **缺乏直观认知**：仅凭文字描述无法直观评估模型在人脸细节重建上的实际表现，必须亲自跑通代码才能看到效果，试错成本巨大。\n\n### 使用 Best_AI_paper_2020 后\n- **一站式获取精华**：直接按发布日期查阅 curated list，几分钟内即可锁定 YOLOv4、PULSE 等年度顶级论文，确保技术选型紧跟前沿且无遗漏。\n- **视频辅助决策**：通过集成的清晰视频解说，快速理解论文的核心逻辑与应用场景，无需深读全文即可判断其是否适合解决人像修复难题。\n- **代码即刻验证**：每个条目均附带深度文章链接和官方代码仓库，工程师可立即克隆代码进行本地测试，将概念验证（POC）的时间从周缩短至小时级。\n- **效果直观预览**：借助视频演示和案例展示，在编写任何代码前就能直观看到模型在素描转照片或低清图高清化上的惊人效果，极大提升了选型信心。\n\nBest_AI_paper_2020 通过将分散的顶会论文转化为“视频讲解 + 深度文章 + 可用代码”的结构化资源，让研发团队能以最低成本快速落地最前沿的 AI 技术。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Flouisfb01_Best_AI_paper_2020_7a81e884.png","louisfb01","Louis-François Bouchard","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Flouisfb01_d681bed6.png","Making AI accessible on YouTube, Newsletter, Spotify, Apple podcasts.\r\n\r\nCo-Founder at Towards AI.\r\nex-PhD student at Mila, Polytechnique Montréal","Mila\u002FPolytechnique Montréal & @towardsai","montreal",null,"Whats_AI","https:\u002F\u002Fwww.louisbouchard.ai\u002F","https:\u002F\u002Fgithub.com\u002Flouisfb01",2244,239,"2026-04-03T20:52:17","MIT",1,"","未说明",{"notes":90,"python":88,"dependencies":91},"该仓库并非单一可运行的 AI 工具，而是 2020 年优秀 AI 论文的精选列表。列表中每个项目（如 YOLOv4, GameGAN, PULSE 等）都有独立的代码仓库链接和特定的环境需求。部分项目基于 PyTorch，README 中提到了使用 Weights & Biases (W&B) 进行实验跟踪的示例，但未提供统一的整体运行环境配置。用户需根据具体感兴趣的论文前往其对应的源代码仓库查看详细的安装指南。",[],[14,13,15],[94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113],"ai","papers","2020","list","machinelearning","deeplearning","artificialintelligence","artificial-intelligence","deep-learning","machine-learning","state-of-the-art","state-of-art","state-of-the-art-models","state-of-art-general-segmentation","computer-vision","paper","paper-references","sota","sota-technique","deep-neural-networks","2026-03-27T02:49:30.150509","2026-04-13T04:24:37.174422",[],[]]
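附注：按上面《快速上手指南》克隆任一子项目后，可先用下面这个与具体论文项目无关的小脚本确认本机 PyTorch 与 GPU 环境是否就绪（假设已按指南安装 PyTorch）。

```python
# 环境自检：确认 PyTorch 已安装，并检查是否能使用 NVIDIA GPU
import torch

print("PyTorch 版本:", torch.__version__)
if torch.cuda.is_available():
    print("检测到 GPU:", torch.cuda.get_device_name(0))
else:
    print("未检测到可用 GPU，多数模型会退回 CPU 运行，速度将显著变慢")
```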