[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-IgorSusmelj--pytorch-styleguide":3,"tool-IgorSusmelj--pytorch-styleguide":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",159636,2,"2026-04-17T23:33:34",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",108322,"2026-04-10T11:39:34",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 
- **gemini-cli** (`google-gemini/gemini-cli`, ★ 100,752): Google's open-source AI command-line tool, which brings the Gemini model's capabilities straight into the terminal. For developers who live on the command line it is the shortest path from prompt to model response, with no window switching. It removes the constant context switches of development work: code understanding, generation, debugging, and ops automation all happen in the familiar terminal, whether you are querying a large codebase, generating an app from a sketch, or running complex Git operations through natural-language commands. It targets software engineers, DevOps staff, and technical researchers. Highlights include a context window of up to one million tokens with strong reasoning, built-in tools for Google Search, file operations, and shell commands, and MCP (Model Context Protocol) support for custom integrations such as image generation. A personal Google account comes with a free usage quota, and the project is fully open source under Apache 2.0.

- **markitdown** (`microsoft/markitdown`, ★ 93,400): a lightweight Python tool from Microsoft's AutoGen team for converting files to Markdown. It parses PDF, Word, Excel, PowerPoint, images (with OCR), audio (with transcription), HTML, and even YouTube links, extracting structure such as headings, lists, tables, and links. LLMs handle text well but cannot read complex binary office documents directly; MarkItDown converts unstructured and semi-structured files into token-efficient Markdown that models understand natively, bridging local files and AI analysis pipelines. It also ships an MCP (Model Context Protocol) server for seamless integration with LLM apps such as Claude Desktop. It suits developers, data scientists, and AI researchers building retrieval-augmented generation (RAG) systems, running bulk text analysis, or letting an AI assistant "read" local files; the output is human-readable too, but its core strength is output optimized for machines.

# IgorSusmelj/pytorch-styleguide

> An unofficial styleguide and best practices summary for PyTorch

pytorch-styleguide is an unofficial PyTorch style guide and best-practices summary that helps developers write clearer, more consistent, and more efficient deep-learning code. It distills more than a year of hands-on experience from the authors' research and startup work, covering the full path from basic Python conventions to advanced PyTorch usage.

Many developers use PyTorch without a shared code standard, which makes projects hard to maintain and slows collaboration. The guide tackles this with explicit naming conventions, recommended Python 3.6+ features (type hints and f-strings), and a condensed version of the Google Python style guide. It also compares the use cases of Jupyter notebooks and Python scripts in detail, and provides setup tutorials for connecting VS Code and PyCharm to remote servers, greatly streamlining a local-development, remote-debugging workflow.

The guide is aimed at deep-learning researchers, ML engineers, and startup teams. Beyond conventions, it shares practical snippets from the Lightly project, including implementations of common modules such as self-attention, perceptual loss, and spectral normalization, plus a complete CIFAR-10 training example. Whether you are a newcomer trying to raise your code quality or a practitioner optimizing your workflow, it is a valuable reference.

# PyTorch Tools, Best Practices & Style Guide
This is not an official style guide for PyTorch. This document summarizes best practices from more than a year of experience with deep learning using the PyTorch framework. Note that the learnings we share come mostly from a research and startup perspective.

This is an open project, and other collaborators are warmly welcome to edit and improve the document.

You will find three main parts in this doc: first, a quick recap of best practices in Python, followed by tips and recommendations for using PyTorch,
and finally some insights and experiences with other frameworks that helped us improve our workflow in general.


**Update 20.12.2020**

- Added a full example training a model on CIFAR-10
- Added a setup guide for using VS Code and the Remote extension

**Update 30.4.2019**
> After so much positive feedback I also added a summary of commonly used building blocks from our projects at [Lightly](https://lightly.ai/):
You will find building blocks for self-attention, perceptual loss using VGG, spectral normalization, adaptive instance normalization, and more.
<br>[Code Snippets for Losses, Layers and other building blocks](building_blocks.md)


## We recommend using Python 3.6+
From our experience we recommend using Python 3.6+ because of the following features, which are very handy for clean and simple code:
* [Support for typing since Python 3.6](https://medium.com/@ageitgey/learn-how-to-use-static-type-checking-in-python-3-6-in-10-minutes-12c86d72677b)
* [Support for f-strings since Python 3.6](https://realpython.com/python-f-strings/)
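A minimal sketch of both features together; the function and variable names here are ours, not from the guide:

``` python
from typing import List

def scale(values: List[float], factor: float = 1.0) -> List[float]:
    """Type hints document intent and enable static checkers such as mypy."""
    return [v * factor for v in values]

batch_size: int = 16
# f-strings (Python 3.6+) give readable inline formatting
print(f"scaled={scale([1.0, 2.0], factor=0.5)}, batch_size={batch_size}")
```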
## Python Styleguide recap
We try to follow the Google style guide for Python.
Please refer to the well-documented [style guide on Python code provided by Google](https://github.com/google/styleguide/blob/gh-pages/pyguide.md).

We provide here a summary of the most commonly used rules:

### Naming Conventions
*From 3.16.4*

| Type | Convention | Example |
|------|------------|---------|
| Packages & Modules | lower_with_under | from **prefetch_generator** import BackgroundGenerator |
| Classes | CapWords | class **DataLoader** |
| Constants | CAPS_WITH_UNDER | **BATCH_SIZE=16** |
| Instances | lower_with_under | **dataset** = Dataset |
| Methods & Functions | lower_with_under() | def **visualize_tensor()** |
| Variables | lower_with_under | **background_color='Blue'** |

## IDEs

### Code Editors
In general, we recommend using an IDE such as Visual Studio Code or PyCharm. ~~Whereas VS Code provides syntax highlighting and autocompletion in a relatively lightweight editor, PyCharm has lots of advanced features for working with remote clusters.~~
VS Code has become very powerful thanks to its fast-growing ecosystem of extensions.

#### Setting up Visual Studio Code with a Remote Machine
Make sure you have the following extensions installed:

- Python (linting, autocompletion, syntax highlighting, code formatting)
- Remote - SSH (to work with remote machines)

1. Follow the guide here: https://code.visualstudio.com/docs/remote/remote-overview


#### Setting up PyCharm to work with a Remote Machine
1. Log in to your remote machine (AWS, Google Cloud, etc.)
2. Create a new folder and a new virtual environment
3. In PyCharm (Professional Edition), set up a remote interpreter in the project settings
4. Configure the remote Python interpreter (the path to the venv on AWS, Google Cloud, etc.)
5. Configure the mapping of the code from your local machine to the remote machine

If set up properly, this allows you to:
* Code on your local computer (notebook, desktop) wherever you want, offline or online
* Sync local code with your remote machine
* Have additional packages installed automatically on the remote machine
* Keep all datasets off your local machine
* Run and debug code on the remote machine as if it were running locally


## Jupyter Notebook vs Python Scripts
In general, we recommend using Jupyter notebooks for initial exploration and for playing around with new models and code.
Switch to Python scripts as soon as you want to train a model on a bigger dataset, where reproducibility also matters more.

**Our recommended workflow:**
1. Start with a Jupyter notebook
2. Explore the data and models
3. Build your classes/methods inside cells of the notebook
4. Move your code to Python scripts
5. Train/deploy on a server


| **Jupyter Notebook** | **Python Scripts** |
|----------------------|--------------------|
| + Exploration | + Running longer jobs without interruption |
| + Debugging | + Easy to track changes with git |
| - Can become a huge file | - Debugging mostly means rerunning the whole script |
| - Can be interrupted (don't use for long training) | |
| - Prone to errors and becoming a mess | |


## Libraries

Commonly used libraries:

| Name | Description | Used for |
|------|-------------|----------|
| [torch](https://pytorch.org/) | Base framework for working with neural networks | Creating tensors and networks, and training them with backprop |
| [torchvision](https://pytorch.org/docs/stable/torchvision) | PyTorch computer vision modules | Image data preprocessing, augmentation, postprocessing |
| [Pillow (PIL)](https://pillow.readthedocs.io/en/stable/) | Python Imaging Library | Loading and storing images |
| [Numpy](https://www.numpy.org/) | Package for scientific computing with Python | Data preprocessing and postprocessing |
| [prefetch_generator](https://pypi.org/project/prefetch_generator/) | Library for background processing | Loading the next batch in the background during computation |
| [tqdm](https://github.com/tqdm/tqdm) | Progress bar | Progress during training of each epoch |
| [torchinfo](https://github.com/TylerYep/torchinfo) | Keras-like model summary for PyTorch | Displaying the network, its parameters, and sizes at each layer |
| [torch.utils.tensorboard](https://pytorch.org/docs/stable/tensorboard.html) | TensorBoard within PyTorch | Logging experiments and showing them in TensorBoard |


## File Organization
Don't put all layers and models into the same file. A best practice is to separate the final networks into a dedicated file (*networks.py*) and to keep the layers, losses, and ops in their respective files (*layers.py*, *losses.py*, *ops.py*). The finished model (composed of one or multiple networks) should be referenced in a file bearing its name (e.g. *yolov3.py*, *DCGAN.py*); see the sketch below.

The main routine, as well as the train and test scripts, should only import from the file with the model's name.
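To make the layout concrete, here is a sketch of that discipline collapsed into a single file for illustration; in a real project each section would live in the file named in its comment, and all class names below are hypothetical:

``` python
import torch
import torch.nn as nn

# --- layers.py: reusable building blocks ---
class ConvBlock(nn.Module):
    def __init__(self, in_ch: int, out_ch: int):
        super().__init__()
        self.block = nn.Sequential(nn.Conv2d(in_ch, out_ch, 3, padding=1), nn.ReLU())

    def forward(self, x):
        return self.block(x)

# --- losses.py: custom losses ---
class L2Loss(nn.Module):
    def forward(self, x, y):
        return torch.mean((x - y) ** 2)

# --- tinynet.py: the finished model, named after its file ---
class TinyNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(ConvBlock(3, 8), ConvBlock(8, 3))

    def forward(self, x):
        return self.net(x)

# --- train.py: imports only from the model's file, e.g. `from tinynet import TinyNet`
net, criterion = TinyNet(), L2Loss()
print(criterion(net(torch.randn(1, 3, 8, 8)), torch.randn(1, 3, 8, 8)).item())
```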
## Building a Neural Network in PyTorch
We recommend breaking up the network into smaller reusable pieces. A network is an **nn.Module** consisting of operations or other **nn.Module**s as building blocks. Loss functions are also **nn.Module**s and can therefore be integrated directly into the network.

A class inheriting from **nn.Module** must have a *forward* method that implements the forward pass of the respective layer or operation.

An **nn.Module** can be applied to input data via **self.net(input)**. This simply uses the *__call__()* method of the object to feed the input through the module.

``` python
output = self.net(input)
```

### A Simple Network in PyTorch
Use the following pattern for simple networks with a single input and single output:
``` python
class ConvBlock(nn.Module):
    def __init__(self):
        super(ConvBlock, self).__init__()
        self.block = nn.Sequential(
            nn.Conv2d(...),
            nn.ReLU(),
            nn.BatchNorm2d(...)
        )

    def forward(self, x):
        return self.block(x)

class SimpleNetwork(nn.Module):
    def __init__(self, num_resnet_blocks=6):
        super(SimpleNetwork, self).__init__()
        # here we add the individual layers
        layers = [ConvBlock(...)]
        for i in range(num_resnet_blocks):
            layers += [ResBlock(...)]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)
```

Note the following:
* We reuse simple, recurring building blocks such as *ConvBlock*, which consist of the same pattern (convolution, activation, normalization), and put them into a separate nn.Module
* We build up a list of the desired layers and finally turn it into a model using *nn.Sequential()*. The * operator before the list object unpacks it.
* In the forward pass we just run the input through the model

### A Network with Skip Connections in PyTorch
``` python
class ResnetBlock(nn.Module):
    def __init__(self, dim, padding_type, norm_layer, use_dropout, use_bias):
        super(ResnetBlock, self).__init__()
        self.conv_block = self.build_conv_block(...)

    def build_conv_block(self, ...):
        conv_block = []

        conv_block += [nn.Conv2d(...),
                       norm_layer(...),
                       nn.ReLU()]
        if use_dropout:
            conv_block += [nn.Dropout(...)]

        conv_block += [nn.Conv2d(...),
                       norm_layer(...)]

        return nn.Sequential(*conv_block)

    def forward(self, x):
        out = x + self.conv_block(x)
        return out
```

Here the skip connection of a *ResNet block* is implemented directly in the forward pass; PyTorch allows dynamic operations during the forward pass.
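Since the block above deliberately elides the concrete arguments, here is one possible fully runnable instantiation; the kernel size, normalization choice, and dropout rate are our assumptions, not prescriptions from the guide:

``` python
import torch
import torch.nn as nn

class ResnetBlock(nn.Module):
    """Residual block with the skip connection in forward(), as above."""
    def __init__(self, dim: int, use_dropout: bool = False):
        super().__init__()
        layers = [nn.Conv2d(dim, dim, kernel_size=3, padding=1),
                  nn.BatchNorm2d(dim),
                  nn.ReLU()]
        if use_dropout:
            layers += [nn.Dropout(0.5)]
        layers += [nn.Conv2d(dim, dim, kernel_size=3, padding=1),
                   nn.BatchNorm2d(dim)]
        self.conv_block = nn.Sequential(*layers)

    def forward(self, x):
        # padding=1 with kernel_size=3 keeps spatial dims, so the add is valid
        return x + self.conv_block(x)

block = ResnetBlock(dim=16)
print(block(torch.randn(2, 16, 32, 32)).shape)  # torch.Size([2, 16, 32, 32])
```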
### A Network with Multiple Outputs in PyTorch
For a network requiring multiple outputs, such as a perceptual loss built on a pretrained VGG network, we use the following pattern:
``` python
class Vgg19(nn.Module):
  def __init__(self, requires_grad=False):
    super(Vgg19, self).__init__()
    vgg_pretrained_features = models.vgg19(pretrained=True).features
    self.slice1 = torch.nn.Sequential()
    self.slice2 = torch.nn.Sequential()
    self.slice3 = torch.nn.Sequential()

    for x in range(7):
        self.slice1.add_module(str(x), vgg_pretrained_features[x])
    for x in range(7, 21):
        self.slice2.add_module(str(x), vgg_pretrained_features[x])
    for x in range(21, 30):
        self.slice3.add_module(str(x), vgg_pretrained_features[x])
    if not requires_grad:
        for param in self.parameters():
            param.requires_grad = False

  def forward(self, x):
    h_relu1 = self.slice1(x)
    h_relu2 = self.slice2(h_relu1)
    h_relu3 = self.slice3(h_relu2)
    out = [h_relu1, h_relu2, h_relu3]
    return out
```
Note the following:
* We use a pretrained model provided by *torchvision*.
* We split the network into three slices, each consisting of layers from the pretrained model.
* We *freeze* the network by setting *requires_grad = False*.
* We return a list with the three outputs of our slices.
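As a usage sketch (not part of the original guide), the three slice outputs can be turned into a perceptual loss by comparing feature maps; the L1 distance and uniform weights are our assumptions:

``` python
import torch.nn.functional as F

# vgg = Vgg19()  # the frozen, multi-output module defined above (downloads weights)
def perceptual_loss(vgg, generated, target, weights=(1.0, 1.0, 1.0)):
    feats_g, feats_t = vgg(generated), vgg(target)
    # weight and sum the per-slice L1 distances between feature maps
    return sum(w * F.l1_loss(fg, ft) for w, fg, ft in zip(weights, feats_g, feats_t))

# loss = perceptual_loss(vgg, fake_imgs, real_imgs); loss.backward()
```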
## Custom Loss
Even though PyTorch already ships a lot of standard loss functions, it can sometimes be necessary to create your own. For this, create a separate file `losses.py` and extend the `nn.Module` class to create your custom loss function:

```python
class CustomLoss(nn.Module):

    def __init__(self):
        super(CustomLoss, self).__init__()

    def forward(self, x, y):
        loss = torch.mean((x - y)**2)
        return loss
```

## Recommended code structure for training your model

A full example is provided in the [cifar10-example](cifar10-example/instruction.md) folder of this repository.

Note that we use the following patterns:
* We use *BackgroundGenerator* from *prefetch_generator* to load the next batches in the background ([see this issue for more information](https://github.com/IgorSusmelj/pytorch-styleguide/issues/5))
* We use tqdm to monitor training progress and show the *compute efficiency*. This helps us find bottlenecks in the data loading pipeline.

``` python
# import statements
import torch
import torch.nn as nn
from torch.utils import data
...

# set flags / seeds
torch.backends.cudnn.benchmark = True
np.random.seed(1)
torch.manual_seed(1)
torch.cuda.manual_seed(1)
...

# Start with main code
if __name__ == '__main__':
    # argparse for additional flags for experiment
    parser = argparse.ArgumentParser(description="Train a network for ...")
    ...
    opt = parser.parse_args()

    # add code for datasets (we always use train and validation/test sets)
    data_transforms = transforms.Compose([
        transforms.Resize((opt.img_size, opt.img_size)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    train_dataset = datasets.ImageFolder(
        root=os.path.join(opt.path_to_data, "train"),
        transform=data_transforms)
    train_data_loader = data.DataLoader(train_dataset, ...)

    test_dataset = datasets.ImageFolder(
        root=os.path.join(opt.path_to_data, "test"),
        transform=data_transforms)
    test_data_loader = data.DataLoader(test_dataset, ...)
    ...

    # instantiate network (which has been imported from *networks.py*)
    net = MyNetwork(...)
    ...

    # create losses (criterion in pytorch)
    criterion_L1 = torch.nn.L1Loss()
    ...

    # if running on GPU and we want to use cuda, move the model there
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        net = net.cuda()
        ...

    # create optimizers
    optim = torch.optim.Adam(net.parameters(), lr=opt.lr)
    ...

    # load checkpoint if needed/wanted
    start_n_iter = 0
    start_epoch = 0
    if opt.resume:
        ckpt = load_checkpoint(opt.path_to_checkpoint)  # custom method for loading the last checkpoint
        net.load_state_dict(ckpt['net'])
        start_epoch = ckpt['epoch']
        start_n_iter = ckpt['n_iter']
        optim.load_state_dict(ckpt['optim'])
        print("last checkpoint restored")
        ...

    # if we want to run the experiment on multiple GPUs we move the model there
    net = torch.nn.DataParallel(net)
    ...

    # typically we use tensorboardX to keep track of experiments
    writer = SummaryWriter(...)

    # now we start the main loop
    n_iter = start_n_iter
    for epoch in range(start_epoch, opt.epochs):
        # set models to train mode
        net.train()
        ...

        # use prefetch_generator and tqdm for iterating through data
        pbar = tqdm(enumerate(BackgroundGenerator(train_data_loader, ...)),
                    total=len(train_data_loader))
        start_time = time.time()

        # for loop going through dataset
        for i, data in pbar:
            # data preparation
            img, label = data
            if use_cuda:
                img = img.cuda()
                label = label.cuda()
            ...

            # It's very good practice to keep track of preparation time and
            # computation time using tqdm to find any issues in your dataloader
            prepare_time = time.time() - start_time

            # forward and backward pass
            optim.zero_grad()
            ...
            loss.backward()
            optim.step()
            ...

            # update tensorboard
            writer.add_scalar(..., n_iter)
            ...

            # compute computation time and *compute efficiency*
            process_time = time.time() - start_time - prepare_time
            pbar.set_description("Compute efficiency: {:.2f}, epoch: {}/{}:".format(
                process_time / (process_time + prepare_time), epoch, opt.epochs))
            start_time = time.time()

        # maybe do a test pass every x epochs
        if epoch % x == x - 1:
            # bring models to evaluation mode
            net.eval()
            ...
            # do some tests
            pbar = tqdm(enumerate(BackgroundGenerator(test_data_loader, ...)),
                        total=len(test_data_loader))
            for i, data in pbar:
                ...

            # save checkpoint if needed
            ...
```
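The script above calls a custom `load_checkpoint` method that the guide does not show. One possible pair of save/load helpers matching the keys it reads back (`'net'`, `'optim'`, `'epoch'`, `'n_iter'`) might look like this:

``` python
import torch

def save_checkpoint(path, net, optim, epoch, n_iter):
    """One possible checkpoint layout matching the training loop above."""
    torch.save({'net': net.state_dict(),
                'optim': optim.state_dict(),
                'epoch': epoch,
                'n_iter': n_iter}, path)

def load_checkpoint(path, device='cpu'):
    # map_location avoids GPU/CPU mismatches when restoring on another machine
    return torch.load(path, map_location=device)
```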
## Training on Multiple GPUs in PyTorch
There are two distinct patterns in PyTorch for using multiple GPUs for training. In our experience both patterns are valid: the first results in nicer, shorter code, while the second seems to have a slight performance advantage due to less communication between the GPUs. [I asked a question in the official PyTorch forum about the two approaches here](https://discuss.pytorch.org/t/how-to-best-use-dataparallel-with-multiple-models/39289).

### Split up the batch input of each network
The most common pattern is to simply split the batches of all *networks* across the individual GPUs.
> A model running on 1 GPU with batch size 64 would therefore run on 2 GPUs with a batch size of 32 each. This is done automatically by wrapping the model in **nn.DataParallel(model)**, as sketched below.

### Pack all networks in a *super* network and split up the input batch
This pattern is less commonly used. The [pix2pixHD implementation by NVIDIA](https://github.com/NVIDIA/pix2pixHD) is a repository implementing this approach.
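A minimal sketch of the first pattern; the toy network and batch size are placeholders:

``` python
import torch
import torch.nn as nn

net = nn.Sequential(nn.Linear(8, 8), nn.ReLU(), nn.Linear(8, 2))

# DataParallel splits dimension 0 (the batch) across all visible GPUs
# and gathers the outputs back on the default device
if torch.cuda.device_count() > 1:
    net = nn.DataParallel(net)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
net = net.to(device)

out = net(torch.randn(64, 8, device=device))
print(out.shape)  # torch.Size([64, 2]) no matter how many GPUs shared the batch
```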
## Do's and Don'ts
### Avoid NumPy code in the forward method of an nn.Module
NumPy runs on the CPU and is slower than torch code. Since torch was developed to be similar to NumPy, most NumPy functions are already supported by PyTorch.

### Separate the DataLoader from the main code
The data loading pipeline should be independent of your main training code. PyTorch uses background workers to load data more efficiently without disturbing the main training process.

### Don't log results in every step
Typically we train our models for thousands of steps, so it is enough to log the loss and other results every n'th step to reduce the overhead. Saving intermediate results as images in particular can be costly during training. (A sketch of throttled logging follows at the end of this Do's and Don'ts section.)

### Use command-line arguments
It's very handy to use command-line arguments to set parameters during code execution (*batch size*, *learning rate*, etc.). An easy way to keep track of the arguments for an experiment is to just print the dictionary received from *parse_args*:
``` python
...
# saves arguments to config.txt file
opt = parser.parse_args()
with open("config.txt", "w") as f:
    f.write(opt.__str__())
...
```

### Use **.detach()** to free tensors from the graph if possible
PyTorch keeps track of all operations involving tensors for automatic differentiation. Use **.detach()** to prevent the recording of unnecessary operations.

### Use **.item()** for printing scalar tensors
You can print tensors directly, but it's recommended to use **variable.detach()** or **variable.item()**. In PyTorch versions < 0.4 you had to use **.data** to access a variable's tensor.

### Use the call method instead of forward on an **nn.Module**
The two ways are not identical, as pointed out in [this issue](https://github.com/IgorSusmelj/pytorch-styleguide/issues/3):
``` python
output = self.net.forward(input)
# they are not equal!
output = self.net(input)
```
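Tying together the logging and `.item()` advice above, here is a sketch of throttled logging; the `log_every` interval and the stand-in loss are our choices:

``` python
import torch
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter()          # logs to ./runs by default
log_every = 100                   # hypothetical logging interval

running = 0.0
for n_iter in range(1, 1001):
    loss = torch.rand(1)          # stand-in for the real training loss
    running += loss.item()        # .item() converts the scalar tensor to a Python float
    if n_iter % log_every == 0:   # log every n'th step, not every step
        writer.add_scalar('train/loss', running / log_every, n_iter)
        running = 0.0
writer.close()
```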
## FAQ
1. How to keep my experiments reproducible?
> We recommend setting the following seeds at the beginning of your code:
``` python
np.random.seed(1)
torch.manual_seed(1)
torch.cuda.manual_seed(1)
```
2. How to improve training and inference speed further?
> On Nvidia GPUs you can add the following line at the beginning of your code. It allows the CUDA backend to optimize your graph during its first execution. Be aware, however, that if you change the network's input/output tensor size, the graph will be re-optimized each time a change occurs. This can lead to very slow runtimes and out-of-memory errors. Only set this flag if your input and output always have the same shape. Usually, it yields an improvement of about 20%.
``` python
torch.backends.cudnn.benchmark = True
```
3. What is a good value for compute efficiency using your tqdm + prefetch_generator pattern?
> It depends on the machine, the preprocessing pipeline, and the network size. Running from an SSD on a 1080Ti GPU we see a compute efficiency of almost 1.0, which is the ideal scenario. With shallow (small) networks or a slow hard disk, the number may drop to around 0.1-0.2 depending on your setup.
4. How can I have a batch size > 1 even though I don't have enough memory?
> In PyTorch we can easily implement virtual batch sizes: just prevent the optimizer from updating the parameters and sum up the gradients for *batch_size* cycles.
``` python
...
# in the main loop
out = net(input)
loss = criterion(out, label)
# we just call backward to sum up gradients but don't perform a step here
loss.backward()
total_loss += loss.item() / batch_size
if n_iter % batch_size == batch_size-1:
    # here we perform our optimization step using a virtual batch size
    optim.step()
    optim.zero_grad()
    print('Total loss: ', total_loss)
    total_loss = 0.0
...
```
5. How can I adjust the learning rate during training?
> We can access the learning rate directly through the instantiated optimizer:
``` python
...
for param_group in optim.param_groups:
    old_lr = param_group['lr']
    new_lr = old_lr * 0.1
    param_group['lr'] = new_lr
    print('Updated lr from {} to {}'.format(old_lr, new_lr))
...
```
6. How to use a pretrained model as a loss (no backprop) during training?
> If you want to use a pretrained model such as VGG to compute a loss but not train it (e.g. a perceptual loss in style transfer, GANs, or autoencoders), you can use the following pattern:
``` python
...
# instantiate the model
pretrained_VGG = VGG19(...)

# disable gradients (prevent training)
for p in pretrained_VGG.parameters():  # disable requires_grad
    p.requires_grad = False
...
# you don't have to use the no_grad() context manager; you can just run the model
# and no gradients will be computed for the VGG model
out_real = pretrained_VGG(input_a)
out_fake = pretrained_VGG(input_b)
loss = any_criterion(out_real, out_fake)
...
```
7. Why do we use *.train()* and *.eval()* in PyTorch?
> These methods set layers such as **BatchNorm2d** or **Dropout2d** to training or inference mode. Every module inheriting from **nn.Module** has an attribute called *training*; **.train()** and **.eval()** simply set this attribute to True or False, respectively (a short demonstration follows this FAQ). For details on how this is implemented, have a look at [the module code in PyTorch](https://pytorch.org/docs/stable/_modules/torch/nn/modules/module.html).
8. My model uses lots of memory during inference / how do I run a model properly for inference in PyTorch?
> Make sure that no gradients get computed and stored during your code execution. You can simply use the following pattern to ensure that:
``` python
with torch.no_grad():
    # run model here
    out_tensor = net(in_tensor)
```
9. How to fine-tune a pretrained model?
> In PyTorch you can freeze layers, which prevents them from being updated during an optimization step.
``` python
# you can freeze whole modules using
for p in pretrained_VGG.parameters():
    p.requires_grad = False
```
10. When to use **Variable(...)**?
> Since PyTorch 0.4, **Variable** and **Tensor** have been merged; we don't have to explicitly create **Variable** objects anymore.
11. Is PyTorch in C++ faster than in Python?
> The C++ version is about 10% faster.
12. Can TorchScript / JIT speed up my code?
> Todo...
13. Is PyTorch code using **cudnn.benchmark=True** faster?
> From our experience you can gain about a 20% speed-up. But the first time you run your model it takes quite some time to build the optimized graph. In some cases (loops in the forward pass, no fixed input shape, if/else in forward, etc.) this flag might result in *out of memory* or other errors.
14. How to use multiple GPUs for training?
> Todo...
15. How does **.detach()** work in PyTorch?
> It frees a tensor from the computation graph. A nice illustration is shown [here](http://www.bnikolic.co.uk/blog/pytorch-detach.html).
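A short demonstration of the *training* attribute mentioned in FAQ 7; the layer choice is arbitrary:

``` python
import torch.nn as nn

net = nn.Sequential(nn.Linear(4, 4), nn.Dropout(0.5), nn.BatchNorm1d(4))

net.train()              # sets module.training = True recursively
print(net[1].training)   # True: Dropout active, BatchNorm updates running stats
net.eval()               # sets module.training = False recursively
print(net[1].training)   # False: Dropout disabled, BatchNorm uses running stats
```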
## You like this repo?

Please give feedback on how we can improve this style guide! You can open an issue or propose changes by creating a pull request.

If you like this repo, don't forget to check out other frameworks from us:

- [Lightly - A computer vision framework for self-supervised learning](https://github.com/lightly-ai/lightly)
# PyTorch Styleguide Quickstart

This guide is distilled from the `pytorch-styleguide` project and is meant to help developers follow its best practices when building clear, maintainable, and efficient PyTorch deep-learning code.

## Environment

Before you start, make sure the following system and dependency requirements are met:

*   **Operating system**: Linux, macOS, or Windows
*   **Python version**: **Python 3.6+** recommended
    *   Reason: support for type hints and f-strings helps keep code clean.
*   **Core frameworks**: PyTorch and torchvision
*   **Common helper libraries**:
    *   `Pillow`: image handling
    *   `Numpy`: scientific computing
    *   `prefetch_generator`: background data-loading speed-up
    *   `tqdm`: progress bars
    *   `torchinfo`: model structure summaries
*   **Development tools (optional but recommended)**:
    *   **VS Code**: install the `Python` and `Remote - SSH` extensions (for remote-server development).
    *   **PyCharm Professional**: supports remote interpreters and code mappings.

## Installation

Use a virtual environment for isolation. The pip commands below are one example (users in China can speed downloads up with the Tsinghua mirror):

```bash
# Create and activate a virtual environment
python -m venv venv
source venv/bin/activate  # on Windows: venv\Scripts\activate

# Install PyTorch (check the official site for the current command; this is an example)
# Optional mirror for faster downloads in China: -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install torch torchvision torchaudio -i https://pypi.tuna.tsinghua.edu.cn/simple

# Install the helper libraries the guide recommends
pip install pillow numpy prefetch_generator tqdm torchinfo -i https://pypi.tuna.tsinghua.edu.cn/simple
```

## Basic usage

The project provides conventions, architecture advice, and reusable snippets rather than a single executable command-line tool. A quick tour of the core usage:

### 1. Suggested project layout
Don't pile all code into one file. The recommended structure:
*   `networks.py`: the final network models
*   `layers.py`: reusable layer modules
*   `losses.py`: custom loss functions
*   `ops.py`: other operations
*   `train.py` / `test.py`: the main training/test scripts (these import only from the model file)

### 2. Building a modular network
Split the network into small reusable pieces (subclasses of `nn.Module`) and assemble them with `nn.Sequential`.

```python
import torch
import torch.nn as nn

# A reusable basic block, parameterized by input and output channels
# so that blocks can be stacked with matching shapes
class ConvBlock(nn.Module):
    def __init__(self, in_ch=3, out_ch=64):
        super(ConvBlock, self).__init__()
        self.block = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(out_ch)
        )

    def forward(self, x):
        return self.block(x)

# Assemble the main network
class SimpleNetwork(nn.Module):
    def __init__(self, num_blocks=6):
        super(SimpleNetwork, self).__init__()
        layers = [ConvBlock(3, 64)]        # stem: 3 -> 64 channels
        # add further layers dynamically
        for i in range(num_blocks):
            layers += [ConvBlock(64, 64)]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)

# Instantiate and use
model = SimpleNetwork()
input_tensor = torch.randn(1, 3, 64, 64)
output = model(input_tensor)
print(output.shape)  # torch.Size([1, 64, 64, 64])
```
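Optionally, `torchinfo` (listed under the environment requirements above) can verify the structure; a sketch assuming the `SimpleNetwork` from step 2, with an arbitrary input size:

```python
from torchinfo import summary

model = SimpleNetwork()
# prints a Keras-style table of layers, parameter counts, and output shapes
summary(model, input_size=(1, 3, 64, 64))
```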
### 3. Implementing a custom loss
Create a custom loss simply by subclassing `nn.Module`.

```python
class CustomLoss(nn.Module):
    def __init__(self):
        super(CustomLoss, self).__init__()

    def forward(self, x, y):
        # example: mean squared error
        loss = torch.mean((x - y)**2)
        return loss

criterion = CustomLoss()
# loss = criterion(prediction, target)
```

### 4. Recommended workflow
1.  **Exploration**: use **Jupyter Notebook** for data exploration and prototype validation.
2.  **Refactoring**: move the validated classes and methods into **Python scripts** (.py files).
3.  **Training**: run long training jobs on a server from Python scripts, monitoring progress with `tqdm` and speeding up data loading with `prefetch_generator`.

```python
# Best-practice snippets for the training loop
import numpy as np
import torch
from tqdm import tqdm
from prefetch_generator import BackgroundGenerator

# set random seeds for reproducibility
np.random.seed(1)
torch.manual_seed(1)

# let cuDNN pick the fastest conv algorithms (only with fixed input shapes)
torch.backends.cudnn.benchmark = True

# use BackgroundGenerator to load the next batches in the background;
# data_loader is assumed to be an existing torch DataLoader
for batch in BackgroundGenerator(data_loader, max_prefetch=3):
    # training logic...
    pass
```

> **Tip**: the complete CIFAR-10 training example lives in the `cifar10-example` folder of the project repository.

# Use Case

The algorithm team at a startup building PyTorch image-recognition models struggles with inconsistent code style across collaborators and inefficient remote debugging.

### Without pytorch-styleguide
- Naming habits differ from person to person; constants, classes, and functions mix casing, so new members reading variables like `DataLoader` or `batch_size` constantly hit ambiguity.
- With no shared Python version or type-hint policy, the code is full of implicit type errors, and refactors easily trigger hard-to-trace bugs.
- Local development is cut off from the remote GPU server; manually syncing code and configuring virtual environments is slow, and debugging feels like groping in the dark.
- Common modules (such as self-attention and perceptual loss) get reimplemented from scratch with uneven quality, dragging down experiment iteration.
- Final model training happens in Jupyter notebooks, scattering the logic and blocking version control and productionization.

### With pytorch-styleguide
- Google-style naming conventions keep the codebase consistent; any member can quickly understand and take over a colleague's `visualize_tensor` and similar functions.
- Python 3.6+ with static type checking and f-strings makes the code terser and noticeably reduces runtime type errors.
- The guide's recommended VS Code Remote-SSH setup gives local editing with seamless remote debugging and automatic environment sync.
- Proven building blocks from the guide (such as Spectral Normalization and Adaptive Instance Normalization) are reused directly, shortening model assembly considerably.
- Notebooks are reserved for exploration and Python scripts for production; together with the standardized project layout, this makes training runs more robust and maintainable.

By establishing best-practice standards, pytorch-styleguide frees the team from code-style and environment chores so they can focus on core algorithmic work.

# Project Metadata

- **Repository**: [IgorSusmelj/pytorch-styleguide](https://github.com/IgorSusmelj/pytorch-styleguide)
- **Author**: Igor Susmelj ([GitHub](https://github.com/IgorSusmelj), Twitter: ISusmelj, https://data-annotation.com/), co-founder at Lightly, based in Zurich; degree from ETH Zurich with a focus on embedded computing and machine learning.
- **Language**: Python (100%)
- **Stars / forks**: 2,017 / 177
- **License**: GPL-3.0
- **Python**: 3.6+
- **Dependencies**: torch, torchvision, Pillow, numpy, prefetch_generator, tqdm, torchinfo
- **GitHub topics**: pytorch, styleguide, best-practices
- **Category**: development framework

Environment notes: this is a PyTorch best-practices and style document rather than an executable package with strict runtime requirements. It recommends VS Code or PyCharm as development tools, supports development over SSH on remote machines (e.g. AWS, Google Cloud), and suggests Jupyter notebooks for initial exploration with a switch to Python scripts for reproducible training. The repository includes a CIFAR-10 training example and code snippets for building blocks such as self-attention and perceptual loss.