[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-microsoft--renderformer":3,"tool-microsoft--renderformer":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",150720,2,"2026-04-11T11:33:10",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",108322,"2026-04-10T11:39:34",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 
人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[52,13,15,14],"插件",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":32,"last_commit_at":59,"category_tags":60,"status":17},4721,"markitdown","microsoft\u002Fmarkitdown","MarkItDown 是一款由微软 AutoGen 团队打造的轻量级 Python 工具，专为将各类文件高效转换为 Markdown 格式而设计。它支持 PDF、Word、Excel、PPT、图片（含 OCR）、音频（含语音转录）、HTML 乃至 YouTube 链接等多种格式的解析，能够精准提取文档中的标题、列表、表格和链接等关键结构信息。\n\n在人工智能应用日益普及的今天，大语言模型（LLM）虽擅长处理文本，却难以直接读取复杂的二进制办公文档。MarkItDown 恰好解决了这一痛点，它将非结构化或半结构化的文件转化为模型“原生理解”且 Token 效率极高的 Markdown 格式，成为连接本地文件与 AI 分析 pipeline 的理想桥梁。此外，它还提供了 MCP（模型上下文协议）服务器，可无缝集成到 Claude Desktop 等 LLM 应用中。\n\n这款工具特别适合开发者、数据科学家及 AI 研究人员使用，尤其是那些需要构建文档检索增强生成（RAG）系统、进行批量文本分析或希望让 AI 助手直接“阅读”本地文件的用户。虽然生成的内容也具备一定可读性，但其核心优势在于为机器",93400,"2026-04-06T19:52:38",[52,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":67,"readme_en":68,"readme_zh":69,"quickstart_zh":70,"use_case_zh":71,"hero_image_url":72,"owner_login":73,"owner_name":74,"owner_avatar_url":75,"owner_bio":76,"owner_company":77,"owner_location":77,"owner_email":78,"owner_twitter":79,"owner_website":80,"owner_url":81,"languages":82,"stars":91,"forks":92,"last_commit_at":93,"license":94,"difficulty_score":10,"env_os":95,"env_gpu":96,"env_ram":97,"env_deps":98,"category_tags":107,"github_topics":109,"view_count":32,"oss_zip_url":77,"oss_zip_packed_at":77,"status":17,"created_at":119,"updated_at":120,"faqs":121,"releases":151},6712,"microsoft\u002Frenderformer","renderformer","Official Code Release for [SIGGRAPH 2025] RenderFormer: Transformer-based Neural Rendering of Triangle Meshes with Global Illumination","RenderFormer 是一款基于 Transformer 架构的创新神经渲染工具，旨在直接从三角形网格场景生成具备完整全局光照效果的高质量图像。传统渲染方法往往依赖复杂的物理模拟计算，导致速度较慢或需要针对每个场景重新训练模型，而 RenderFormer 巧妙地将渲染过程转化为“序列到序列”的转换任务：它将带有反射属性的三角形视为输入令牌序列，直接输出对应的像素块序列。\n\n该工具的核心优势在于其通用的泛化能力，无需针对特定场景进行微调或额外训练，即可处理包含镜面反射、复杂阴影、漫反射间接照明及多光源等高级光照效果的场景。其技术亮点在于采用了两阶段流水线设计：第一阶段独立于视角建模三角形间的光线传输，第二阶段则结合视角信息将光线束转化为最终像素值，两阶段均基于 Transformer 架构并通过极少的先验约束学习而成。\n\nRenderFormer 非常适合计算机图形学研究人员、AI 开发者以及需要高效高质量渲染解决方案的技术设计师使用。无论是探索神经渲染的前沿算法，还是快速验证复杂光照下的场景表现，它都提供了一个强大且灵活的开源平台，让用户能够轻松尝试将自定义的三维场景转化为逼真的","RenderFormer 是一款基于 Transformer 架构的创新神经渲染工具，旨在直接从三角形网格场景生成具备完整全局光照效果的高质量图像。传统渲染方法往往依赖复杂的物理模拟计算，导致速度较慢或需要针对每个场景重新训练模型，而 RenderFormer 巧妙地将渲染过程转化为“序列到序列”的转换任务：它将带有反射属性的三角形视为输入令牌序列，直接输出对应的像素块序列。\n\n该工具的核心优势在于其通用的泛化能力，无需针对特定场景进行微调或额外训练，即可处理包含镜面反射、复杂阴影、漫反射间接照明及多光源等高级光照效果的场景。其技术亮点在于采用了两阶段流水线设计：第一阶段独立于视角建模三角形间的光线传输，第二阶段则结合视角信息将光线束转化为最终像素值，两阶段均基于 Transformer 架构并通过极少的先验约束学习而成。\n\nRenderFormer 非常适合计算机图形学研究人员、AI 开发者以及需要高效高质量渲染解决方案的技术设计师使用。无论是探索神经渲染的前沿算法，还是快速验证复杂光照下的场景表现，它都提供了一个强大且灵活的开源平台，让用户能够轻松尝试将自定义的三维场景转化为逼真的视觉作品。","\u003Cp align=\"center\">\n\n  \u003Cdiv align=\"center\">\n    \u003Cimg width=320 src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmicrosoft_renderformer_readme_a384454e9efb.png\">\n  \u003C\u002Fdiv>\n\n  \u003Ch1 align=\"center\">RenderFormer: Transformer-based Neural Rendering of Triangle Meshes with Global Illumination\u003C\u002Fh1>\n  \u003Cp align=\"center\">\n    \u003Ca href=\"https:\u002F\u002Fwww.chong-zeng.com\u002F\">\u003Cstrong>Chong Zeng\u003C\u002Fstrong>\u003C\u002Fa>\n    ·\n    \u003Ca href=\"https:\u002F\u002Fyuedong.shading.me\u002F\">\u003Cstrong>Yue Dong\u003C\u002Fstrong>\u003C\u002Fa>\n    ·\n    \u003Ca 
href=\"https:\u002F\u002Fwww.cs.wm.edu\u002F~ppeers\u002F\">\u003Cstrong>Pieter Peers\u003C\u002Fstrong>\u003C\u002Fa>\n    ·\n    \u003Ca href=\"https:\u002F\u002Fsvbrdf.github.io\u002F\">\u003Cstrong>Hongzhi Wu\u003C\u002Fstrong>\u003C\u002Fa>\n    ·\n    \u003Ca href=\"https:\u002F\u002Fscholar.google.com\u002Fcitations?user=P91a-UQAAAAJ&hl=en\">\u003Cstrong>Xin Tong\u003C\u002Fstrong>\u003C\u002Fa>\n  \u003C\u002Fp>\n  \u003Ch2 align=\"center\">SIGGRAPH 2025 Conference Papers\u003C\u002Fh2>\n\n  \u003Cdiv align=\"center\">\n    \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmicrosoft_renderformer_readme_63655cc38a2d.jpg\">\n    Examples of triangle-mesh based scenes rendered with RenderFormer without per-scene training or fine-tuning that include (multiple) specular reflections, complex shadows, diffuse indirect lighting, glossy reflections, soft and hard shadows, and multiple light sources.\n  \u003C\u002Fdiv>\n\n  \u003Cp align=\"center\">\n  \u003Cbr>\n    \u003Ca href=\"https:\u002F\u002Fmicrosoft.github.io\u002Frenderformer\u002F\">\u003Cstrong>Project Page\u003C\u002Fstrong>\u003C\u002Fa>\n    |\n    \u003Ca href=\"https:\u002F\u002Farxiv.org\u002Fabs\u002F2505.21925\">\u003Cstrong>arXiv\u003C\u002Fstrong>\u003C\u002Fa>\n    |\n    \u003Ca href=\"https:\u002F\u002Frenderformer.github.io\u002Fpdfs\u002Frenderformer-paper.pdf\">\u003Cstrong>Paper\u003C\u002Fstrong>\u003C\u002Fa>\n    |\n    \u003Ca href=\"https:\u002F\u002Fhuggingface.co\u002Fmicrosoft\u002Frenderformer-v1.1-swin-large\">\u003Cstrong>Model\u003C\u002Fstrong>\u003C\u002Fa>\n    |\n    \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Frenderformer\">\u003Cstrong>Official Code\u003C\u002Fstrong>\u003C\u002Fa>\n  \u003C\u002Fp>\n\u003C\u002Fp>\n\nRenderFormer is a neural rendering pipeline that directly renders an image from a triangle-based representation of a scene with full global illumination effects and that does not require per-scene training or fine-tuning. Instead of taking a physics-centric approach to rendering, we formulate rendering as a sequence-to-sequence transformation where a sequence of tokens representing triangles with reflectance properties is converted to a sequence of output tokens representing small patches of pixels. RenderFormer follows a two stage pipeline: a view-independent stage that models triangle-to-triangle light transport, and a view-dependent stage that transforms a token representing a bundle of rays to the corresponding pixel values guided by the triangle-sequence from the the view-independent stage. Both stages are based on the transformer architecture and are learned with minimal prior constraints. 
We demonstrate and evaluate RenderFormer on scenes with varying complexity in shape and light transport.\n\n# Table of Content\n\n- [Installation](#installation)\n  * [Prerequisites](#prerequisites)\n  * [Environment Setup](#environment-setup)\n  * [Pretrained Models](#pretrained-models)\n- [Usage](#usage-1)\n  * [Image Rendering](#image-rendering)\n    + [Scene Conversion](#scene-conversion)\n    + [Rendering a Single Image Using Inference Script](#rendering-a-single-image-using-inference-script)\n      - [Available Arguments of the Inference Script](#available-arguments-of-the-inference-script)\n    + [Inference with `RenderFormerRenderingPipeline`](#inference-with--renderformerrenderingpipeline-)\n  * [Video Rendering](#video-rendering)\n    + [Download Example Data](#download-example-data)\n    + [Rendering a Video Using Inference Script](#rendering-a-video-using-inference-script)\n      - [Available Arguments of the Inference Script](#available-arguments-of-the-inference-script-1)\n- [Bring Your Own Scene!](#bring-your-own-scene-)\n  * [Scene Definition JSON](#scene-definition-json)\n    + [Scene Structure](#scene-structure)\n    + [Object Configuration](#object-configuration)\n    + [Camera Configuration](#camera-configuration)\n    + [Example Scene](#example-scene)\n    + [HDF5 Data Fields](#hdf5-data-fields)\n    + [Remesh Objects](#remesh-objects)\n    + [Rendering Reference with Blender](#rendering-reference-with-blender)\n  * [Blender Extension](#blender-extension)\n  * [Scene Setting Tips](#scene-setting-tips)\n- [Acknowledgements](#acknowledgements)\n- [License](#license)\n- [Citation](#citation)\n\n# Installation\n\n## Prerequisites\n\n- **System**: The code is tested on **Linux**, **MacOS** and **Windows**.\n- **Hardware**: The code has been tested on both **NVIDIA CUDA GPUs** and **Apple Metal GPUs**. The minimal GPU memory requirement is 8GB.\n\n## Environment Setup\n\nFirst set up an environment with PyTorch 2.0+. For CUDA users, you can install Flash Attention from https:\u002F\u002Fgithub.com\u002FDao-AILab\u002Fflash-attention.\n\nThe rest of the dependencies can be installed through:\n\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Frenderformer\ncd renderformer\npip install -r requirements.txt\npython3 -c \"import imageio; imageio.plugins.freeimage.download()\"  # Needed for HDR image IO\n```\n\n## Pretrained Models\n\n| Model | Params | Link | Model ID |\n|-------|--------|------|----------|\n| RenderFormer-V1-Base | 205M | [Hugging Face](https:\u002F\u002Fhuggingface.co\u002Fmicrosoft\u002Frenderformer-v1-base) | `microsoft\u002Frenderformer-v1-base` |\n| RenderFormer-V1.1-Large | 483M | [Hugging Face](https:\u002F\u002Fhuggingface.co\u002Fmicrosoft\u002Frenderformer-v1.1-swin-large) | `microsoft\u002Frenderformer-v1.1-swin-large` |\n\n\u003Cdetails>\n\u003Csummary>Note on the released models\u003C\u002Fsummary>\n\u003Cbr>\nWe found a shader bug in the training data that we used in the submission. We re-trained the models with the corrected shader and released the new models. Thus the model performance and output might be different from the results in the paper.\n\u003C\u002Fdetails>\n\n# Usage\n\n## Image Rendering\n\n### Scene Conversion\n\nWe put example scene config JSON files at [`examples`](.\u002Fexamples). 
To render a scene, first convert a scene config JSON file into our HDF5 scene format:\n\n```bash\npython3 scene_processor\u002Fconvert_scene.py examples\u002Fcbox.json --output_h5_path tmp\u002Fcbox\u002Fcbox.h5\n```\n\n### Rendering a Single Image Using Inference Script\n\n```bash\npython3 infer.py --h5_file tmp\u002Fcbox\u002Fcbox.h5 --output_dir output\u002Fcbox\u002F\n```\n\nYou should now see `output\u002Fcbox\u002Fcbox_view_0.exr` and `output\u002Fcbox\u002Fcbox_view_0.png` under your output folder. `.exr` is the HDR Linear output from RenderFormer, and `.png` is the LDR version of the rendered image. You can enable different tone mappers through `--tone_mapper` to achieve better visual results.\n\nThe script will automatically fallback to use torch scaled dot product attention if Flash Attention is not found on the system. We also provide an environment `ATTN_IMPL` for you to choose which attention implementation to use:\n\n```bash\n# Use SDPA intentionally\nATTN_IMPL=sdpa python3 infer.py --h5_file tmp\u002Fcbox\u002Fcbox.h5 --output_dir output\u002Fcbox\u002F\n```\n\nPlease check the [image render shell script](.\u002Frender-images.sh) for more examples.\n\n#### Available Arguments of the Inference Script\n\n```bash\n--h5_file H5_FILE     Path to the input H5 file\n--model_id MODEL_ID   Model ID on Hugging Face or local path\n--precision {bf16,fp16,fp32}\n                      Precision for inference (Default: fp16)\n--resolution RESOLUTION\n                      Resolution for inference (Default: 512)\n--output_dir OUTPUT_DIR\n                      Output directory (Default: same as input H5 file)\n--tone_mapper {none,agx,filmic,pbr_neutral}\n                      Tone mapper for inference (Default: none)\n```\n\n### Inference with `RenderFormerRenderingPipeline`\n\nYou can achieve batch rendering with `RenderFormerRenderingPipeline` by providing a batch of input scene and rendering camera parameters.\n\nMinimal example (without meaningful inputs, just for testing):\n\n```python\nimport torch\nfrom renderformer import RenderFormerRenderingPipeline\n\npipeline = RenderFormerRenderingPipeline.from_pretrained(\"microsoft\u002Frenderformer-v1.1-swin-large\")\ndevice = torch.device('cuda')\npipeline.to(device)\n\nBATCH_SIZE = 2\nNUM_TRIANGLES = 1024\nTEX_PATCH_SIZE = 32\nNUM_VIEWS = 4\n\ntriangles = torch.randn((BATCH_SIZE, NUM_TRIANGLES, 3, 3), device=device)\ntexture = torch.randn((BATCH_SIZE, NUM_TRIANGLES, 13, TEX_PATCH_SIZE, TEX_PATCH_SIZE), device=device)\nmask = torch.ones((BATCH_SIZE, NUM_TRIANGLES), dtype=torch.bool, device=device)\nvn = torch.randn((BATCH_SIZE, NUM_TRIANGLES, 3, 3), device=device)\nc2w = torch.randn((BATCH_SIZE, NUM_VIEWS, 4, 4), device=device)\nfov = torch.randn((BATCH_SIZE, NUM_VIEWS, 1), device=device)\n\nrendered_imgs = pipeline(\n    triangles=triangles,\n    texture=texture,\n    mask=mask,\n    vn=vn,\n    c2w=c2w,\n    fov=fov,\n    resolution=512,\n    torch_dtype=torch.float16,\n)\nprint(\"Inference completed. Rendered Linear HDR images shape:\", rendered_imgs.shape)\n# Inference completed. Rendered Linear HDR images shape: torch.Size([2, 4, 512, 512, 3])\n```\n\nPlease check [`infer.py`](.\u002Finfer.py) and [`rendering_pipeline.py`](.\u002Frenderformer\u002Fpipelines\u002Frendering_pipeline.py) for detailed usages.\n\n## Video Rendering\n\n### Download Example Data\n\nWe put example video input data on [Hugging Face](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Frenderformer\u002Frenderformer-video-data). 
You can download and unzip them with [this script](.\u002Fdownload_video_data.sh).\n\n### Rendering a Video Using Inference Script\n\n```bash\npython3 batch_infer.py --h5_folder renderformer-video-data\u002Fsubmission-videos\u002Fcbox-roughness\u002F --output_dir output\u002Fvideos\u002Fcbox-roughness\n```\n\nPlease check the [video render shell script](.\u002Frender-videos.sh) for more examples.\n\n#### Available Arguments of the Inference Script\n\n```bash\n--h5_folder H5_FOLDER\n                      Path to the folder containing input H5 files\n--model_id MODEL_ID   Model ID on Hugging Face or local path\n--precision {bf16,fp16,fp32}\n                      Precision for inference\n--resolution RESOLUTION\n                      Resolution for inference\n--batch_size BATCH_SIZE\n                      Batch size for inference\n--padding_length PADDING_LENGTH\n                      Padding length for inference\n--num_workers NUM_WORKERS\n                      Number of workers for data loading\n--output_dir OUTPUT_DIR\n                      Output directory for rendered images (default: same as input folder)\n--save_video          Merge rendered images into a video at video.mp4.\n--tone_mapper {none,agx,filmic,pbr_neutral}\n                      Tone mapper for inference\n```\n\n# Bring Your Own Scene!\n\n## Scene Definition JSON\n\nRenderFormer uses a JSON-based scene description format that defines the geometry, materials, lighting, and camera setup for your scene. The scene configuration is defined using a hierarchical structure with the following key components:\n\n### Scene Structure\n\n- `scene_name`: A descriptive name for your scene\n- `version`: The version of the scene description format (currently \"1.0\")\n- `objects`: A dictionary of objects in the scene, including both geometry and lighting\n- `cameras`: A list of camera configurations for rendering\n\n### Object Configuration\n\nEach object in the scene requires:\n\n- `mesh_path`: Path to the .obj mesh file\n- `material`: Material properties including:\n  - `diffuse`: RGB diffuse color [r, g, b]\n  - `specular`: RGB specular color [r, g, b] (We currently only support white specular, and diffuse + specular should be no larger than 1.0)\n  - `roughness`: Surface roughness (0.01 to 1.0)\n  - `emissive`: RGB emission color [r, g, b] (We currently only support white emission, and only on light source triangles)\n  - `smooth_shading`: Whether to use smooth shading on this object\n  - `rand_tri_diffuse_seed`: Optional seed for random triangle coloring, if none, use the diffuse color directly\n  - `random_diffuse_max`: Maximum value for random diffuse color assignment (max diffuse color + specular color should be no larger than 1.0)\n  - `random_diffuse_type`: Type of random diffuse color assignment, either per triangle or per shading group\n- `transform`: Object transformation including:\n  - `translation`: [x, y, z] position\n  - `rotation`: [x, y, z] rotation in degrees\n  - `scale`: [x, y, z] scale factors\n  - `normalize`: Whether to normalize object to unit sphere\n- `remesh`: Whether to remesh the object\n- `remesh_target_face_num`: Target face number of the remeshed object\n\n### Camera Configuration\n\nEach camera requires:\n\n- `position`: [x, y, z] camera position\n- `look_at`: [x, y, z] target point\n- `up`: [x, y, z] up vector\n- `fov`: Field of view in degrees\n\n### Example Scene\n\nWe recommend start from the `examples\u002Finit-template.json` and modify it to your needs. 
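For illustration only, a minimal scene description can also be assembled programmatically. The field names below follow the Scene Structure, Object Configuration, and Camera Configuration lists above; the object names, paths, and numeric values are made up for demonstration, and the repository's `examples/init-template.json` remains the authoritative starting point.

```python
import json

# Sketch of a scene config matching the documented fields (values are illustrative).
scene = {
    "scene_name": "my_scene",
    "version": "1.0",
    "objects": {
        "bunny": {                                      # hypothetical object name
            "mesh_path": "meshes/bunny.obj",            # hypothetical path to an .obj mesh
            "material": {
                "diffuse": [0.6, 0.3, 0.2],
                "specular": [0.2, 0.2, 0.2],            # white specular; diffuse + specular <= 1.0
                "roughness": 0.3,                       # within [0.01, 1.0]
                "emissive": [0.0, 0.0, 0.0],            # non-emissive object
                "smooth_shading": True,
            },
            "transform": {
                "translation": [0.0, 0.0, 0.0],
                "rotation": [0.0, 0.0, 0.0],            # degrees
                "scale": [0.4, 0.4, 0.4],               # keep the scene inside the [-0.5, 0.5] box
                "normalize": True,
            },
            "remesh": True,
            "remesh_target_face_num": 1024,
        },
        "light": {
            "mesh_path": "examples/templates/lighting/tri.obj",
            "material": {
                "diffuse": [0.0, 0.0, 0.0],
                "specular": [0.0, 0.0, 0.0],
                "roughness": 1.0,
                "emissive": [3000.0, 3000.0, 3000.0],   # white emission, per the tips: sum in [2500, 5000]
                "smooth_shading": False,
            },
            "transform": {
                "translation": [0.0, 2.3, 0.0],         # ~2.1-2.7 units from the scene center
                "rotation": [0.0, 0.0, 0.0],
                "scale": [2.2, 2.2, 2.2],               # light scale in [2.0, 2.5]
                "normalize": False,
            },
        },
    },
    "cameras": [
        {
            "position": [0.0, 0.0, 1.8],                # 1.5-2.0 units from the scene center
            "look_at": [0.0, 0.0, 0.0],
            "up": [0.0, 1.0, 0.0],
            "fov": 45.0,                                # degrees, within [30, 60]
        }
    ],
}

with open("my_scene.json", "w") as f:
    json.dump(scene, f, indent=2)
```

A file written this way can then be converted with `scene_processor/convert_scene.py` as shown earlier to produce the HDF5 input consumed by `infer.py`.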
For more complex examples, refer to the scene configurations in the `examples` directory.\n\n### HDF5 Data Fields\n\nThe HDF5 file contains the following fields:\n\n- `triangles`: [N, 3, 3] array of triangle vertices\n- `texture`: [N, 13, 32, 32] array of texture patches\n- `vn`: [N, 3, 3] array of vertex normals\n- `c2w`: [N, 4, 4] array of camera-to-world matrices\n- `fov`: [N] array of field of view\n\nWe use the same camera coordinate system as Blender (-Z = view direction, +Y = up, +X = right), be mindful of the coordinate system when implementing your own HDF5 converter.\n\nPlease refer to [`scene_processor\u002Fto_h5.py`](.\u002Fscene_processor\u002Fto_h5.py) for more details.\n\n### Remesh Objects\n\nWe provide a simple remeshing tool in [`scene_processor\u002Fremesh.py`](.\u002Fscene_processor\u002Fremesh.py). You can use it to remesh your objects before putting them into the scene.\n\nWe also provide fields in the scene config JSON file (`remesh` and `remesh_target_face_num`) to allow you to remesh the object during scene conversion process.\n\n```bash\npython3 scene_processor\u002Fremesh.py --input path\u002Fto\u002Fyour\u002Fhigh_res_mesh.obj ----output remeshed_object.obj --target_face_num 1024\n```\n\n### Rendering Reference with Blender\n\nWe provide a script at [`scene_processor\u002Fto_blend.py`](.\u002Fscene_processor\u002Fto_blend.py) to render the reference images with Blender. It converts the JSON described scene into a Blender scene, so you can render the scene with Blender or save it as a blend file for later use.\n\nExample usage:\n\n```bash\npython3 scene_processor\u002Fto_blend.py examples\u002Fcbox.json --output_dir tmp\u002Fcbox --dump_blend --save_img --spp 4096\n```\n\nFor detailed usage, please check the script.\n\n## Blender Extension\n\nWe provide a Blender Extension to simplify the process of setting up a scene for RenderFormer. Please refer to the [Blender Extension](https:\u002F\u002Fgithub.com\u002FiamNCJ\u002Frenderformer-blender-extension) for more details.\n\n## Scene Setting Tips\n\n1. Always start from the `examples\u002Finit-template.json`.\n2. Please limit the scene in our training data range, extrapolation can work but not guaranteed.\n   - Camera distance to scene center in [1.5, 2.0], fov in [30, 60] degrees\n   - Scene bounding box in [-0.5, 0.5] in x, y, z\n   - Light sources: up to 8 triangles (please use the triangle mesh at [`examples\u002Ftemplates\u002Flighting\u002Ftri.obj`](.\u002Fexamples\u002Ftemplates\u002Flighting\u002Ftri.obj)), each scale in [2.0, 2.5], distance to scene center in [2.1, 2.7], emission values summed in [2500, 5000]\n   - Total number of triangles: training data covers up to 4096 triangles, but extending to 8192 triangles during inference usually still works.\n   - All training objects are water-tight and simplified with QSlim. Uniform triangle sizes are preferred. If you find your object not working, try to remesh it with our provided script or other remeshing tools.\n\n# Acknowledgements\n\nWe borrowed some code from the following repositories. 
We thank the authors for their contributions.\n\n- [DPT](https:\u002F\u002Fgithub.com\u002Fisl-org\u002FDPT\u002F)\n- [nerfstudio](https:\u002F\u002Fgithub.com\u002Fnerfstudio-project\u002Fnerfstudio\u002F)\n- [rotary-embedding-torch](https:\u002F\u002Fgithub.com\u002Flucidrains\u002Frotary-embedding-torch)\n\nIn addition to the 3D model from [Objaverse](https:\u002F\u002Fobjaverse.allenai.org\u002F), we express our profound appreciation to the contributors of the 3D models that we used in the examples.\n\n- [**Shader Ball**](https:\u002F\u002Frgl.s3.eu-central-1.amazonaws.com\u002Fscenes\u002Fmatpreview.zip): by Wenzel Jakob from [Mitsuba Gallery](https:\u002F\u002Fmitsuba.readthedocs.io\u002Fen\u002Fstable\u002Fsrc\u002Fgallery.html)\n- **Stanford Bunny & Lucy**: from [The Stanford 3D Scanning Repository](https:\u002F\u002Fgraphics.stanford.edu\u002Fdata\u002F3Dscanrep\u002F)\n- **Cornell Box**: from [Cornell Box Data, Cornell University Program of Computer Graphics](https:\u002F\u002Fwww.graphics.cornell.edu\u002Fonline\u002Fbox\u002F)\n- **Utah Teapot**: from [Utah Model Repository](https:\u002F\u002Fusers.cs.utah.edu\u002F~dejohnso\u002Fmodels\u002Fteapot.html)\n- **Veach MIS**: From Eric Veach and Leonidas J. Guibas. 1995. Optimally combining sampling techniques for Monte Carlo rendering\n- **Spot**: By Keenan Crane from [Keenan's 3D Model Repository](https:\u002F\u002Fwww.cs.cmu.edu\u002F~kmcrane\u002FProjects\u002FModelRepository\u002F)\n- [**Klein Bottle**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fklein-bottle-ce95eaceb29544aaa47db7a586811b09): By Fausto Javier Da Rosa \n- **Constant Width**: Original mesh from [Small volume bodies of constant width](https:\u002F\u002Farxiv.org\u002Fabs\u002F2405.18501). Derived mesh from [Keenan's 3D Model Repository](https:\u002F\u002Fwww.cs.cmu.edu\u002F~kmcrane\u002FProjects\u002FModelRepository\u002F)\n- [**Jewelry**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fjewelry-4373121e41f94727bb802b78ce6b566f): By elbenZ\n- [**Banana**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fbanana-923ea2b1927d4c24ab88860ee416ce6c), [**Easter Basket**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Feaster-basket-427232bfdd344c46ab878c4bfcb8f904), [**Water Bottle**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fwater-bottle-c002e2aa017d49ae8e6dbd295cca3914), [**Bronco**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fbronco-37e3760bfde44beeb39e6fd69b690637), [**Heart**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fheart-puzzle-e762b2d7de6749e79f54e0e6a0ff96be): By Microsoft\n- [**Lowpoly Fox**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Flowpoly-fox-01674a892f414c0681afdeb563cc8e13): By Vlad Zaichyk\n- [**Lowpoly Crystals**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Flowpoly-crystals-d94bccb1305d409482eba04d736fb7dd): By Mongze\n- [**Bowling Pin**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fbowling-pin-e0be50c74a4a479f8d412a4050f8fe05): By SINOFWRATH\n- [**Cube Cascade**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fcube-cascade-11ba2667998d45f8b3ad178b27a34725), [**Marching Cubes**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fmarching-cubes-4d2a15a193764305875c62bd5dff757d): By Tycho Magnetic Anomaly\n- [**Dancing Crab**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fdancing-crab-uca-mjoebergi-280863886fee409ab3c8168f07caa89f): By Bohdan Lvov\n- [**Magical 
Gyroscope**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fmagical-gyroscope-a3f110c0d2944a2dbff78709eb8f3984): By reddification\n- [**Capoeira Cube**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fcapoeira-cube-753c8e3fdab64478b497fc5c6d8c88d9): By mortaleiros\n- [**P.U.C. Security Bot**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fpuc-security-bot-7-ee0a6da142b94d2bbf1d65526bec3d3e): By Gouhadouken\n\n# License\n\nRenderFormer model and the majority of the code are licensed under the MIT License. The following submodules may have different licenses:\n\n- [renderformer-liger-kernel](https:\u002F\u002Fgithub.com\u002FiamNCJ\u002Frenderformer-liger-kernel): Redistributed Liger Kernel for RenderFormer integration. It's derived from original [Liger Kernel](https:\u002F\u002Fgithub.com\u002Flinkedin\u002FLiger-Kernel) and licensed under the BSD 2-Clause \"Simplified\" [License](https:\u002F\u002Fgithub.com\u002FiamNCJ\u002Frenderformer-liger-kernel\u002Fblob\u002Fmain\u002FLICENSE.txt).\n- [simple-ocio](https:\u002F\u002Fgithub.com\u002FiamNCJ\u002Fsimple-ocio): We use this tool to simplify OpenColorIO usage for tone-mapping. This package redistributes the complete [Blender Color Management](https:\u002F\u002Fprojects.blender.org\u002Fblender\u002Fblender\u002Fsrc\u002Fbranch\u002Fmain\u002Frelease\u002Fdatafiles\u002Fcolormanagement) directory. The full license text is available at [ocio-license.txt](https:\u002F\u002Fgithub.com\u002FiamNCJ\u002Fsimple-ocio\u002Fblob\u002Fmain\u002Fsimple_ocio\u002Focio_data\u002Focio-license.txt) and the headers of each configuration file. The package itself is still licensed under the MIT [License](https:\u002F\u002Fgithub.com\u002FiamNCJ\u002Fsimple-ocio\u002Fblob\u002Fmain\u002FLICENSE).\n\n# Citation\n\nIf you find this work helpful, please cite our paper:\n\n```bibtex\n@inproceedings {zeng2025renderformer,\n    title      = {RenderFormer: Transformer-based Neural Rendering of Triangle Meshes with Global Illumination},\n    author     = {Chong Zeng and Yue Dong and Pieter Peers and Hongzhi Wu and Xin Tong},\n    booktitle  = {ACM SIGGRAPH 2025 Conference Papers},\n    year       = {2025}\n}\n```\n","\u003Cp align=\"center\">\n\n  \u003Cdiv align=\"center\">\n    \u003Cimg width=320 src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmicrosoft_renderformer_readme_a384454e9efb.png\">\n  \u003C\u002Fdiv>\n\n  \u003Ch1 align=\"center\">RenderFormer：基于Transformer的三角网格全局光照神经渲染\u003C\u002Fh1>\n  \u003Cp align=\"center\">\n    \u003Ca href=\"https:\u002F\u002Fwww.chong-zeng.com\u002F\">\u003Cstrong>曾冲\u003C\u002Fstrong>\u003C\u002Fa>\n    ·\n    \u003Ca href=\"https:\u002F\u002Fyuedong.shading.me\u002F\">\u003Cstrong>董岳\u003C\u002Fstrong>\u003C\u002Fa>\n    ·\n    \u003Ca href=\"https:\u002F\u002Fwww.cs.wm.edu\u002F~ppeers\u002F\">\u003Cstrong>皮特·皮尔斯\u003C\u002Fstrong>\u003C\u002Fa>\n    ·\n    \u003Ca href=\"https:\u002F\u002Fsvbrdf.github.io\u002F\">\u003Cstrong>吴洪志\u003C\u002Fstrong>\u003C\u002Fa>\n    ·\n    \u003Ca href=\"https:\u002F\u002Fscholar.google.com\u002Fcitations?user=P91a-UQAAAAJ&hl=en\">\u003Cstrong>佟欣\u003C\u002Fstrong>\u003C\u002Fa>\n  \u003C\u002Fp>\n  \u003Ch2 align=\"center\">SIGGRAPH 2025 会议论文\u003C\u002Fh2>\n\n  \u003Cdiv align=\"center\">\n    \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmicrosoft_renderformer_readme_63655cc38a2d.jpg\">\n    使用RenderFormer渲染的基于三角网格的场景示例，无需针对每个场景进行训练或微调，即可呈现（多重）镜面反射、复杂阴影、漫反射间接光照、光泽反射、软硬阴影以及多个光源。\n  \u003C\u002Fdiv>\n\n  \u003Cp align=\"center\">\n  
\u003Cbr>\n    \u003Ca href=\"https:\u002F\u002Fmicrosoft.github.io\u002Frenderformer\u002F\">\u003Cstrong>项目主页\u003C\u002Fstrong>\u003C\u002Fa>\n    |\n    \u003Ca href=\"https:\u002F\u002Farxiv.org\u002Fabs\u002F2505.21925\">\u003Cstrong>arXiv\u003C\u002Fstrong>\u003C\u002Fa>\n    |\n    \u003Ca href=\"https:\u002F\u002Frenderformer.github.io\u002Fpdfs\u002Frenderformer-paper.pdf\">\u003Cstrong>论文\u003C\u002Fstrong>\u003C\u002Fa>\n    |\n    \u003Ca href=\"https:\u002F\u002Fhuggingface.co\u002Fmicrosoft\u002Frenderformer-v1.1-swin-large\">\u003Cstrong>模型\u003C\u002Fstrong>\u003C\u002Fa>\n    |\n    \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Frenderformer\">\u003Cstrong>官方代码\u003C\u002Fstrong>\u003C\u002Fa>\n  \u003C\u002Fp>\n\u003C\u002Fp>\n\nRenderFormer是一种神经渲染管线，能够直接从基于三角形的场景表示中渲染出包含完整全局光照效果的图像，且无需针对每个场景进行训练或微调。我们并未采用以物理为中心的渲染方法，而是将渲染建模为一种序列到序列的转换：将代表具有反射属性的三角形的标记序列转换为代表像素小块的输出标记序列。RenderFormer遵循两阶段流程：一个与视点无关的阶段，用于模拟三角形之间的光线传输；另一个与视点相关的阶段，根据前一阶段生成的三角形序列，将代表光线束的标记转换为对应的像素值。这两个阶段均基于Transformer架构，并在极少先验约束的情况下进行学习。我们在形状和光线传输复杂度各异的场景上展示了并评估了RenderFormer。\n\n# 目录\n\n- [安装](#installation)\n  * [先决条件](#prerequisites)\n  * [环境设置](#environment-setup)\n  * [预训练模型](#pretrained-models)\n- [使用](#usage-1)\n  * [图像渲染](#image-rendering)\n    + [场景转换](#scene-conversion)\n    + [使用推理脚本渲染单张图像](#rendering-a-single-image-using-inference-script)\n      - [推理脚本可用参数](#available-arguments-of-the-inference-script)\n    + [使用`RenderFormerRenderingPipeline`进行推理](#inference-with--renderformerrenderingpipeline-)\n  * [视频渲染](#video-rendering)\n    + [下载示例数据](#download-example-data)\n    + [使用推理脚本渲染视频](#rendering-a-video-using-inference-script)\n      - [推理脚本可用参数](#available-arguments-of-the-inference-script-1)\n- [自定义场景！](#bring-your-own-scene-)\n  * [场景定义JSON](#scene-definition-json)\n    + [场景结构](#scene-structure)\n    + [对象配置](#object-configuration)\n    + [相机配置](#camera-configuration)\n    + [示例场景](#example-scene)\n    + [HDF5数据字段](#hdf5-data-fields)\n    + [重新网格化对象](#remesh-objects)\n    + [Blender渲染参考](#rendering-reference-with-blender)\n  * [Blender插件](#blender-extension)\n  * [场景设置技巧](#scene-setting-tips)\n- [致谢](#acknowledgements)\n- [许可证](#license)\n- [引用](#citation)\n\n# 安装\n\n## 先决条件\n\n- **系统**：代码已在**Linux**、**MacOS**和**Windows**上测试通过。\n- **硬件**：代码已在**NVIDIA CUDA GPU**和**Apple Metal GPU**上测试过。最低GPU显存要求为8GB。\n\n## 环境设置\n\n首先搭建一个支持PyTorch 2.0+的环境。对于CUDA用户，可以从https:\u002F\u002Fgithub.com\u002FDao-AILab\u002Fflash-attention安装Flash Attention。\n\n其余依赖项可通过以下命令安装：\n\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Frenderformer\ncd renderformer\npip install -r requirements.txt\npython3 -c \"import imageio; imageio.plugins.freeimage.download()\"  # 用于HDR图像输入输出\n```\n\n## 预训练模型\n\n| 模型 | 参数量 | 链接 | 模型ID |\n|-------|--------|------|----------|\n| RenderFormer-V1-Base | 2.05亿 | [Hugging Face](https:\u002F\u002Fhuggingface.co\u002Fmicrosoft\u002Frenderformer-v1-base) | `microsoft\u002Frenderformer-v1-base` |\n| RenderFormer-V1.1-Large | 4.83亿 | [Hugging Face](https:\u002F\u002Fhuggingface.co\u002Fmicrosoft\u002Frenderformer-v1.1-swin-large) | `microsoft\u002Frenderformer-v1.1-swin-large` |\n\n\u003Cdetails>\n\u003Csummary>关于已发布模型的说明\u003C\u002Fsummary>\n\u003Cbr>\n我们在提交时使用的训练数据中发现了一个着色器错误。我们使用修正后的着色器重新训练了模型，并发布了新版本。因此，模型性能和输出可能与论文中的结果有所不同。\n\u003C\u002Fdetails>\n\n# 使用\n\n## 图像渲染\n\n### 场景转换\n\n我们在[`examples`](.\u002Fexamples)目录下提供了示例场景配置JSON文件。要渲染场景，首先需将场景配置JSON文件转换为我们的HDF5场景格式：\n\n```bash\npython3 scene_processor\u002Fconvert_scene.py examples\u002Fcbox.json --output_h5_path 
tmp\u002Fcbox\u002Fcbox.h5\n```\n\n### 使用推理脚本渲染单张图像\n\n```bash\npython3 infer.py --h5_file tmp\u002Fcbox\u002Fcbox.h5 --output_dir output\u002Fcbox\u002F\n```\n\n此时，您应在输出文件夹中看到`output\u002Fcbox\u002Fcbox_view_0.exr`和`output\u002Fcbox\u002Fcbox_view_0.png`。`.exr`是RenderFormer输出的HDR线性图像，而`.png`则是渲染图像的LDR版本。您可以通过`--tone_mapper`参数启用不同的色调映射器，以获得更好的视觉效果。\n\n如果系统中未找到Flash Attention，脚本会自动回退到使用PyTorch的缩放点积注意力。我们还提供了一个环境变量`ATTN_IMPL`，供您选择使用哪种注意力实现：\n\n```bash\n\n# 有意使用 SDPA\nATTN_IMPL=sdpa python3 infer.py --h5_file tmp\u002Fcbox\u002Fcbox.h5 --output_dir output\u002Fcbox\u002F\n```\n\n更多示例请查看 [图像渲染 Shell 脚本](.\u002Frender-images.sh)。\n\n#### 推理脚本可用参数\n\n```bash\n--h5_file H5_FILE     输入 H5 文件路径\n--model_id MODEL_ID   Hugging Face 上的模型 ID 或本地路径\n--precision {bf16,fp16,fp32}\n                      推理精度（默认：fp16）\n--resolution RESOLUTION\n                      推理分辨率（默认：512）\n--output_dir OUTPUT_DIR\n                      输出目录（默认：与输入 H5 文件相同）\n--tone_mapper {none,agx,filmic,pbr_neutral}\n                      推理色调映射器（默认：无）\n```\n\n### 使用 `RenderFormerRenderingPipeline` 进行推理\n\n通过提供一批输入场景和渲染相机参数，您可以使用 `RenderFormerRenderingPipeline` 实现批量渲染。\n\n最小示例（无实际意义的输入，仅用于测试）：\n\n```python\nimport torch\nfrom renderformer import RenderFormerRenderingPipeline\n\npipeline = RenderFormerRenderingPipeline.from_pretrained(\"microsoft\u002Frenderformer-v1.1-swin-large\")\ndevice = torch.device('cuda')\npipeline.to(device)\n\nBATCH_SIZE = 2\nNUM_TRIANGLES = 1024\nTEX_PATCH_SIZE = 32\nNUM_VIEWS = 4\n\ntriangles = torch.randn((BATCH_SIZE, NUM_TRIANGLES, 3, 3), device=device)\ntexture = torch.randn((BATCH_SIZE, NUM_TRIANGLES, 13, TEX_PATCH_SIZE, TEX_PATCH_SIZE), device=device)\nmask = torch.ones((BATCH_SIZE, NUM_TRIANGLES), dtype=torch.bool, device=device)\nvn = torch.randn((BATCH_SIZE, NUM_TRIANGLES, 3, 3), device=device)\nc2w = torch.randn((BATCH_SIZE, NUM_VIEWS, 4, 4), device=device)\nfov = torch.randn((BATCH_SIZE, NUM_VIEWS, 1), device=device)\n\nrendered_imgs = pipeline(\n    triangles=triangles,\n    texture=texture,\n    mask=mask,\n    vn=vn,\n    c2w=c2w,\n    fov=fov,\n    resolution=512,\n    torch_dtype=torch.float16,\n)\nprint(\"推理完成。渲染的线性 HDR 图像形状：\", rendered_imgs.shape)\n# 推理完成。渲染的线性 HDR 图像形状：torch.Size([2, 4, 512, 512, 3])\n```\n\n详细用法请参阅 [`infer.py`](.\u002Finfer.py) 和 [`rendering_pipeline.py`](.\u002Frenderformer\u002Fpipelines\u002Frendering_pipeline.py)。\n\n## 视频渲染\n\n### 下载示例数据\n\n我们已将示例视频输入数据放置在 [Hugging Face](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Frenderformer\u002Frenderformer-video-data) 上。您可以通过 [此脚本](.\u002Fdownload_video_data.sh) 下载并解压这些数据。\n\n### 使用推理脚本渲染视频\n\n```bash\npython3 batch_infer.py --h5_folder renderformer-video-data\u002Fsubmission-videos\u002Fcbox-roughness\u002F --output_dir output\u002Fvideos\u002Fcbox-roughness\n```\n\n更多示例请查看 [视频渲染 Shell 脚本](.\u002Frender-videos.sh)。\n\n#### 推理脚本可用参数\n\n```bash\n--h5_folder H5_FOLDER\n                      包含输入 H5 文件的文件夹路径\n--model_id MODEL_ID   Hugging Face 上的模型 ID 或本地路径\n--precision {bf16,fp16,fp32}\n                      推理精度\n--resolution RESOLUTION\n                      推理分辨率\n--batch_size BATCH_SIZE\n                      推理批次大小\n--padding_length PADDING_LENGTH\n                      推理填充长度\n--num_workers NUM_WORKERS\n                      数据加载的工作进程数\n--output_dir OUTPUT_DIR\n                      渲染图像的输出目录（默认：与输入文件夹相同）\n--save_video          将渲染图像合并为 video.mp4 视频文件。\n--tone_mapper {none,agx,filmic,pbr_neutral}\n                      推理色调映射器\n```\n\n# 自定义场景！\n\n## 场景定义 JSON\n\nRenderFormer 使用基于 JSON 
的场景描述格式，用于定义场景的几何、材质、光照和相机设置。场景配置采用分层结构，包含以下关键组件：\n\n### 场景结构\n\n- `scene_name`: 场景的描述性名称\n- `version`: 场景描述格式的版本（当前为“1.0”）\n- `objects`: 场景中对象的字典，包括几何和光源\n- `cameras`: 渲染用相机配置列表\n\n### 对象配置\n\n场景中的每个对象需要：\n\n- `mesh_path`: .obj 网格文件的路径\n- `material`: 材质属性，包括：\n  - `diffuse`: RGB 漫反射颜色 [r, g, b]\n  - `specular`: RGB 镜面反射颜色 [r, g, b]（目前仅支持白色镜面反射，且漫反射 + 镜面反射之和不得超过 1.0）\n  - `roughness`: 表面粗糙度（0.01 至 1.0）\n  - `emissive`: RGB 发光颜色 [r, g, b]（目前仅支持白色发光，且仅限于光源三角形）\n  - `smooth_shading`: 是否对此对象启用平滑着色\n  - `rand_tri_diffuse_seed`: 随机三角形着色的可选种子，若未指定，则直接使用漫反射颜色\n  - `random_diffuse_max`: 随机漫反射颜色的最大值（漫反射 + 镜面反射之和不得超过 1.0）\n  - `random_diffuse_type`: 随机漫反射颜色分配类型，可按三角形或着色组分配\n- `transform`: 对象变换，包括：\n  - `translation`: [x, y, z] 位置\n  - `rotation`: [x, y, z] 旋转角度\n  - `scale`: [x, y, z] 缩放因子\n  - `normalize`: 是否将对象归一化到单位球体\n- `remesh`: 是否对对象进行重新网格化\n- `remesh_target_face_num`: 重新网格化后的目标面数\n\n### 相机配置\n\n每个相机需要：\n\n- `position`: [x, y, z] 相机位置\n- `look_at`: [x, y, z] 目标点\n- `up`: [x, y, z] 上方向量\n- `fov`: 视场角（以度为单位）\n\n### 示例场景\n\n建议从 `examples\u002Finit-template.json` 开始，并根据需求进行修改。对于更复杂的场景，请参考 `examples` 目录中的场景配置。\n\n### HDF5 数据字段\n\nHDF5 文件包含以下字段：\n\n- `triangles`: [N, 3, 3] 三角形顶点数组\n- `texture`: [N, 13, 32, 32] 纹理贴图数组\n- `vn`: [N, 3, 3] 顶点法向量数组\n- `c2w`: [N, 4, 4] 相机到世界矩阵数组\n- `fov`: [N] 视场角数组\n\n我们使用的相机坐标系与 Blender 一致（-Z = 观察方向，+Y = 上方，+X = 右侧），在实现自己的 HDF5 转换器时请注意坐标系问题。\n\n更多详情请参阅 [`scene_processor\u002Fto_h5.py`](.\u002Fscene_processor\u002Fto_h5.py)。\n\n### 重新网格化对象\n\n我们在 [`scene_processor\u002Fremesh.py`](.\u002Fscene_processor\u002Fremesh.py) 中提供了一个简单的重新网格化工具。您可以在将对象放入场景之前使用该工具对其进行重新网格化。\n\n此外，场景配置 JSON 文件中还提供了 `remesh` 和 `remesh_target_face_num` 字段，允许您在场景转换过程中对对象进行重新网格化。\n\n```bash\npython3 scene_processor\u002Fremesh.py --input path\u002Fto\u002Fyour\u002Fhigh_res_mesh.obj ----output remeshed_object.obj --target_face_num 1024\n```\n\n### 使用 Blender 渲染参考图像\n\n我们提供了一个脚本 [`scene_processor\u002Fto_blend.py`](.\u002Fscene_processor\u002Fto_blend.py)，用于使用 Blender 渲染参考图像。该脚本会将 JSON 描述的场景转换为 Blender 场景，这样你就可以用 Blender 渲染场景，或者将其保存为 Blend 文件以供后续使用。\n\n示例用法：\n\n```bash\npython3 scene_processor\u002Fto_blend.py examples\u002Fcbox.json --output_dir tmp\u002Fcbox --dump_blend --save_img --spp 4096\n```\n\n有关详细用法，请查看该脚本。\n\n## Blender 扩展\n\n我们提供了一个 Blender 扩展，以简化为 RenderFormer 设置场景的过程。更多详情请参阅 [Blender 扩展](https:\u002F\u002Fgithub.com\u002FiamNCJ\u002Frenderformer-blender-extension)。\n\n## 场景设置提示\n\n1. 始终从 `examples\u002Finit-template.json` 开始。\n2. 
请将场景限制在我们的训练数据范围内，外推虽然可能有效，但无法保证。\n   - 相机到场景中心的距离在 [1.5, 2.0] 米之间，视场角在 [30, 60] 度之间。\n   - 场景包围盒在 x、y、z 方向上的范围均为 [-0.5, 0.5]。\n   - 光源：最多 8 个三角形（请使用 [`examples\u002Ftemplates\u002Flighting\u002Ftri.obj`](.\u002Fexamples\u002Ftemplates\u002Flighting\u002Ftri.obj) 中的三角网格），每个光源的缩放比例在 [2.0, 2.5] 之间，到场景中心的距离在 [2.1, 2.7] 米之间，所有光源的总发光强度之和应在 [2500, 5000] 之间。\n   - 三角形总数：训练数据涵盖最多 4096 个三角形，但在推理时扩展到 8192 个三角形通常仍然可行。\n   - 所有训练对象都是水密且经过 QSlim 简化的。建议使用均匀的三角形大小。如果发现你的对象无法正常工作，可以尝试使用我们提供的脚本或其他网格重拓扑工具对其进行重新网格化。\n\n# 致谢\n\n我们借鉴了以下仓库中的一些代码，在此感谢作者们的贡献：\n\n- [DPT](https:\u002F\u002Fgithub.com\u002Fisl-org\u002FDPT\u002F)\n- [nerfstudio](https:\u002F\u002Fgithub.com\u002Fnerfstudio-project\u002Fnerfstudio\u002F)\n- [rotary-embedding-torch](https:\u002F\u002Fgithub.com\u002Flucidrains\u002Frotary-embedding-torch)\n\n除了来自 [Objaverse](https:\u002F\u002Fobjaverse.allenai.org\u002F) 的 3D 模型外，我们还对我们在示例中使用的 3D 模型的贡献者表示深深的感谢：\n\n- [**着色球**](https:\u002F\u002Frgl.s3.eu-central-1.amazonaws.com\u002Fscenes\u002Fmatpreview.zip)：由 Wenzel Jakob 来自 [Mitsuba Gallery](https:\u002F\u002Fmitsuba.readthedocs.io\u002Fen\u002Fstable\u002Fsrc\u002Fgallery.html)\n- **斯坦福兔子与露西**：来自 [斯坦福 3D 扫描库](https:\u002F\u002Fgraphics.stanford.edu\u002Fdata\u002F3Dscanrep\u002F)\n- **康奈尔盒子**：来自 [康奈尔大学计算机图形学项目中的康奈尔盒子数据](https:\u002F\u002Fwww.graphics.cornell.edu\u002Fonline\u002Fbox\u002F)\n- **犹他茶壶**：来自 [犹他模型库](https:\u002F\u002Fusers.cs.utah.edu\u002F~dejohnso\u002Fmodels\u002Fteapot.html)\n- **Veach MIS**：由 Eric Veach 和 Leonidas J. Guibas 提出。1995 年发表的论文《蒙特卡洛渲染中采样技术的最优组合》\n- **Spot**：由 Keenan Crane 来自 [Keenan 的 3D 模型库](https:\u002F\u002Fwww.cs.cmu.edu\u002F~kmcrane\u002FProjects\u002FModelRepository\u002F)\n- [**克莱因瓶**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fklein-bottle-ce95eaceb29544aaa47db7a586811b09)：由 Fausto Javier Da Rosa 创作\n- **常宽体**：原始网格来自 [小体积常宽体](https:\u002F\u002Farxiv.org\u002Fabs\u002F2405.18501)。衍生网格来自 [Keenan 的 3D 模型库](https:\u002F\u002Fwww.cs.cmu.edu\u002F~kmcrane\u002FProjects\u002FModelRepository\u002F)\n- [**珠宝**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fjewelry-4373121e41f94727bb802b78ce6b566f)：由 elbenZ 创作\n- [**香蕉**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fbanana-923ea2b1927d4c24ab88860ee416ce6c)、[**复活节篮子**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Feaster-basket-427232bfdd344c46ab878c4bfcb8f904)、[**水瓶**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fwater-bottle-c002e2aa017d49ae8e6dbd295cca3914)、[**野马车**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fbronco-37e3760bfde44beeb39e6fd69b690637)、[**心形拼图**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fheart-puzzle-e762b2d7de6749e79f54e0e6a0ff96be)：均由 Microsoft 创作\n- [**低多边形狐狸**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Flowpoly-fox-01674a892f414c0681afdeb563cc8e13)：由 Vlad Zaichyk 创作\n- [**低多边形水晶**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Flowpoly-crystals-d94bccb1305d409482eba04d736fb7dd)：由 Mongze 创作\n- [**保龄球瓶**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fbowling-pin-e0be50c74a4a479f8d412a4050f8fe05)：由 SINOFWRATH 创作\n- [**立方体瀑布**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fcube-cascade-11ba2667998d45f8b3ad178b27a34725)、[**漫步立方体**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fmarching-cubes-4d2a15a193764305875c62bd5dff757d)：由 Tycho Magnetic Anomaly 创作\n- [**跳舞的螃蟹**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fdancing-crab-uca-mjoebergi-280863886fee409ab3c8168f07caa89f)：由 Bohdan Lvov 创作\n- 
[**魔法陀螺仪**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fmagical-gyroscope-a3f110c0d2944a2dbff78709eb8f3984)：由 reddification 创作\n- [**卡波耶拉立方体**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fcapoeira-cube-753c8e3fdab64478b497fc5c6d8c88d9)：由 mortaleiros 创作\n- [**P.U.C. 安全机器人**](https:\u002F\u002Fsketchfab.com\u002F3d-models\u002Fpuc-security-bot-7-ee0a6da142b94d2bbf1d65526bec3d3e)：由 Gouhadouken 创作\n\n# 许可证\n\nRenderFormer 模型及大部分代码采用 MIT 许可证授权。以下子模块可能有不同的许可证：\n\n- [renderformer-liger-kernel](https:\u002F\u002Fgithub.com\u002FiamNCJ\u002Frenderformer-liger-kernel)：为 RenderFormer 集成而重新分发的 Liger Kernel。它源自原始的 [Liger Kernel](https:\u002F\u002Fgithub.com\u002Flinkedin\u002FLiger-Kernel)，并采用 BSD 2-Clause “简化” 许可证授权（详见 [LICENSE.txt](https:\u002F\u002Fgithub.com\u002FiamNCJ\u002Frenderformer-liger-kernel\u002Fblob\u002Fmain\u002FLICENSE.txt)）。\n- [simple-ocio](https:\u002F\u002Fgithub.com\u002FiamNCJ\u002Fsimple-ocio)：我们使用此工具来简化 OpenColorIO 在色调映射中的使用。该包重新分发了完整的 [Blender 颜色管理目录](https:\u002F\u002Fprojects.blender.org\u002Fblender\u002Fblender\u002Fsrc\u002Fbranch\u002Fmain\u002Frelease\u002Fdatafiles\u002Fcolormanagement)。完整的许可证文本可在 [ocio-license.txt](https:\u002F\u002Fgithub.com\u002FiamNCJ\u002Fsimple-ocio\u002Fblob\u002Fmain\u002Fsimple_ocio\u002Focio_data\u002Focio-license.txt) 以及各配置文件的头部找到。该包本身仍采用 MIT 许可证授权（详见 [LICENSE](https:\u002F\u002Fgithub.com\u002FiamNCJ\u002Fsimple-ocio\u002Fblob\u002Fmain\u002FLICENSE)）。\n\n# 引用\n\n如果您觉得这项工作对您有所帮助，请引用我们的论文：\n\n```bibtex\n@inproceedings {zeng2025renderformer,\n    title      = {RenderFormer: 基于 Transformer 的三角网格全局光照神经渲染},\n    author     = {Chong Zeng 和 Yue Dong 和 Pieter Peers 和 Hongzhi Wu 和 Xin Tong},\n    booktitle  = {ACM SIGGRAPH 2025 会议论文集},\n    year       = {2025}\n}\n```","# RenderFormer 快速上手指南\n\nRenderFormer 是一个基于 Transformer 的神经渲染管线，能够直接从三角形网格场景生成包含全局光照（如镜面反射、复杂阴影、漫反射间接照明等）的图像，且无需针对特定场景进行训练或微调。\n\n## 环境准备\n\n### 系统要求\n- **操作系统**：Linux、macOS 或 Windows\n- **硬件要求**：\n  - GPU：NVIDIA CUDA GPU 或 Apple Metal GPU\n  - 显存：最低 8GB\n\n### 前置依赖\n- Python 环境\n- PyTorch 2.0+\n- (可选) Flash Attention：CUDA 用户建议安装以提升性能\n\n## 安装步骤\n\n1. **克隆仓库并进入目录**\n   ```bash\n   git clone https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Frenderformer\n   cd renderformer\n   ```\n\n2. **安装依赖包**\n   ```bash\n   pip install -r requirements.txt\n   ```\n   > **提示**：国内用户可使用清华源加速安装：\n   > `pip install -r requirements.txt -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple`\n\n3. **下载 FreeImage 插件（用于 HDR 图像读写）**\n   ```bash\n   python3 -c \"import imageio; imageio.plugins.freeimage.download()\"\n   ```\n\n4. **(可选) 安装 Flash Attention**\n   如果是 NVIDIA GPU 用户，推荐安装：\n   ```bash\n   pip install flash-attn --no-build-isolation\n   ```\n\n5. **获取预训练模型**\n   模型将自动从 Hugging Face 下载，也可手动指定本地路径。\n   - **推荐模型**：`microsoft\u002Frenderformer-v1.1-swin-large` (483M 参数)\n   - **基础模型**：`microsoft\u002Frenderformer-v1-base` (205M 参数)\n\n## 基本使用\n\n以下流程演示如何将一个示例场景转换为渲染格式并生成图像。\n\n### 1. 场景转换\n首先将示例场景配置文件（JSON）转换为 RenderFormer 所需的 HDF5 格式：\n\n```bash\npython3 scene_processor\u002Fconvert_scene.py examples\u002Fcbox.json --output_h5_path tmp\u002Fcbox\u002Fcbox.h5\n```\n\n### 2. 
渲染单张图像\n使用推理脚本对转换后的场景进行渲染：\n\n```bash\npython3 infer.py --h5_file tmp\u002Fcbox\u002Fcbox.h5 --output_dir output\u002Fcbox\u002F\n```\n\n**输出说明**：\n- `output\u002Fcbox\u002Fcbox_view_0.exr`：线性 HDR 输出结果\n- `output\u002Fcbox\u002Fcbox_view_0.png`：经过色调映射的 LDR 图片\n\n**常用参数**：\n- `--model_id`：指定模型 ID（默认自动加载）\n- `--resolution`：渲染分辨率（默认 512）\n- `--tone_mapper`：色调映射器，可选 `none`, `agx`, `filmic`, `pbr_neutral`\n\n示例（使用 Filmic 色调映射）：\n```bash\npython3 infer.py --h5_file tmp\u002Fcbox\u002Fcbox.h5 --output_dir output\u002Fcbox\u002F --tone_mapper filmic\n```\n\n### 3. Python 代码调用（批量推理）\n你也可以直接在 Python 代码中使用 `RenderFormerRenderingPipeline` 进行批量渲染：\n\n```python\nimport torch\nfrom renderformer import RenderFormerRenderingPipeline\n\n# 加载预训练模型\npipeline = RenderFormerRenderingPipeline.from_pretrained(\"microsoft\u002Frenderformer-v1.1-swin-large\")\ndevice = torch.device('cuda')\npipeline.to(device)\n\n# 构造输入数据示例 (实际使用时请替换为真实的场景数据)\nBATCH_SIZE = 2\nNUM_TRIANGLES = 1024\nTEX_PATCH_SIZE = 32\nNUM_VIEWS = 4\n\ntriangles = torch.randn((BATCH_SIZE, NUM_TRIANGLES, 3, 3), device=device)\ntexture = torch.randn((BATCH_SIZE, NUM_TRIANGLES, 13, TEX_PATCH_SIZE, TEX_PATCH_SIZE), device=device)\nmask = torch.ones((BATCH_SIZE, NUM_TRIANGLES), dtype=torch.bool, device=device)\nvn = torch.randn((BATCH_SIZE, NUM_TRIANGLES, 3, 3), device=device)\nc2w = torch.randn((BATCH_SIZE, NUM_VIEWS, 4, 4), device=device)\nfov = torch.randn((BATCH_SIZE, NUM_VIEWS, 1), device=device)\n\n# 执行渲染\nrendered_imgs = pipeline(\n    triangles=triangles,\n    texture=texture,\n    mask=mask,\n    vn=vn,\n    c2w=c2w,\n    fov=fov,\n    resolution=512,\n    torch_dtype=torch.float16,\n)\n\nprint(\"渲染完成。输出形状:\", rendered_imgs.shape)\n# 输出: torch.Size([2, 4, 512, 512, 3])\n```","某游戏工作室的美术团队需要在短时间内为一款新 RPG 生成大量包含复杂光影互动的过场动画预览，且场景中包含大量高反光材质和动态光源。\n\n### 没有 renderformer 时\n- **渲染耗时极长**：传统光线追踪算法计算全局光照（如多次镜面反射、柔和阴影）需要数小时甚至数天，严重拖慢迭代速度。\n- **调试成本高昂**：每次调整材质或灯光后，必须重新等待漫长的渲染过程才能查看效果，导致创意验证周期被拉长。\n- **硬件资源紧张**：为了加速渲染，团队不得不占用大量高性能 GPU 集群，挤占了其他项目的算力资源。\n- **泛化能力差**：若更换全新场景，往往需要针对特定场景重新训练神经网络或手动微调参数，无法即插即用。\n\n### 使用 renderformer 后\n- **实时级推理速度**：renderformer 基于 Transformer 架构直接将三角网格序列转换为像素，无需逐场景训练，能在秒级内输出含完整全局光照的图像。\n- **即时反馈循环**：美术师修改场景配置后，可立即通过推理脚本看到包含复杂漫反射间接照明和高光反射的最终效果，大幅提升创作效率。\n- **降低算力门槛**：由于不再依赖昂贵的物理模拟计算，普通工作站即可流畅运行，释放了集群资源用于最终成品渲染。\n- **零样本泛化能力**：无论是简单室内还是拥有多光源的复杂室外场景，renderformer 均能直接处理，无需任何额外的微调或预训练。\n\nrenderformer 通过将渲染重构为序列到序列的转换任务，彻底打破了高质量全局光照渲染在速度与灵活性上的传统瓶颈。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmicrosoft_renderformer_a384454e.png","microsoft","Microsoft","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fmicrosoft_4900709c.png","Open source projects and samples from Microsoft",null,"opensource@microsoft.com","OpenAtMicrosoft","https:\u002F\u002Fopensource.microsoft.com","https:\u002F\u002Fgithub.com\u002Fmicrosoft",[83,87],{"name":84,"color":85,"percentage":86},"Python","#3572A5",94.5,{"name":88,"color":89,"percentage":90},"Shell","#89e051",5.5,942,58,"2026-04-08T19:36:40","MIT","Linux, macOS, Windows","必需。支持 NVIDIA CUDA GPU 或 Apple Metal GPU。最低显存要求为 8GB。","未说明",{"notes":99,"python":100,"dependencies":101},"1. 若未安装 Flash Attention，脚本会自动回退到 torch scaled dot product attention，也可通过设置环境变量 ATTN_IMPL=sdpa 强制使用。2. 首次运行前需执行命令下载 freeimage 插件以支持 HDR 图像读写：`python3 -c \"import imageio; imageio.plugins.freeimage.download()\"`。3. 提供两个预训练模型版本：Base (205M 参数) 和 Large (483M 参数)，托管于 Hugging Face。4. 
输出支持 HDR (.exr) 和 LDR (.png) 格式，可通过参数选择色调映射器。","未说明 (需满足 PyTorch 2.0+ 要求)",[102,103,104,105,106],"torch>=2.0","flash-attn (可选，仅 CUDA)","imageio","transformers","diffusers",[35,108,14],"其他",[110,111,112,113,114,115,116,117,118,64],"3d","3d-graphics","global-illumination","neural-rendering","pytorch","renderer","rendering","sequence-to-sequence","transformer","2026-03-27T02:49:30.150509","2026-04-12T05:23:26.994087",[122,127,132,137,142,146],{"id":123,"question_zh":124,"answer_zh":125,"source_url":126},30300,"为什么项目中缺少 LICENSE 文件，应该使用什么许可证？","该项目最初缺少 LICENSE 文件，这会影响用户的合规使用。Microsoft 项目通常使用标准的 MIT 许可证。用户可以参考 Microsoft 的仓库模板获取标准许可证文本，或访问相关文档了解开源许可证的更多信息。此类缺失通常会自动触发拉取请求来补充文件。","https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Frenderformer\u002Fissues\u002F1",{"id":128,"question_zh":129,"answer_zh":130,"source_url":131},30296,"训练期间光源的大小是如何初始化的？三角形的尺寸和顶点是如何生成的？","关于光源初始化的详细信息实际上已在 README.md 文件中说明。具体而言，光源（即具有漫反射发射的三角形）的数量在 1 到 8 个之间，强度在 2,500 到 5,000 w\u002Funit^2 之间均匀采样。它们的位置遵循与相机类似的程序，但距离在 2.1 到 2.7 个单位之间均匀采样。关于三角形具体尺寸和顶点生成的细节，请查阅项目 README 文件的第 320-325 行。","https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Frenderformer\u002Fissues\u002F10",{"id":133,"question_zh":134,"answer_zh":135,"source_url":136},30297,"是否可以提供原始的训练脚本或用于验证的最小样本数据集？","目前无法发布原始训练脚本或样本数据，因为代码依赖于几个尚未通过公司合规审查的内部模块。不过，论文中已完整描述了所有训练细节。数据结构与当前 `scene_processor\u002Fto_h5.py` 脚本生成的结构相同，唯一的区别是每个样本需要额外包含一个字段：通过 Blender 渲染的 HDR 地面真实图像（ground-truth image）。如果在复现结果时遇到实现问题，可以直接联系作者团队获取帮助。","https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Frenderformer\u002Fissues\u002F7",{"id":138,"question_zh":139,"answer_zh":140,"source_url":141},30298,"如何将非开源或非外部协作的仓库迁移到 Microsoft 内部的 GitHub (GHEC EMU)？","为了保护 Microsoft 的安全，不相关于开源项目或不需要与第三方协作的私有或内部仓库必须迁移到 GitHub inside Microsoft。拥有仓库 `admin` 权限的用户需在此 Issue 下评论以选择加入或退出：\n1. **选择加入迁移**：评论 `@gimsvc optin --date \u003C目标迁移日期 mm-dd-yyyy>`（例如：`@gimsvc optin --date 03-15-2023`）。\n2. **选择退出迁移**：评论 `@gimsvc optout --reason \u003C原因>`。原因选项包括：`staging`（即将开源或公开）、`collaboration`（用于外部协作）、`delete`（不再需要将被删除）或 `other`。若未响应，仓库可能会被自动归档。","https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Frenderformer\u002Fissues\u002F4",{"id":143,"question_zh":144,"answer_zh":145,"source_url":141},30299,"如果选择退出迁移到 Microsoft 内部 GitHub，后续会有什么影响？","如果您成功选择退出（opt-out）迁移，系统会确认您的操作。为了继续保护 Microsoft 的安全，您预计每 **120** 天会在该仓库中看到一个新的 Issue 被创建，要求您再次确认是选择迁移还是再次选择退出。您需要定期响应这些提醒以维持当前状态。",{"id":147,"question_zh":148,"answer_zh":149,"source_url":150},30301,"项目缺少哪些 Microsoft 项目必备的重要文件？","该仓库曾缺少 Microsoft 项目应具备的一些重要文件（如特定的模板或配置文件）。维护团队通常会通过开启拉取请求（Pull Request）来自动添加这些缺失的文件。一旦 PR 合并，相关问题会自动关闭。Microsoft 团队成员可参考内部开源指南了解更多关于此维护工作的细节。","https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Frenderformer\u002Fissues\u002F3",[]]