[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-aim-uofa--AdelaiDet":3,"tool-aim-uofa--AdelaiDet":62},[4,18,26,35,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",108322,2,"2026-04-10T11:39:34",[14,15,13],{"id":36,"name":37,"github_repo":38,"description_zh":39,"stars":40,"difficulty_score":32,"last_commit_at":41,"category_tags":42,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[43,13,15,14],"插件",{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":10,"last_commit_at":50,"category_tags":51,"status":17},4487,"LLMs-from-scratch","rasbt\u002FLLMs-from-scratch","LLMs-from-scratch 是一个基于 PyTorch 的开源教育项目，旨在引导用户从零开始一步步构建一个类似 ChatGPT 的大型语言模型（LLM）。它不仅是同名技术著作的官方代码库，更提供了一套完整的实践方案，涵盖模型开发、预训练及微调的全过程。\n\n该项目主要解决了大模型领域“黑盒化”的学习痛点。许多开发者虽能调用现成模型，却难以深入理解其内部架构与训练机制。通过亲手编写每一行核心代码，用户能够透彻掌握 Transformer 架构、注意力机制等关键原理，从而真正理解大模型是如何“思考”的。此外，项目还包含了加载大型预训练权重进行微调的代码，帮助用户将理论知识延伸至实际应用。\n\nLLMs-from-scratch 特别适合希望深入底层原理的 AI 开发者、研究人员以及计算机专业的学生。对于不满足于仅使用 API，而是渴望探究模型构建细节的技术人员而言，这是极佳的学习资源。其独特的技术亮点在于“循序渐进”的教学设计：将复杂的系统工程拆解为清晰的步骤，配合详细的图表与示例，让构建一个虽小但功能完备的大模型变得触手可及。无论你是想夯实理论基础，还是为未来研发更大规模的模型做准备",90106,"2026-04-06T11:19:32",[52,15,13,14],"语言模型",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":10,"last_commit_at":59,"category_tags":60,"status":17},4292,"Deep-Live-Cam","hacksider\u002FDeep-Live-Cam","Deep-Live-Cam 是一款专注于实时换脸与视频生成的开源工具，用户仅需一张静态照片，即可通过“一键操作”实现摄像头画面的即时变脸或制作深度伪造视频。它有效解决了传统换脸技术流程繁琐、对硬件配置要求极高以及难以实时预览的痛点，让高质量的数字内容创作变得触手可及。\n\n这款工具不仅适合开发者和技术研究人员探索算法边界，更因其极简的操作逻辑（仅需三步：选脸、选摄像头、启动），广泛适用于普通用户、内容创作者、设计师及直播主播。无论是为了动画角色定制、服装展示模特替换，还是制作趣味短视频和直播互动，Deep-Live-Cam 都能提供流畅的支持。\n\n其核心技术亮点在于强大的实时处理能力，支持口型遮罩（Mouth Mask）以保留使用者原始的嘴部动作，确保表情自然精准；同时具备“人脸映射”功能，可同时对画面中的多个主体应用不同面孔。此外，项目内置了严格的内容安全过滤机制，自动拦截涉及裸露、暴力等不当素材，并倡导用户在获得授权及明确标注的前提下合规使用，体现了技术发展与伦理责任的平衡。",88924,"2026-04-06T03:28:53",[14,15,13,61],"视频",{"id":63,"github_repo":64,"name":65,"description_en":66,"description_zh":67,"ai_summary_zh":67,"readme_en":68,"readme_zh":69,"quickstart_zh":70,"use_case_zh":71,"hero_image_url":72,"owner_login":73,"owner_name":74,"owner_avatar_url":75,"owner_bio":76,"owner_company":77,"owner_location":77,"owner_email":77,"owner_twitter":77,"owner_website":77,"owner_url":78,"languages":79,"stars":100,"forks":101,"last_commit_at":102,"license":103,"difficulty_score":104,"env_os":105,"env_gpu":106,"env_ram":107,"env_deps":108,"category_tags":117,"github_topics":118,"view_count":32,"oss_zip_url":77,"oss_zip_packed_at":77,"status":17,"created_at":134,"updated_at":135,"faqs":136,"releases":169},7642,"aim-uofa\u002FAdelaiDet","AdelaiDet","AdelaiDet is an open source toolbox for multiple instance-level detection and recognition tasks.","AdelaiDet 是一个基于 Detectron2 构建的开源工具箱，专为解决多种实例级检测与识别任务而设计。它有效整合了目标检测、实例分割、文本识别及关键点检测等复杂视觉任务，帮助开发者摆脱重复搭建基础架构的困扰，直接聚焦于算法优化与应用落地。\n\n这款工具非常适合计算机视觉领域的研究人员、算法工程师以及希望快速验证前沿模型的开发者使用。其核心亮点在于“一站式”集成了多个具有影响力的高性能算法，包括无需锚框的 FCOS、高效实时的 BlendMask 与 SOLOv2、面向场景文本理解的 ABCNet 系列，以及轻量级的 BoxInst 等。用户不仅可以复现这些先进的学术成果，还能直接调用在 COCO 等权威数据集上预训练好的高精度模型。此外，项目已将模型文件迁移至 Hugging Face 平台，确保了资源获取的稳定性与便捷性。无论是进行学术研究对比，还是构建工业级视觉应用，AdelaiDet 都提供了坚实且灵活的技术底座。","\u003Cdiv align=\"center\">\n    \u003Cimg src=\"docs\u002Fadel-logo.svg\" width=\"160\" >\n\u003C\u002Fdiv>\n\n#  AdelaiDet\n\nAs of Jan. 2024, the CloudStor server is dead. Model files are hosted on huggingface:\n\n- https:\u002F\u002Fhuggingface.co\u002FZjuCv\u002FAdelaiDet\u002Ftree\u002Fmain\n- https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-FCOS\u002Ftree\u002Fmain\n- https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-CondInst\u002Ftree\u002Fmain\n- https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-BoxInst\u002Ftree\u002Fmain\n\n\n*AdelaiDet* is an open source toolbox for multiple instance-level recognition tasks on top of [Detectron2](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fdetectron2).\nAll instance-level recognition works from our group are open-sourced here.\n\nTo date, AdelaiDet implements the following algorithms:\n\n* [FCOS](configs\u002FFCOS-Detection\u002FREADME.md)\n* [BlendMask](configs\u002FBlendMask\u002FREADME.md)\n* [MEInst](configs\u002FMEInst-InstanceSegmentation\u002FREADME.md)\n* [ABCNet](configs\u002FBAText\u002FREADME.md)\n* [ABCNetv2](configs\u002FBAText#quick-start-abcnetv2) \n* [CondInst](configs\u002FCondInst\u002FREADME.md)\n* [SOLO](https:\u002F\u002Farxiv.org\u002Fabs\u002F1912.04488) ([mmdet version](https:\u002F\u002Fgithub.com\u002FWXinlong\u002FSOLO))\n* [SOLOv2](configs\u002FSOLOv2\u002FREADME.md)\n* [BoxInst](configs\u002FBoxInst\u002FREADME.md) ([video demo](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=NuF8NAYf5L8))\n* [DenseCL](configs\u002FDenseCL\u002FREADME.md)\n* [FCPose](configs\u002FFCPose\u002FREADME.md)\n\n\n\n## Models\n### COCO Object Detecton Baselines with [FCOS](https:\u002F\u002Farxiv.org\u002Fabs\u002F1904.01355)\nName | inf. time | box AP | download\n--- |:---:|:---:|:---\n[FCOS_R_50_1x](configs\u002FFCOS-Detection\u002FR_50_1x.yaml) | 16 FPS | 38.7 | [model](https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-FCOS\u002Fresolve\u002Fmain\u002FFCOS_R_50_1x.pth?download=true)\n[FCOS_MS_R_101_2x](configs\u002FFCOS-Detection\u002FMS_R_101_2x.yaml) | 12 FPS | 43.1 | [model](https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-FCOS\u002Fresolve\u002Fmain\u002FFCOS_MS_R_101_2x.pth?download=true)\n[FCOS_MS_X_101_32x8d_2x](configs\u002FFCOS-Detection\u002FMS_X_101_32x8d_2x.yaml) | 6.6 FPS | 43.9 | [model](https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-FCOS\u002Fresolve\u002Fmain\u002FFCOS_MS_X_101_32x8d_2x.pth?download=true)\n[FCOS_MS_X_101_32x8d_dcnv2_2x](configs\u002FFCOS-Detection\u002FMS_X_101_32x8d_2x_dcnv2.yaml) | 4.6 FPS | 46.6 | [model](https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-FCOS\u002Fresolve\u002Fmain\u002FFCOS_MS_X_101_32x8d_dcnv2_2x.pth?download=true)\n[FCOS_RT_MS_DLA_34_4x_shtw](configs\u002FFCOS-Detection\u002FFCOS_RT\u002FMS_DLA_34_4x_syncbn_shared_towers.yaml) | 52 FPS | 39.1 | [model](https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-FCOS\u002Fresolve\u002Fmain\u002FFCOS_RT_MS_DLA_34_4x_syncbn_shared_towers.pth?download=true)\n\nMore models can be found in FCOS [README.md](configs\u002FFCOS-Detection\u002FREADME.md).\n\n### COCO Instance Segmentation Baselines with [BlendMask](https:\u002F\u002Farxiv.org\u002Fabs\u002F2001.00309)\n\nModel | Name |inf. time | box AP | mask AP | download\n--- |:---:|:---:|:---:|:---:|:---:\nMask R-CNN | [R_101_3x](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fdetectron2\u002Fblob\u002Fmaster\u002Fconfigs\u002FCOCO-InstanceSegmentation\u002Fmask_rcnn_R_101_FPN_3x.yaml) | 10 FPS | 42.9 | 38.6 |\nBlendMask | [R_101_3x](configs\u002FBlendMask\u002FR_101_3x.yaml) | 11 FPS | 44.8 | 39.5 | [model](https:\u002F\u002Fhuggingface.co\u002FZjuCv\u002FAdelaiDet\u002Fblob\u002Fmain\u002FR_101_3x.pth)\nBlendMask | [R_101_dcni3_5x](configs\u002FBlendMask\u002FR_101_dcni3_5x.yaml) | 10 FPS | 46.8 | 41.1 | [model](https:\u002F\u002Fhuggingface.co\u002FZjuCv\u002FAdelaiDet\u002Fblob\u002Fmain\u002FR_101_dcni3_5x.pth)\n\nFor more models and information, please refer to BlendMask [README.md](configs\u002FBlendMask\u002FREADME.md).\n\n### COCO Instance Segmentation Baselines with [MEInst](https:\u002F\u002Farxiv.org\u002Fabs\u002F2003.11712)\n\nName | inf. time | box AP | mask AP | download\n--- |:---:|:---:|:---:|:---:\n[MEInst_R_50_3x](https:\u002F\u002Fgithub.com\u002Faim-uofa\u002FAdelaiDet\u002Fconfigs\u002FMEInst-InstanceSegmentation\u002FMEInst_R_50_3x.yaml) | 12 FPS | 43.6 | 34.5 | [model](https:\u002F\u002Fhuggingface.co\u002FZjuCv\u002FAdelaiDet\u002Fblob\u002Fmain\u002FMEInst_R_50_3x.pth)\n\nFor more models and information, please refer to MEInst [README.md](configs\u002FMEInst-InstanceSegmentation\u002FREADME.md).\n\n### Total_Text results with [ABCNet](configs\u002FBAText\u002FREADME.md)\n\nName | inf. time | e2e-hmean | det-hmean | download\n---  |:---------:|:---------:|:---------:|:---:\n[v1-totaltext](configs\u002FBAText\u002FTotalText\u002Fattn_R_50.yaml) | 11 FPS | 67.1 | 86.0 | [model](https:\u002F\u002Fhuggingface.co\u002FZjuCv\u002FAdelaiDet\u002Fblob\u002Fmain\u002Ftt_e2e_attn_R_50.pth)\n[v2-totaltext](configs\u002FBAText\u002FTotalText\u002Fv2_attn_R_50.yaml) | 7.7 FPS | 71.8 | 87.2 | [model](https:\u002F\u002Fhuggingface.co\u002FZjuCv\u002FAdelaiDet\u002Fblob\u002Fmain\u002Fmodel_v2_totaltext.pth)\n\nFor more models and information, please refer to ABCNet [README.md](configs\u002FBAText\u002FREADME.md).\n\n### COCO Instance Segmentation Baselines with [CondInst](https:\u002F\u002Farxiv.org\u002Fabs\u002F2003.05664)\n\nName | inf. time | box AP | mask AP | download\n--- |:---:|:---:|:---:|:---:\n[CondInst_MS_R_50_1x](configs\u002FCondInst\u002FMS_R_50_1x.yaml) | 14 FPS | 39.7 | 35.7 | [model](https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-CondInst\u002Fresolve\u002Fmain\u002FCondInst_MS_R_50_1x.pth?download=true)\n[CondInst_MS_R_50_BiFPN_3x_sem](configs\u002FCondInst\u002FMS_R_50_BiFPN_3x_sem.yaml) | 13 FPS | 44.7 | 39.4 | [model](https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-CondInst\u002Fresolve\u002Fmain\u002FCondInst_MS_R_50_BiFPN_3x_sem.pth?download=true)\n[CondInst_MS_R_101_3x](configs\u002FCondInst\u002FMS_R_101_3x.yaml) | 11 FPS | 43.3 | 38.6 | [model](https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-CondInst\u002Fresolve\u002Fmain\u002FCondInst_MS_R_101_3x.pth?download=true)\n[CondInst_MS_R_101_BiFPN_3x_sem](configs\u002FCondInst\u002FMS_R_101_BiFPN_3x_sem.yaml) | 10 FPS | 45.7 | 40.2 | [model](https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-CondInst\u002Fresolve\u002Fmain\u002FCondInst_R_101_BiFPN_3x_sem.pth?download=true)\n\nFor more models and information, please refer to CondInst [README.md](configs\u002FCondInst\u002FREADME.md).\n\nNote that:\n- Inference time for all projects is measured on a NVIDIA 1080Ti with batch size 1.\n- APs are evaluated on COCO2017 val split unless specified.\n\n\n## Installation\n\nFirst install Detectron2 following the official guide: [INSTALL.md](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fdetectron2\u002Fblob\u002Fmaster\u002FINSTALL.md).\n\n*Please use Detectron2 with commit id [9eb4831](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fdetectron2\u002Fcommit\u002F9eb4831f742ae6a13b8edb61d07b619392fb6543) if you have any issues related to Detectron2.*\n\nThen build AdelaiDet with:\n\n```\ngit clone https:\u002F\u002Fgithub.com\u002Faim-uofa\u002FAdelaiDet.git\ncd AdelaiDet\npython setup.py build develop\n```\n\nIf you are using docker, a pre-built image can be pulled with:\n\n```\ndocker pull tianzhi0549\u002Fadet:latest\n```\n\nSome projects may require special setup, please follow their own `README.md` in [configs](configs).\n\n## Quick Start\n\n### Inference with Pre-trained Models\n\n1. Pick a model and its config file, for example, `fcos_R_50_1x.yaml`.\n2. Download the model `wget https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-FCOS\u002Fresolve\u002Fmain\u002FFCOS_R_50_1x.pth?download=true -O fcos_R_50_1x.pth`\n3. Run the demo with\n```\npython demo\u002Fdemo.py \\\n    --config-file configs\u002FFCOS-Detection\u002FR_50_1x.yaml \\\n    --input input1.jpg input2.jpg \\\n    --opts MODEL.WEIGHTS fcos_R_50_1x.pth\n```\n\n### Train Your Own Models\n\nTo train a model with \"train_net.py\", first\nsetup the corresponding datasets following\n[datasets\u002FREADME.md](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fdetectron2\u002Fblob\u002Fmaster\u002Fdatasets\u002FREADME.md),\nthen run:\n\n```\nOMP_NUM_THREADS=1 python tools\u002Ftrain_net.py \\\n    --config-file configs\u002FFCOS-Detection\u002FR_50_1x.yaml \\\n    --num-gpus 8 \\\n    OUTPUT_DIR training_dir\u002Ffcos_R_50_1x\n```\nTo evaluate the model after training, run:\n\n```\nOMP_NUM_THREADS=1 python tools\u002Ftrain_net.py \\\n    --config-file configs\u002FFCOS-Detection\u002FR_50_1x.yaml \\\n    --eval-only \\\n    --num-gpus 8 \\\n    OUTPUT_DIR training_dir\u002Ffcos_R_50_1x \\\n    MODEL.WEIGHTS training_dir\u002Ffcos_R_50_1x\u002Fmodel_final.pth\n```\nNote that:\n- The configs are made for 8-GPU training. To train on another number of GPUs, change the `--num-gpus`.\n- If you want to measure the inference time, please change `--num-gpus` to 1.\n- We set `OMP_NUM_THREADS=1` by default, which achieves the best speed on our machines, please change it as needed.\n- This quick start is made for FCOS. If you are using other projects, please check the projects' own `README.md` in [configs](configs). \n\n\n## Acknowledgements\n\nThe authors are grateful to\nNvidia, Huawei Noah's Ark Lab, ByteDance, Adobe who generously donated GPU computing in the past a few years.\n\n## Citing AdelaiDet\n\nIf you use this toolbox in your research or wish to refer to the baseline results published here, please use the following BibTeX entries:\n\n```BibTeX\n\n@misc{tian2019adelaidet,\n  author =       {Tian, Zhi and Chen, Hao and Wang, Xinlong and Liu, Yuliang and Shen, Chunhua},\n  title =        {{AdelaiDet}: A Toolbox for Instance-level Recognition Tasks},\n  howpublished = {\\url{https:\u002F\u002Fgit.io\u002Fadelaidet}},\n  year =         {2019}\n}\n```\nand relevant publications:\n```BibTeX\n\n@inproceedings{tian2019fcos,\n  title     =  {{FCOS}: Fully Convolutional One-Stage Object Detection},\n  author    =  {Tian, Zhi and Shen, Chunhua and Chen, Hao and He, Tong},\n  booktitle =  {Proc. Int. Conf. Computer Vision (ICCV)},\n  year      =  {2019}\n}\n\n@article{tian2021fcos,\n  title   =  {{FCOS}: A Simple and Strong Anchor-free Object Detector},\n  author  =  {Tian, Zhi and Shen, Chunhua and Chen, Hao and He, Tong},\n  journal =  {IEEE T. Pattern Analysis and Machine Intelligence (TPAMI)},\n  year    =  {2021}\n}\n\n@inproceedings{chen2020blendmask,\n  title     =  {{BlendMask}: Top-Down Meets Bottom-Up for Instance Segmentation},\n  author    =  {Chen, Hao and Sun, Kunyang and Tian, Zhi and Shen, Chunhua and Huang, Yongming and Yan, Youliang},\n  booktitle =  {Proc. IEEE Conf. Computer Vision and Pattern Recognition (CVPR)},\n  year      =  {2020}\n}\n\n@inproceedings{zhang2020MEInst,\n  title     =  {Mask Encoding for Single Shot Instance Segmentation},\n  author    =  {Zhang, Rufeng and Tian, Zhi and Shen, Chunhua and You, Mingyu and Yan, Youliang},\n  booktitle =  {Proc. IEEE Conf. Computer Vision and Pattern Recognition (CVPR)},\n  year      =  {2020}\n}\n\n@inproceedings{liu2020abcnet,\n  title     =  {{ABCNet}: Real-time Scene Text Spotting with Adaptive {B}ezier-Curve Network},\n  author    =  {Liu, Yuliang and Chen, Hao and Shen, Chunhua and He, Tong and Jin, Lianwen and Wang, Liangwei},\n  booktitle =  {Proc. IEEE Conf. Computer Vision and Pattern Recognition (CVPR)},\n  year      =  {2020}\n}\n\n@ARTICLE{9525302,\n  author={Liu, Yuliang and Shen, Chunhua and Jin, Lianwen and He, Tong and Chen, Peng and Liu, Chongyu and Chen, Hao},\n  journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, \n  title={ABCNet v2: Adaptive Bezier-Curve Network for Real-time End-to-end Text Spotting}, \n  year={2021},\n  volume={},\n  number={},\n  pages={1-1},\n  doi={10.1109\u002FTPAMI.2021.3107437}\n}\n\n@inproceedings{wang2020solo,\n  title     =  {{SOLO}: Segmenting Objects by Locations},\n  author    =  {Wang, Xinlong and Kong, Tao and Shen, Chunhua and Jiang, Yuning and Li, Lei},\n  booktitle =  {Proc. Eur. Conf. Computer Vision (ECCV)},\n  year      =  {2020}\n}\n\n@inproceedings{wang2020solov2,\n  title     =  {{SOLOv2}: Dynamic and Fast Instance Segmentation},\n  author    =  {Wang, Xinlong and Zhang, Rufeng and Kong, Tao and Li, Lei and Shen, Chunhua},\n  booktitle =  {Proc. Advances in Neural Information Processing Systems (NeurIPS)},\n  year      =  {2020}\n}\n\n@article{wang2021solo,\n  title   =  {{SOLO}: A Simple Framework for Instance Segmentation},\n  author  =  {Wang, Xinlong and Zhang, Rufeng and Shen, Chunhua and Kong, Tao and Li, Lei},\n  journal =  {IEEE T. Pattern Analysis and Machine Intelligence (TPAMI)},\n  year    =  {2021}\n}\n\n@article{tian2019directpose,\n  title   =  {{DirectPose}: Direct End-to-End Multi-Person Pose Estimation},\n  author  =  {Tian, Zhi and Chen, Hao and Shen, Chunhua},\n  journal =  {arXiv preprint arXiv:1911.07451},\n  year    =  {2019}\n}\n\n@inproceedings{tian2020conditional,\n  title     =  {Conditional Convolutions for Instance Segmentation},\n  author    =  {Tian, Zhi and Shen, Chunhua and Chen, Hao},\n  booktitle =  {Proc. Eur. Conf. Computer Vision (ECCV)},\n  year      =  {2020}\n}\n\n@article{CondInst2022Tian,\n  title   = {Instance and Panoptic Segmentation Using Conditional Convolutions},\n  author  = {Tian, Zhi and Zhang, Bowen and Chen, Hao and Shen, Chunhua},\n  journal = {IEEE T. Pattern Analysis and Machine Intelligence (TPAMI)},\n  year    = {2022}\n}\n\n@inproceedings{tian2021boxinst,\n  title     =  {{BoxInst}: High-Performance Instance Segmentation with Box Annotations},\n  author    =  {Tian, Zhi and Shen, Chunhua and Wang, Xinlong and Chen, Hao},\n  booktitle =  {Proc. IEEE Conf. Computer Vision and Pattern Recognition (CVPR)},\n  year      =  {2021}\n}\n\n@inproceedings{wang2021densecl,\n  title     =   {Dense Contrastive Learning for Self-Supervised Visual Pre-Training},\n  author    =   {Wang, Xinlong and Zhang, Rufeng and Shen, Chunhua and Kong, Tao and Li, Lei},\n  booktitle =   {Proc. IEEE Conf. Computer Vision and Pattern Recognition (CVPR)},\n  year      =   {2021}\n}\n\n@inproceedings{Mao2021pose,\n  title     =   {{FCPose}: Fully Convolutional Multi-Person Pose Estimation With Dynamic Instance-Aware Convolutions},\n  author    =   {Mao, Weian and  Tian, Zhi  and Wang, Xinlong  and Shen, Chunhua},\n  booktitle =   {Proc. IEEE Conf. Computer Vision and Pattern Recognition (CVPR)},\n  year      =   {2021}\n}\n```\n\n## License\n\nFor academic use, this project is licensed under the 2-clause BSD License - see the LICENSE file for details. For commercial use, please contact [Chunhua Shen](mailto:chhshen@gmail.com).\n","\u003Cdiv align=\"center\">\n    \u003Cimg src=\"docs\u002Fadel-logo.svg\" width=\"160\" >\n\u003C\u002Fdiv>\n\n#  AdelaiDet\n\n截至2024年1月，CloudStor服务器已停止服务。模型文件现托管在Hugging Face上：\n\n- https:\u002F\u002Fhuggingface.co\u002FZjuCv\u002FAdelaiDet\u002Ftree\u002Fmain\n- https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-FCOS\u002Ftree\u002Fmain\n- https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-CondInst\u002Ftree\u002Fmain\n- https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-BoxInst\u002Ftree\u002Fmain\n\n\n*AdelaiDet* 是一个基于 [Detectron2](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fdetectron2) 的开源工具箱，用于多种实例级识别任务。我们团队的所有实例级识别相关工作均在此开源。\n\n截至目前，AdelaiDet 已实现以下算法：\n\n* [FCOS](configs\u002FFCOS-Detection\u002FREADME.md)\n* [BlendMask](configs\u002FBlendMask\u002FREADME.md)\n* [MEInst](configs\u002FMEInst-InstanceSegmentation\u002FREADME.md)\n* [ABCNet](configs\u002FBAText\u002FREADME.md)\n* [ABCNetv2](configs\u002FBAText#quick-start-abcnetv2) \n* [CondInst](configs\u002FCondInst\u002FREADME.md)\n* [SOLO](https:\u002F\u002Farxiv.org\u002Fabs\u002F1912.04488) ([mmdet版本](https:\u002F\u002Fgithub.com\u002FWXinlong\u002FSOLO))\n* [SOLOv2](configs\u002FSOLOv2\u002FREADME.md)\n* [BoxInst](configs\u002FBoxInst\u002FREADME.md) ([视频演示](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=NuF8NAYf5L8))\n* [DenseCL](configs\u002FDenseCL\u002FREADME.md)\n* [FCPose](configs\u002FFCPose\u002FREADME.md)\n\n\n\n## 模型\n### COCO目标检测基准，使用[FCOS](https:\u002F\u002Farxiv.org\u002Fabs\u002F1904.01355)\n名称 | 推理时间 | box AP | 下载\n--- |:---:|:---:|:---\n[FCOS_R_50_1x](configs\u002FFCOS-Detection\u002FR_50_1x.yaml) | 16 FPS | 38.7 | [模型](https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-FCOS\u002Fresolve\u002Fmain\u002FFCOS_R_50_1x.pth?download=true)\n[FCOS_MS_R_101_2x](configs\u002FFCOS-Detection\u002FMS_R_101_2x.yaml) | 12 FPS | 43.1 | [模型](https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-FCOS\u002Fresolve\u002Fmain\u002FFCOS_MS_R_101_2x.pth?download=true)\n[FCOS_MS_X_101_32x8d_2x](configs\u002FFCOS-Detection\u002FMS_X_101_32x8d_2x.yaml) | 6.6 FPS | 43.9 | [模型](https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-FCOS\u002Fresolve\u002Fmain\u002FFCOS_MS_X_101_32x8d_2x.pth?download=true)\n[FCOS_MS_X_101_32x8d_dcnv2_2x](configs\u002FFCOS-Detection\u002FMS_X_101_32x8d_2x_dcnv2.yaml) | 4.6 FPS | 46.6 | [模型](https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-FCOS\u002Fresolve\u002Fmain\u002FFCOS_MS_X_101_32x8d_dcnv2_2x.pth?download=true)\n[FCOS_RT_MS_DLA_34_4x_shtw](configs\u002FFCOS-Detection\u002FFCOS_RT\u002FMS_DLA_34_4x_syncbn_shared_towers.yaml) | 52 FPS | 39.1 | [模型](https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-FCOS\u002Fresolve\u002Fmain\u002FFCOS_RT_MS_DLA_34_4x_syncbn_shared_towers.pth?download=true)\n\n更多模型请参见 FCOS 的 [README.md](configs\u002FFCOS-Detection\u002FREADME.md)。\n\n### COCO实例分割基准，使用[BlendMask](https:\u002F\u002Farxiv.org\u002Fabs\u002F2001.00309)\n\n模型 | 名称 | 推理时间 | box AP | mask AP | 下载\n--- |:---:|:---:|:---:|:---:|:---:\nMask R-CNN | [R_101_3x](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fdetectron2\u002Fblob\u002Fmaster\u002Fconfigs\u002FCOCO-InstanceSegmentation\u002Fmask_rcnn_R_101_FPN_3x.yaml) | 10 FPS | 42.9 | 38.6 |\nBlendMask | [R_101_3x](configs\u002FBlendMask\u002FR_101_3x.yaml) | 11 FPS | 44.8 | 39.5 | [模型](https:\u002F\u002Fhuggingface.co\u002FZjuCv\u002FAdelaiDet\u002Fblob\u002Fmain\u002FR_101_3x.pth)\nBlendMask | [R_101_dcni3_5x](configs\u002FBlendMask\u002FR_101_dcni3_5x.yaml) | 10 FPS | 46.8 | 41.1 | [模型](https:\u002F\u002Fhuggingface.co\u002FZjuCv\u002FAdelaiDet\u002Fblob\u002Fmain\u002FR_101_dcni3_5x.pth)\n\n更多模型和信息，请参考 BlendMask 的 [README.md](configs\u002FBlendMask\u002FREADME.md)。\n\n### COCO实例分割基准，使用[MEInst](https:\u002F\u002Farxiv.org\u002Fabs\u002F2003.11712)\n\n名称 | 推理时间 | box AP | mask AP | 下载\n--- |:---:|:---:|:---:|:---:\n[MEInst_R_50_3x](https:\u002F\u002Fgithub.com\u002Faim-uofa\u002FAdelaiDet\u002Fconfigs\u002FMEInst-InstanceSegmentation\u002FMEInst_R_50_3x.yaml) | 12 FPS | 43.6 | 34.5 | [模型](https:\u002F\u002Fhuggingface.co\u002FZjuCv\u002FAdelaiDet\u002Fblob\u002Fmain\u002FMEInst_R_50_3x.pth)\n\n更多模型和信息，请参考 MEInst 的 [README.md](configs\u002FMEInst-InstanceSegmentation\u002FREADME.md)。\n\n### Total_Text结果，使用[ABCNet](configs\u002FBAText\u002FREADME.md)\n\n名称 | 推理时间 | e2e-hmean | det-hmean | 下载\n---  |:---------:|:---------:|:---------:|:---:\n[v1-totaltext](configs\u002FBAText\u002FTotalText\u002Fattn_R_50.yaml) | 11 FPS | 67.1 | 86.0 | [模型](https:\u002F\u002Fhuggingface.co\u002FZjuCv\u002FAdelaiDet\u002Fblob\u002Fmain\u002Ftt_e2e_attn_R_50.pth)\n[v2-totaltext](configs\u002FBAText\u002FTotalText\u002Fv2_attn_R_50.yaml) | 7.7 FPS | 71.8 | 87.2 | [模型](https:\u002F\u002Fhuggingface.co\u002FZjuCv\u002FAdelaiDet\u002Fblob\u002Fmain\u002Fmodel_v2_totaltext.pth)\n\n更多模型和信息，请参考 ABCNet 的 [README.md](configs\u002FBAText\u002FREADME.md)。\n\n### COCO实例分割基准，使用[CondInst](https:\u002F\u002Farxiv.org\u002Fabs\u002F2003.05664)\n\n名称 | 推理时间 | box AP | mask AP | 下载\n--- |:---:|:---:|:---:|:---:\n[CondInst_MS_R_50_1x](configs\u002FCondInst\u002FMS_R_50_1x.yaml) | 14 FPS | 39.7 | 35.7 | [模型](https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-CondInst\u002Fresolve\u002Fmain\u002FCondInst_MS_R_50_1x.pth?download=true)\n[CondInst_MS_R_50_BiFPN_3x_sem](configs\u002FCondInst\u002FMS_R_50_BiFPN_3x_sem.yaml) | 13 FPS | 44.7 | 39.4 | [模型](https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-CondInst\u002Fresolve\u002Fmain\u002FCondInst_MS_R_50_BiFPN_3x_sem.pth?download=true)\n[CondInst_MS_R_101_3x](configs\u002FCondInst\u002FMS_R_101_3x.yaml) | 11 FPS | 43.3 | 38.6 | [模型](https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-CondInst\u002Fresolve\u002Fmain\u002FCondInst_MS_R_101_3x.pth?download=true)\n[CondInst_MS_R_101_BiFPN_3x_sem](configs\u002FCondInst\u002FMS_R_101_BiFPN_3x_sem.yaml) | 10 FPS | 45.7 | 40.2 | [模型](https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-CondInst\u002Fresolve\u002Fmain\u002FCondInst_R_101_BiFPN_3x_sem.pth?download=true)\n\n更多模型和信息，请参考 CondInst 的 [README.md](configs\u002FCondInst\u002FREADME.md)。\n\n请注意：\n- 所有项目的推理时间均在 NVIDIA 1080Ti 上以批大小为1进行测量。\n- AP 均在 COCO2017 验证集上评估，除非另有说明。\n\n\n## 安装\n\n首先按照官方指南安装 Detectron2：[INSTALL.md](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fdetectron2\u002Fblob\u002Fmaster\u002FINSTALL.md)。\n\n*如果您遇到与 Detectron2 相关的问题，请务必使用 commit id 为 [9eb4831](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fdetectron2\u002Fcommit\u002F9eb4831f742ae6a13b8edb61d07b619392fb6543) 的 Detectron2 版本。*\n\n然后通过以下步骤构建 AdelaiDet：\n\n```\ngit clone https:\u002F\u002Fgithub.com\u002Faim-uofa\u002FAdelaiDet.git\ncd AdelaiDet\npython setup.py build develop\n```\n\n如果您使用 Docker，可以拉取预构建的镜像：\n\n```\ndocker pull tianzhi0549\u002Fadet:latest\n```\n\n部分项目可能需要特殊设置，请参考各自在 [configs](configs) 中的 `README.md`。\n\n## 快速入门\n\n### 使用预训练模型进行推理\n\n1. 选择一个模型及其配置文件，例如 `fcos_R_50_1x.yaml`。\n2. 下载模型：`wget https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-FCOS\u002Fresolve\u002Fmain\u002FFCOS_R_50_1x.pth?download=true -O fcos_R_50_1x.pth`\n3. 运行示例程序：\n```\npython demo\u002Fdemo.py \\\n    --config-file configs\u002FFCOS-Detection\u002FR_50_1x.yaml \\\n    --input input1.jpg input2.jpg \\\n    --opts MODEL.WEIGHTS fcos_R_50_1x.pth\n```\n\n### 训练您自己的模型\n\n要使用 `train_net.py` 训练模型，首先请按照 [datasets\u002FREADME.md](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fdetectron2\u002Fblob\u002Fmaster\u002Fdatasets\u002FREADME.md) 中的说明设置相应的数据集，然后运行以下命令：\n\n```\nOMP_NUM_THREADS=1 python tools\u002Ftrain_net.py \\\n    --config-file configs\u002FFCOS-Detection\u002FR_50_1x.yaml \\\n    --num-gpus 8 \\\n    OUTPUT_DIR training_dir\u002Ffcos_R_50_1x\n```\n\n训练完成后，要评估模型，请运行以下命令：\n\n```\nOMP_NUM_THREADS=1 python tools\u002Ftrain_net.py \\\n    --config-file configs\u002FFCOS-Detection\u002FR_50_1x.yaml \\\n    --eval-only \\\n    --num-gpus 8 \\\n    OUTPUT_DIR training_dir\u002Ffcos_R_50_1x \\\n    MODEL.WEIGHTS training_dir\u002Ffcos_R_50_1x\u002Fmodel_final.pth\n```\n\n请注意：\n- 配置文件是为 8 卡训练设计的。如果要在不同数量的 GPU 上训练，请修改 `--num-gpus` 参数。\n- 如果需要测量推理时间，请将 `--num-gpus` 改为 1。\n- 默认设置了 `OMP_NUM_THREADS=1`，这在我们的机器上能获得最佳速度，您可以根据需要进行调整。\n- 此快速入门指南适用于 FCOS。如果您使用的是其他项目，请参阅 [configs](configs) 目录下的相应项目的 `README.md` 文件。\n\n\n## 致谢\n\n作者衷心感谢英伟达、华为诺亚方舟实验室、字节跳动和 Adobe，在过去几年中慷慨捐赠了 GPU 算力。\n\n## 引用 AdelaiDet\n\n如果您在研究中使用本工具箱，或希望引用此处发布的基线结果，请使用以下 BibTeX 条目：\n\n```BibTeX\n\n@misc{tian2019adelaidet,\n  author =       {田植、陈浩、王新龙、刘宇亮、沈春华},\n  title =        {{AdelaiDet}: 用于实例级识别任务的工具箱},\n  howpublished = {\\url{https:\u002F\u002Fgit.io\u002Fadelaidet}},\n  year =         {2019}\n}\n```\n以及相关论文：\n```BibTeX\n\n@inproceedings{tian2019fcos,\n  title     =  {{FCOS}: 全卷积单阶段目标检测},\n  author    =  {田植、沈春华、陈浩、何通},\n  booktitle =  {国际计算机视觉会议（ICCV）论文集},\n  year      =  {2019}\n}\n\n@article{tian2021fcos,\n  title   =  {{FCOS}: 一种简单而强大的无锚框目标检测器},\n  author  =  {田植、沈春华、陈浩、何通},\n  journal =  {IEEE 模式分析与机器智能汇刊（TPAMI）},\n  year    =  {2021}\n}\n\n@inproceedings{chen2020blendmask,\n  title     =  {{BlendMask}: 自顶向下结合自底向上实现实例分割},\n  author    =  {陈浩、孙坤阳、田植、沈春华、黄永明、严友良},\n  booktitle =  {IEEE 计算机视觉与模式识别会议（CVPR）论文集},\n  year      =  {2020}\n}\n\n@inproceedings{zhang2020MEInst,\n  title     =  {用于单次实例分割的掩码编码},\n  author    =  {张汝峰、田植、沈春华、尤明宇、严友良},\n  booktitle =  {IEEE 计算机视觉与模式识别会议（CVPR）论文集},\n  year      =  {2020}\n}\n\n@inproceedings{liu2020abcnet,\n  title     =  {{ABCNet}: 基于自适应贝塞尔曲线网络的实时场景文本定位},\n  author    =  {刘宇亮、陈浩、沈春华、何通、金连文、王良伟},\n  booktitle =  {IEEE 计算机视觉与模式识别会议（CVPR）论文集},\n  year      =  {2020}\n}\n\n@ARTICLE{9525302,\n  author={刘宇亮、沈春华、金连文、何通、陈鹏、刘崇宇、陈浩},\n  journal={IEEE 模式分析与机器智能汇刊}, \n  title={ABCNet v2: 用于实时端到端文本定位的自适应贝塞尔曲线网络}, \n  year={2021},\n  volume={},\n  number={},\n  pages={1-1},\n  doi={10.1109\u002FTPAMI.2021.3107437}\n}\n\n@inproceedings{wang2020solo,\n  title     =  {{SOLO}: 基于位置的物体分割},\n  author    =  {王新龙、孔涛、沈春华、蒋宇宁、李雷},\n  booktitle =  {欧洲计算机视觉会议（ECCV）论文集},\n  year      =  {2020}\n}\n\n@inproceedings{wang2020solov2,\n  title     =  {{SOLOv2}: 动态且快速的实例分割},\n  author    =  {王新龙、张汝峰、孔涛、李雷、沈春华},\n  booktitle =  {神经信息处理系统进展会议（NeurIPS）论文集},\n  year      =  {2020}\n}\n\n@article{wang2021solo,\n  title   =  {{SOLO}: 一种简单的实例分割框架},\n  author  =  {王新龙、张汝峰、沈春华、孔涛、李雷},\n  journal =  {IEEE 模式分析与机器智能汇刊（TPAMI）},\n  year    =  {2021}\n}\n\n@article{tian2019directpose,\n  title   =  {{DirectPose}: 直接端到端多人姿态估计},\n  author  =  {田植、陈浩、沈春华},\n  journal =  {arXiv 预印本 arXiv:1911.07451},\n  year    =  {2019}\n}\n\n@inproceedings{tian2020conditional,\n  title     =  {用于实例分割的条件卷积},\n  author    =  {田植、沈春华、陈浩},\n  booktitle =  {欧洲计算机视觉会议（ECCV）论文集},\n  year      =  {2020}\n}\n\n@article{CondInst2022Tian,\n  title   = {利用条件卷积进行实例分割和全景分割},\n  author  = {田植、张博文、陈浩、沈春华},\n  journal = {IEEE 模式分析与机器智能汇刊（TPAMI）},\n  year    =  {2022}\n}\n\n@inproceedings{tian2021boxinst,\n  title     =  {{BoxInst}: 基于边界框标注的高性能实例分割},\n  author    =  {田植、沈春华、王新龙、陈浩},\n  booktitle =  {IEEE 计算机视觉与模式识别会议（CVPR）论文集},\n  year      =  {2021}\n}\n\n@inproceedings{wang2021densecl,\n  title     =  {用于自监督视觉预训练的密集对比学习},\n  author    =  {王新龙、张汝峰、沈春华、孔涛、李雷},\n  booktitle =  {IEEE 计算机视觉与模式识别会议（CVPR）论文集},\n  year      =  {2021}\n}\n\n@inproceedings{Mao2021pose,\n  title     =  {{FCPose}: 全卷积多人姿态估计，采用动态实例感知卷积},\n  author    =  {毛伟安、田植、王新龙、沈春华},\n  booktitle =  {IEEE 计算机视觉与模式识别会议（CVPR）论文集},\n  year      =  {2021}\n}\n```\n\n## 许可证\n\n本项目面向学术用途，采用两条款 BSD 许可证——详情请参阅 LICENSE 文件。如需商业用途，请联系 [Chunhua Shen](mailto:chhshen@gmail.com)。","# AdelaiDet 快速上手指南\n\nAdelaiDet 是一个基于 [Detectron2](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fdetectron2) 的开源工具箱，专注于多种实例级识别任务（如目标检测、实例分割、文本检测等）。本项目集成了 FCOS、BlendMask、SOLOv2、ABCNet 等前沿算法。\n\n> **注意**：截至 2024 年 1 月，原 CloudStor 服务器已停止服务，所有模型文件已迁移至 Hugging Face。\n\n## 1. 环境准备\n\n在开始之前，请确保满足以下系统要求和依赖：\n\n*   **操作系统**: Linux (推荐 Ubuntu 18.04+)\n*   **Python**: 3.6 - 3.9\n*   **PyTorch**: 1.7+ (需与 CUDA 版本匹配)\n*   **CUDA**: 建议 10.2 或 11.0+\n*   **前置依赖**: 必须先安装 **Detectron2**。\n\n### 安装 Detectron2\n请严格按照 Detectron2 官方指南进行安装。为避免兼容性问题，**强烈建议使用特定 commit 版本**：\n\n```bash\n# 克隆 detectron2 并切换到指定版本\ngit clone https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fdetectron2.git\ncd detectron2\ngit checkout 9eb4831f742ae6a13b8edb61d07b619392fb6543\n\n# 安装 (请根据你的 CUDA 版本调整 cu 参数，例如 cu111)\npip install -e 'git+https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fdetectron2.git@9eb4831f742ae6a13b8edb61d07b619392fb6543#egg=detectron2'\n```\n*如果在国内安装缓慢，可配置 pip 国内镜像源（如清华源）：*\n```bash\npip install -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple -e 'git+https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fdetectron2.git@9eb4831f742ae6a13b8edb61d07b619392fb6543#egg=detectron2'\n```\n\n## 2. 安装步骤\n\n安装好 Detectron2 后，即可克隆并编译 AdelaiDet：\n\n```bash\n# 克隆项目\ngit clone https:\u002F\u002Fgithub.com\u002Faim-uofa\u002FAdelaiDet.git\ncd AdelaiDet\n\n# 编译安装\npython setup.py build develop\n```\n\n### Docker 用户（可选）\n如果你偏好使用 Docker，可以直接拉取预构建的镜像，免去环境配置烦恼：\n\n```bash\ndocker pull tianzhi0549\u002Fadet:latest\n```\n\n## 3. 基本使用\n\n### 3.1 使用预训练模型进行推理\n\n以下以 **FCOS** 目标检测模型为例，演示如何对图片进行推理。\n\n**第一步：下载模型**\n模型现已托管在 Hugging Face。你可以使用 `wget` 下载（国内用户若访问 HF 慢，可尝试通过镜像站或代理下载）：\n\n```bash\nwget https:\u002F\u002Fhuggingface.co\u002Ftianzhi\u002FAdelaiDet-FCOS\u002Fresolve\u002Fmain\u002FFCOS_R_50_1x.pth?download=true -O fcos_R_50_1x.pth\n```\n\n**第二步：运行推理**\n准备好输入图片（如 `input1.jpg`），运行以下命令：\n\n```bash\npython demo\u002Fdemo.py \\\n    --config-file configs\u002FFCOS-Detection\u002FR_50_1x.yaml \\\n    --input input1.jpg input2.jpg \\\n    --opts MODEL.WEIGHTS fcos_R_50_1x.pth\n```\n\n*注：其他算法（如 BlendMask, SOLOv2, ABCNet 等）只需替换 `--config-file` 路径和对应的模型权重文件即可，具体配置文件请参考 `configs\u002F` 目录下的说明。*\n\n### 3.2 训练自己的模型\n\n在训练前，请确保已按照 Detectron2 的标准格式准备好了数据集（参考 `datasets\u002FREADME.md`）。\n\n以下以单节点 8 卡训练 FCOS 为例：\n\n```bash\nOMP_NUM_THREADS=1 python tools\u002Ftrain_net.py \\\n    --config-file configs\u002FFCOS-Detection\u002FR_50_1x.yaml \\\n    --num-gpus 8 \\\n    OUTPUT_DIR training_dir\u002Ffcos_R_50_1x\n```\n\n**训练完成后评估模型：**\n\n```bash\nOMP_NUM_THREADS=1 python tools\u002Ftrain_net.py \\\n    --config-file configs\u002FFCOS-Detection\u002FR_50_1x.yaml \\\n    --eval-only \\\n    --num-gpus 8 \\\n    OUTPUT_DIR training_dir\u002Ffcos_R_50_1x \\\n    MODEL.WEIGHTS training_dir\u002Ffcos_R_50_1x\u002Fmodel_final.pth\n```\n\n**关键参数说明：**\n*   `--num-gpus`: 根据实际显卡数量调整。若仅测试推理速度，请设为 `1`。\n*   `OMP_NUM_THREADS=1`: 默认设置以优化 CPU 线程性能，可根据机器情况调整。\n*   `OUTPUT_DIR`: 指定模型保存路径。","某电商物流团队需要开发一套自动化系统，从复杂的快递面单和商品包装图中精准提取弯曲或倾斜的文字信息，并分割出重叠的货物实例以核对库存。\n\n### 没有 AdelaiDet 时\n- **文字识别率低**：传统 OCR 工具难以处理面单上常见的弧形、透视变形文字，导致大量运单号需人工二次录入。\n- **实例分割模糊**：面对堆叠紧密的包裹，通用检测模型生成的掩码边缘粗糙，无法准确区分相邻货物的具体轮廓。\n- **多任务集成难**：团队需分别搭建文字检测、实例分割和关键点定位三套独立框架，代码维护成本高且推理延迟大。\n- **训练门槛高**：缺乏针对密集小目标和特殊文本的高质量预训练模型，从零训练收敛慢且效果不稳定。\n\n### 使用 AdelaiDet 后\n- **高精度文本提取**：利用内置的 ABCNetv2 算法，直接支持端到端的弯曲文本检测与识别，面单自动录入准确率提升至 98%。\n- **精细实例分割**：部署 BlendMask 或 SOLOv2 模型，生成像素级精准的货物掩码，即使货物紧密堆叠也能清晰分离边界。\n- **统一高效架构**：基于 Detectron2 的统一工具箱，在一个框架内同时运行文字识别、实例分割及姿态估计，推理速度提升 40%。\n- **开箱即用模型**：直接加载 HuggingFace 上托管的 COCO 预训练权重，微调少量数据即可快速上线，大幅缩短研发周期。\n\nAdelaiDet 通过集成多种前沿实例级感知算法，一站式解决了复杂场景下“看不清文字”和“分不准物体”的核心痛点，显著提升了视觉系统的落地效率。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Faim-uofa_AdelaiDet_e3628723.png","aim-uofa","Advanced Intelligent Machines (AIM)","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Faim-uofa_27466af8.png","A research team at Zhejiang University, focusing on Computer Vision and broad AI research ...",null,"https:\u002F\u002Fgithub.com\u002Faim-uofa",[80,84,88,92,96],{"name":81,"color":82,"percentage":83},"Python","#3572A5",89.5,{"name":85,"color":86,"percentage":87},"Cuda","#3A4E3A",5.7,{"name":89,"color":90,"percentage":91},"C++","#f34b7d",3.9,{"name":93,"color":94,"percentage":95},"Shell","#89e051",0.7,{"name":97,"color":98,"percentage":99},"Dockerfile","#384d54",0.2,3483,654,"2026-04-13T14:24:12","NOASSERTION",4,"Linux","必需 NVIDIA GPU。测试环境为 NVIDIA 1080Ti (11GB 显存)。需安装与 Detectron2 兼容的 CUDA 版本 (通常推荐 CUDA 10.1-11.x，具体取决于 PyTorch 版本)。","未说明 (建议 16GB+ 以支持多卡训练)",{"notes":109,"python":110,"dependencies":111},"1. 必须基于 Detectron2 构建，且强烈建议使用特定 commit ID (9eb4831) 以避免兼容性问题。\n2. 官方配置默认针对 8-GPU 训练环境，单卡训练需调整配置。\n3. 推理速度测试基于 batch size 1。\n4. 提供预构建的 Docker 镜像 (tianzhi0549\u002Fadet:latest) 以简化环境部署。\n5. 模型文件现托管于 HuggingFace，需手动下载或通过脚本获取。","未说明 (需匹配 Detectron2 和 PyTorch 的版本要求，通常为 3.6-3.9)",[112,113,114,115,116],"detectron2 (commit 9eb4831)","torch","torchvision","opencv-python","pycocotools",[15],[119,120,121,122,123,124,125,126,127,128,129,130,131,132,133],"fcos","blendmask","abcnet","object-detection","instance-segmentation","ocr","text-recognition","meinst","text-detection","condinst","solo","boxinst","solov2","densecl","adelaidet","2026-03-27T02:49:30.150509","2026-04-15T11:26:11.059013",[137,142,147,152,157,161,165],{"id":138,"question_zh":139,"answer_zh":140,"source_url":141},34236,"训练时出现 'ValueError: cannot convert float NaN to integer' 错误怎么办？","这个问题通常是因为输入图片太小，在进行 crop（裁剪）操作时导致除以零的情况。解决方法是禁用裁剪功能，在配置文件中设置：INPUT.CROP.ENABLED: False。","https:\u002F\u002Fgithub.com\u002Faim-uofa\u002FAdelaiDet\u002Fissues\u002F144",{"id":143,"question_zh":144,"answer_zh":145,"source_url":146},34237,"运行 Blendmask 测试图像时出现 CUDA 或驱动相关错误如何解决？","首先尝试重新安装相关依赖库。如果问题依旧，请更新 NVIDIA 显卡驱动。此外，需检查运行时 CUDA 版本与系统安装的 CUDA 版本是否一致，版本不匹配也会导致此类错误。","https:\u002F\u002Fgithub.com\u002Faim-uofa\u002FAdelaiDet\u002Fissues\u002F48",{"id":148,"question_zh":149,"answer_zh":150,"source_url":151},34238,"训练自定义数据集时出现 'AssertionError: The annotation bounding box is outside of the image!' 错误？","这通常是由于图片的 EXIF 方向标志导致 PIL 读取的图片宽高与实际不符。解决方法是使用 ImageOps.exif_transpose 对图片进行转置并保存，或者使用工具（如 RectLabel）移除 EXIF 方向标志。代码示例：\nwith Image.open(img_path) as pil_img:\n    transposed_img = ImageOps.exif_transpose(pil_img)\n    if pil_img.size != transposed_img.size:\n        transposed_img.save(img_path)","https:\u002F\u002Fgithub.com\u002Faim-uofa\u002FAdelaiDet\u002Fissues\u002F139",{"id":153,"question_zh":154,"answer_zh":155,"source_url":156},34239,"CondInst 模型中共享头（shared head）的正确架构设计是怎样的？","正确的架构（Design A）是将 FPN 特征分为三个独立分支：一个用于分类和中心度预测，一个用于回归预测，另一个专门用于控制器预测（controller_pred）。不要将控制器预测与回归预测合并在同一分支末端，否则会导致梯度冲突，降低 Box AP 和 Mask AP 性能。","https:\u002F\u002Fgithub.com\u002Faim-uofa\u002FAdelaiDet\u002Fissues\u002F39",{"id":158,"question_zh":159,"answer_zh":160,"source_url":151},34240,"ABCNet 训练日志中频繁打印 'transposing image' 是什么意思？","这是正常现象。程序检测到图片的 EXIF 方向信息与当前显示方向不一致，自动调用转置功能调整图片方向以适应网络输入。这通常发生在包含不同方向（横向、纵向）图片的数据集中。",{"id":162,"question_zh":163,"answer_zh":164,"source_url":156},34241,"CondInst 的 Mask 分支使用了什么激活函数、归一化层和初始化方法？","Mask 分支的结构应类似于 BlendMask 中的 basis module，但在此处不需要将特征图从 8x 上采样到 4x。具体的激活函数、归一化层和初始化方法虽未在论文中详细说明，但通常遵循 Detectron2 的标准配置（如 ReLU 激活、GN 归一化等），且这些设计选择对最终性能影响不大。",{"id":166,"question_zh":167,"answer_zh":168,"source_url":141},34242,"如何在非忽略中文文本的数据集上训练 ABCNet 以识别中文？","需要扩大 CTLABELS 列表以包含所有中文字符，而不仅仅是默认的英文字符和符号。如果扩大标签后仍无法识别，请检查数据预处理流程，确保没有因为图片尺寸过小导致裁剪出错（需关闭 INPUT.CROP.ENABLED），并确认标注格式正确。",[]]