[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-fizyr--keras-retinanet":3,"tool-fizyr--keras-retinanet":64},[4,17,27,35,43,56],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":16},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,3,"2026-04-05T11:01:52",[13,14,15],"开发框架","图像","Agent","ready",{"id":18,"name":19,"github_repo":20,"description_zh":21,"stars":22,"difficulty_score":23,"last_commit_at":24,"category_tags":25,"status":16},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",138956,2,"2026-04-05T11:33:21",[13,15,26],"语言模型",{"id":28,"name":29,"github_repo":30,"description_zh":31,"stars":32,"difficulty_score":23,"last_commit_at":33,"category_tags":34,"status":16},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 
绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",107662,"2026-04-03T11:11:01",[13,14,15],{"id":36,"name":37,"github_repo":38,"description_zh":39,"stars":40,"difficulty_score":23,"last_commit_at":41,"category_tags":42,"status":16},3704,"NextChat","ChatGPTNextWeb\u002FNextChat","NextChat 是一款轻量且极速的 AI 助手，旨在为用户提供流畅、跨平台的大模型交互体验。它完美解决了用户在多设备间切换时难以保持对话连续性，以及面对众多 AI 模型不知如何统一管理的痛点。无论是日常办公、学习辅助还是创意激发，NextChat 都能让用户随时随地通过网页、iOS、Android、Windows、MacOS 或 Linux 端无缝接入智能服务。\n\n这款工具非常适合普通用户、学生、职场人士以及需要私有化部署的企业团队使用。对于开发者而言，它也提供了便捷的自托管方案，支持一键部署到 Vercel 或 Zeabur 等平台。\n\nNextChat 的核心亮点在于其广泛的模型兼容性，原生支持 Claude、DeepSeek、GPT-4 及 Gemini Pro 等主流大模型，让用户在一个界面即可自由切换不同 AI 能力。此外，它还率先支持 MCP（Model Context Protocol）协议，增强了上下文处理能力。针对企业用户，NextChat 提供专业版解决方案，具备品牌定制、细粒度权限控制、内部知识库整合及安全审计等功能，满足公司对数据隐私和个性化管理的高标准要求。",87618,"2026-04-05T07:20:52",[13,26],{"id":44,"name":45,"github_repo":46,"description_zh":47,"stars":48,"difficulty_score":23,"last_commit_at":49,"category_tags":50,"status":16},2268,"ML-For-Beginners","microsoft\u002FML-For-Beginners","ML-For-Beginners 是由微软推出的一套系统化机器学习入门课程，旨在帮助零基础用户轻松掌握经典机器学习知识。这套课程将学习路径规划为 12 周，包含 26 节精炼课程和 52 道配套测验，内容涵盖从基础概念到实际应用的完整流程，有效解决了初学者面对庞大知识体系时无从下手、缺乏结构化指导的痛点。\n\n无论是希望转型的开发者、需要补充算法背景的研究人员，还是对人工智能充满好奇的普通爱好者，都能从中受益。课程不仅提供了清晰的理论讲解，还强调动手实践，让用户在循序渐进中建立扎实的技能基础。其独特的亮点在于强大的多语言支持，通过自动化机制提供了包括简体中文在内的 50 多种语言版本，极大地降低了全球不同背景用户的学习门槛。此外，项目采用开源协作模式，社区活跃且内容持续更新，确保学习者能获取前沿且准确的技术资讯。如果你正寻找一条清晰、友好且专业的机器学习入门之路，ML-For-Beginners 将是理想的起点。",84991,"2026-04-05T10:45:23",[14,51,52,53,15,54,26,13,55],"数据工具","视频","插件","其他","音频",{"id":57,"name":58,"github_repo":59,"description_zh":60,"stars":61,"difficulty_score":10,"last_commit_at":62,"category_tags":63,"status":16},3128,"ragflow","infiniflow\u002Fragflow","RAGFlow 
是一款领先的开源检索增强生成（RAG）引擎，旨在为大语言模型构建更精准、可靠的上下文层。它巧妙地将前沿的 RAG 技术与智能体（Agent）能力相结合，不仅支持从各类文档中高效提取知识，还能让模型基于这些知识进行逻辑推理和任务执行。\n\n在大模型应用中，幻觉问题和知识滞后是常见痛点。RAGFlow 通过深度解析复杂文档结构（如表格、图表及混合排版），显著提升了信息检索的准确度，从而有效减少模型“胡编乱造”的现象，确保回答既有据可依又具备时效性。其内置的智能体机制更进一步，使系统不仅能回答问题，还能自主规划步骤解决复杂问题。\n\n这款工具特别适合开发者、企业技术团队以及 AI 研究人员使用。无论是希望快速搭建私有知识库问答系统，还是致力于探索大模型在垂直领域落地的创新者，都能从中受益。RAGFlow 提供了可视化的工作流编排界面和灵活的 API 接口，既降低了非算法背景用户的上手门槛，也满足了专业开发者对系统深度定制的需求。作为基于 Apache 2.0 协议开源的项目，它正成为连接通用大模型与行业专有知识之间的重要桥梁。",77062,"2026-04-04T04:44:48",[15,14,13,26,54],{"id":65,"github_repo":66,"name":67,"description_en":68,"description_zh":69,"ai_summary_zh":70,"readme_en":71,"readme_zh":72,"quickstart_zh":73,"use_case_zh":74,"hero_image_url":75,"owner_login":76,"owner_name":77,"owner_avatar_url":78,"owner_bio":79,"owner_company":80,"owner_location":80,"owner_email":81,"owner_twitter":80,"owner_website":82,"owner_url":83,"languages":84,"stars":93,"forks":94,"last_commit_at":95,"license":96,"difficulty_score":97,"env_os":98,"env_gpu":99,"env_ram":100,"env_deps":101,"category_tags":108,"github_topics":80,"view_count":23,"oss_zip_url":80,"oss_zip_packed_at":80,"status":16,"created_at":109,"updated_at":110,"faqs":111,"releases":112},3552,"fizyr\u002Fkeras-retinanet","keras-retinanet","Keras implementation of RetinaNet object detection.","keras-retinanet 是基于 Keras 框架实现的 RetinaNet 目标检测工具，复现了论文《Focal Loss for Dense Object Detection》中的核心算法。它主要解决了传统目标检测模型在密集场景下难以平衡正负样本、导致小物体漏检率高的问题，通过引入独特的“焦点损失（Focal Loss）”机制，显著提升了检测精度。\n\n该工具适合熟悉 Python 和深度学习框架的开发者及研究人员使用，特别是那些需要在 Keras 或 TensorFlow 2.3 环境下进行自定义模型训练、评估或部署的用户。虽然官方已建议新项目转向 PyTorch 版本的 torchvision，但 keras-retinanet 对于维护旧有 Keras 项目或希望深入理解 RetinaNet 架构细节的学习者而言，依然具有重要的参考价值。\n\n其技术亮点在于提供了完整的训练与推理流程支持，包括从训练模型到推理模型的便捷转换脚本，并兼容 MS COCO 等主流数据集。用户可以直接调用预训练模型对图像进行快速检测，也能利用提供的脚本轻松开展微调训练。需要注意的是，由于项目已进入","keras-retinanet 是基于 Keras 框架实现的 RetinaNet 目标检测工具，复现了论文《Focal Loss for Dense Object Detection》中的核心算法。它主要解决了传统目标检测模型在密集场景下难以平衡正负样本、导致小物体漏检率高的问题，通过引入独特的“焦点损失（Focal Loss）”机制，显著提升了检测精度。\n\n该工具适合熟悉 
Python 和深度学习框架的开发者及研究人员使用，特别是那些需要在 Keras 或 TensorFlow 2.3 环境下进行自定义模型训练、评估或部署的用户。虽然官方已建议新项目转向 PyTorch 版本的 torchvision，但 keras-retinanet 对于维护旧有 Keras 项目或希望深入理解 RetinaNet 架构细节的学习者而言，依然具有重要的参考价值。\n\n其技术亮点在于提供了完整的训练与推理流程支持，包括从训练模型到推理模型的便捷转换脚本，并兼容 MS COCO 等主流数据集。用户可以直接调用预训练模型对图像进行快速检测，也能利用提供的脚本轻松开展微调训练。需要注意的是，由于项目已进入维护模式，建议在使用时严格匹配推荐的 Keras 与 TensorFlow 版本，以确保环境稳定性。","# Keras RetinaNet [![Build Status](https:\u002F\u002Ftravis-ci.org\u002Ffizyr\u002Fkeras-retinanet.svg?branch=master)](https:\u002F\u002Ftravis-ci.org\u002Ffizyr\u002Fkeras-retinanet) [![DOI](https:\u002F\u002Fzenodo.org\u002Fbadge\u002F100249425.svg)](https:\u002F\u002Fzenodo.org\u002Fbadge\u002Flatestdoi\u002F100249425)\n\nKeras implementation of RetinaNet object detection as described in [Focal Loss for Dense Object Detection](https:\u002F\u002Farxiv.org\u002Fabs\u002F1708.02002)\nby Tsung-Yi Lin, Priya Goyal, Ross Girshick, Kaiming He and Piotr Dollár.\n\n## :warning: Deprecated\n\nThis repository is deprecated in favor of the [torchvision](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Fvision\u002F) module.\nThis project should work with keras 2.4 and tensorflow 2.3.0, newer versions might break support.\nFor more information, check [here](https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet\u002Fissues\u002F1471#issuecomment-704187205).\n\n## Installation\n\n1) Clone this repository.\n2) In the repository, execute `pip install . 
--user`.\n   Note that due to inconsistencies with how `tensorflow` should be installed,\n   this package does not define a dependency on `tensorflow` as it will try to install that (which at least on Arch Linux results in an incorrect installation).\n   Please make sure `tensorflow` is installed as per your systems requirements.\n3) Alternatively, you can run the code directly from the cloned  repository, however you need to run `python setup.py build_ext --inplace` to compile Cython code first.\n4) Optionally, install `pycocotools` if you want to train \u002F test on the MS COCO dataset by running `pip install --user git+https:\u002F\u002Fgithub.com\u002Fcocodataset\u002Fcocoapi.git#subdirectory=PythonAPI`.\n\n## Testing\nAn example of testing the network can be seen in [this Notebook](https:\u002F\u002Fgithub.com\u002Fdelftrobotics\u002Fkeras-retinanet\u002Fblob\u002Fmaster\u002Fexamples\u002FResNet50RetinaNet.ipynb).\nIn general, inference of the network works as follows:\n```python\nboxes, scores, labels = model.predict_on_batch(inputs)\n```\n\nWhere `boxes` are shaped `(None, None, 4)` (for `(x1, y1, x2, y2)`), scores is shaped `(None, None)` (classification score) and labels is shaped `(None, None)` (label corresponding to the score). In all three outputs, the first dimension represents the shape and the second dimension indexes the list of detections.\n\nLoading models can be done in the following manner:\n```python\nfrom keras_retinanet.models import load_model\nmodel = load_model('\u002Fpath\u002Fto\u002Fmodel.h5', backbone_name='resnet50')\n```\n\nExecution time on NVIDIA Pascal Titan X is roughly 75msec for an image of shape `1000x800x3`.\n\n### Converting a training model to inference model\nThe training procedure of `keras-retinanet` works with *training models*. These are stripped down versions compared to the *inference model* and only contains the layers necessary for training (regression and classification values). 
If you wish to do inference on a model (perform object detection on an image), you need to convert the trained model to an inference model. This is done as follows:\n\n```shell\n# Running directly from the repository:\nkeras_retinanet\u002Fbin\u002Fconvert_model.py \u002Fpath\u002Fto\u002Ftraining\u002Fmodel.h5 \u002Fpath\u002Fto\u002Fsave\u002Finference\u002Fmodel.h5\n\n# Using the installed script:\nretinanet-convert-model \u002Fpath\u002Fto\u002Ftraining\u002Fmodel.h5 \u002Fpath\u002Fto\u002Fsave\u002Finference\u002Fmodel.h5\n```\n\nMost scripts (like `retinanet-evaluate`) also support converting on the fly, using the `--convert-model` argument.\n\n\n## Training\n`keras-retinanet` can be trained using [this](https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet\u002Fblob\u002Fmaster\u002Fkeras_retinanet\u002Fbin\u002Ftrain.py) script.\nNote that the train script uses relative imports since it is inside the `keras_retinanet` package.\nIf you want to adjust the script for your own use outside of this repository,\nyou will need to switch it to use absolute imports.\n\nIf you installed `keras-retinanet` correctly, the train script will be installed as `retinanet-train`.\nHowever, if you make local modifications to the `keras-retinanet` repository, you should run the script directly from the repository.\nThat will ensure that your local changes will be used by the train script.\n\nThe default backbone is `resnet50`. You can change this using the `--backbone=xxx` argument in the running script.\n`xxx` can be one of the backbones in resnet models (`resnet50`, `resnet101`, `resnet152`), mobilenet models (`mobilenet128_1.0`, `mobilenet128_0.75`, `mobilenet160_1.0`, etc), densenet models or vgg models. The different options are defined by each model in their corresponding python scripts (`resnet.py`, `mobilenet.py`, etc).\n\nTrained models can't be used directly for inference. 
To convert a trained model to an inference model, check [here](https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet#converting-a-training-model-to-inference-model).\n\n### Usage\nFor training on [Pascal VOC](http:\u002F\u002Fhost.robots.ox.ac.uk\u002Fpascal\u002FVOC\u002F), run:\n```shell\n# Running directly from the repository:\nkeras_retinanet\u002Fbin\u002Ftrain.py pascal \u002Fpath\u002Fto\u002FVOCdevkit\u002FVOC2007\n\n# Using the installed script:\nretinanet-train pascal \u002Fpath\u002Fto\u002FVOCdevkit\u002FVOC2007\n```\n\nFor training on [MS COCO](http:\u002F\u002Fcocodataset.org\u002F#home), run:\n```shell\n# Running directly from the repository:\nkeras_retinanet\u002Fbin\u002Ftrain.py coco \u002Fpath\u002Fto\u002FMS\u002FCOCO\n\n# Using the installed script:\nretinanet-train coco \u002Fpath\u002Fto\u002FMS\u002FCOCO\n```\n\nFor training on Open Images Dataset [OID](https:\u002F\u002Fstorage.googleapis.com\u002Fopenimages\u002Fweb\u002Findex.html)\nor taking place to the [OID challenges](https:\u002F\u002Fstorage.googleapis.com\u002Fopenimages\u002Fweb\u002Fchallenge.html), run:\n```shell\n# Running directly from the repository:\nkeras_retinanet\u002Fbin\u002Ftrain.py oid \u002Fpath\u002Fto\u002FOID\n\n# Using the installed script:\nretinanet-train oid \u002Fpath\u002Fto\u002FOID\n\n# You can also specify a list of labels if you want to train on a subset\n# by adding the argument 'labels_filter':\nkeras_retinanet\u002Fbin\u002Ftrain.py oid \u002Fpath\u002Fto\u002FOID --labels-filter=Helmet,Tree\n\n# You can also specify a parent label if you want to train on a branch\n# from the semantic hierarchical tree (i.e a parent and all children)\n(https:\u002F\u002Fstorage.googleapis.com\u002Fopenimages\u002Fchallenge_2018\u002Fbbox_labels_500_hierarchy_visualizer\u002Fcircle.html)\n# by adding the argument 'parent-label':\nkeras_retinanet\u002Fbin\u002Ftrain.py oid \u002Fpath\u002Fto\u002FOID --parent-label=Boat\n```\n\n\nFor training on 
[KITTI](http:\u002F\u002Fwww.cvlibs.net\u002Fdatasets\u002Fkitti\u002Feval_object.php), run:\n```shell\n# Running directly from the repository:\nkeras_retinanet\u002Fbin\u002Ftrain.py kitti \u002Fpath\u002Fto\u002FKITTI\n\n# Using the installed script:\nretinanet-train kitti \u002Fpath\u002Fto\u002FKITTI\n\nIf you want to prepare the dataset you can use the following script:\nhttps:\u002F\u002Fgithub.com\u002FNVIDIA\u002FDIGITS\u002Fblob\u002Fmaster\u002Fexamples\u002Fobject-detection\u002Fprepare_kitti_data.py\n```\n\n\nFor training on a [custom dataset], a CSV file can be used as a way to pass the data.\nSee below for more details on the format of these CSV files.\nTo train using your CSV, run:\n```shell\n# Running directly from the repository:\nkeras_retinanet\u002Fbin\u002Ftrain.py csv \u002Fpath\u002Fto\u002Fcsv\u002Ffile\u002Fcontaining\u002Fannotations \u002Fpath\u002Fto\u002Fcsv\u002Ffile\u002Fcontaining\u002Fclasses\n\n# Using the installed script:\nretinanet-train csv \u002Fpath\u002Fto\u002Fcsv\u002Ffile\u002Fcontaining\u002Fannotations \u002Fpath\u002Fto\u002Fcsv\u002Ffile\u002Fcontaining\u002Fclasses\n```\n\nIn general, the steps to train on your own datasets are:\n1) Create a model by calling for instance `keras_retinanet.models.backbone('resnet50').retinanet(num_classes=80)` and compile it.\n   Empirically, the following compile arguments have been found to work well:\n```python\nmodel.compile(\n    loss={\n        'regression'    : keras_retinanet.losses.smooth_l1(),\n        'classification': keras_retinanet.losses.focal()\n    },\n    optimizer=keras.optimizers.Adam(lr=1e-5, clipnorm=0.001)\n)\n```\n2) Create generators for training and testing data (an example is show in [`keras_retinanet.preprocessing.pascal_voc.PascalVocGenerator`](https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet\u002Fblob\u002Fmaster\u002Fkeras_retinanet\u002Fpreprocessing\u002Fpascal_voc.py)).\n3) Use `model.fit_generator` to start training.\n\n## Pretrained 
models\n\nAll models can be downloaded from the [releases page](https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet\u002Freleases).\n\n### MS COCO\n\nResults using the `cocoapi` are shown below (note: according to the paper, this configuration should achieve a mAP of 0.357).\n\n```\n Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.350\n Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.537\n Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.374\n Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.191\n Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.383\n Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.472\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.306\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.491\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.533\n Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.345\n Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.577\n Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.681\n```\n\n### Open Images Dataset\nThere are 3 RetinaNet models based on ResNet50, ResNet101 and ResNet152 trained on all [500 classes](https:\u002F\u002Fgithub.com\u002FZFTurbo\u002FKeras-RetinaNet-for-Open-Images-Challenge-2018\u002Fblob\u002Fmaster\u002Fa00_utils_and_constants.py#L130) of the Open Images Dataset (thanks to @ZFTurbo).\n\n| Backbone  | Image Size (px) | Small validation mAP | LB (Public) |\n| --------- | --------------- | -------------------- | ----------- |\n| ResNet50  | 768 - 1024      | 0.4594               | 0.4223      |\n| ResNet101 | 768 - 1024      | 0.4986               | 0.4520      |\n| ResNet152 | 600 - 800       | 0.4991               | 0.4651      |\n\nFor more information, check 
[@ZFTurbo's](https:\u002F\u002Fgithub.com\u002FZFTurbo\u002FKeras-RetinaNet-for-Open-Images-Challenge-2018) repository.\n\n## CSV datasets\nThe `CSVGenerator` provides an easy way to define your own datasets.\nIt uses two CSV files: one file containing annotations and one file containing a class name to ID mapping.\n\n### Annotations format\nThe CSV file with annotations should contain one annotation per line.\nImages with multiple bounding boxes should use one row per bounding box.\nNote that indexing for pixel values starts at 0.\nThe expected format of each line is:\n```\npath\u002Fto\u002Fimage.jpg,x1,y1,x2,y2,class_name\n```\nBy default the CSV generator will look for images relative to the directory of the annotations file.\n\nSome images may not contain any labeled objects.\nTo add these images to the dataset as negative examples,\nadd an annotation where `x1`, `y1`, `x2`, `y2` and `class_name` are all empty:\n```\npath\u002Fto\u002Fimage.jpg,,,,,\n```\n\nA full example:\n```\n\u002Fdata\u002Fimgs\u002Fimg_001.jpg,837,346,981,456,cow\n\u002Fdata\u002Fimgs\u002Fimg_002.jpg,215,312,279,391,cat\n\u002Fdata\u002Fimgs\u002Fimg_002.jpg,22,5,89,84,bird\n\u002Fdata\u002Fimgs\u002Fimg_003.jpg,,,,,\n```\n\nThis defines a dataset with 3 images.\n`img_001.jpg` contains a cow.\n`img_002.jpg` contains a cat and a bird.\n`img_003.jpg` contains no interesting objects\u002Fanimals.\n\n\n### Class mapping format\nThe class name to ID mapping file should contain one mapping per line.\nEach line should use the following format:\n```\nclass_name,id\n```\n\nIndexing for classes starts at 0.\nDo not include a background class as it is implicit.\n\nFor example:\n```\ncow,0\ncat,1\nbird,2\n```\n\n## Anchor optimization\n\nIn some cases, the default anchor configuration is not suitable for detecting objects in your dataset, for example, if your objects are smaller than the 32x32px (size of the smallest anchors). 
In this case, it might be suitable to modify the anchor configuration, this can be done automatically by following the steps in the [anchor-optimization](https:\u002F\u002Fgithub.com\u002Fmartinzlocha\u002Fanchor-optimization\u002F) repository. To use the generated configuration check [here](https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet-test-data\u002Fblob\u002Fmaster\u002Fconfig\u002Fconfig.ini) for an example config file and then pass it to `train.py` using the `--config` parameter.\n\n## Debugging\nCreating your own dataset does not always work out of the box. There is a [`debug.py`](https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet\u002Fblob\u002Fmaster\u002Fkeras_retinanet\u002Fbin\u002Fdebug.py) tool to help find the most common mistakes.\n\nParticularly helpful is the `--annotations` flag which displays your annotations on the images from your dataset. Annotations are colored in green when there are anchors available and colored in red when there are no anchors available. If an annotation doesn't have anchors available, it means it won't contribute to training. It is normal for a small amount of annotations to show up in red, but if most or all annotations are red there is cause for concern. 
The most common issues are that the annotations are too small or too oddly shaped (stretched out).\n\n## Results\n\n### MS COCO\n\n## Status\nExample output images using `keras-retinanet` are shown below.\n\n\u003Cp align=\"center\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffizyr_keras-retinanet_readme_a533b1db172b.png\" alt=\"Example result of RetinaNet on MS COCO\"\u002F>\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffizyr_keras-retinanet_readme_4a295a1841ec.png\" alt=\"Example result of RetinaNet on MS COCO\"\u002F>\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffizyr_keras-retinanet_readme_4abc69c608a9.png\" alt=\"Example result of RetinaNet on MS COCO\"\u002F>\n\u003C\u002Fp>\n\n### Projects using keras-retinanet\n* [Improving Apple Detection and Counting Using RetinaNet](https:\u002F\u002Fgithub.com\u002Fnikostsagk\u002FApple-detection). This work aims to investigate the apple detection problem through the deployment of the Keras RetinaNet.\n* [Improving RetinaNet for CT Lesion Detection with Dense Masks from Weak RECIST Labels](https:\u002F\u002Farxiv.org\u002Fabs\u002F1906.02283). Research project for detecting lesions in CT using keras-retinanet.\n* [NudeNet](https:\u002F\u002Fgithub.com\u002Fbedapudi6788\u002FNudeNet). Project that focuses on detecting and censoring of nudity.\n* [Individual tree-crown detection in RGB imagery using self-supervised deep learning neural networks](https:\u002F\u002Fwww.biorxiv.org\u002Fcontent\u002F10.1101\u002F532952v1). Research project focused on improving the performance of remotely sensed tree surveys.\n* [ESRI Object Detection Challenge 2019](https:\u002F\u002Fgithub.com\u002Fkunwar31\u002FESRI_Object_Detection). Winning implementation of the ESRI Object Detection Challenge 2019.\n* [Lunar Rockfall Detector Project](https:\u002F\u002Fieeexplore.ieee.org\u002Fdocument\u002F8587120). 
The aim of this project is to [map lunar rockfalls on a global scale](https:\u002F\u002Fwww.nature.com\u002Farticles\u002Fs41467-020-16653-3) using the available > 2 million satellite images.\n* [Mars Rockfall Detector Project](https:\u002F\u002Fieeexplore.ieee.org\u002Fdocument\u002F9103997). The aim of this project is to map rockfalls on Mars.\n* [NATO Innovation Challenge](https:\u002F\u002Fmedium.com\u002Fdata-from-the-trenches\u002Fobject-detection-with-deep-learning-on-aerial-imagery-2465078db8a9). The winning team of the NATO Innovation Challenge used keras-retinanet to detect cars in aerial images ([COWC dataset](https:\u002F\u002Fgdo152.llnl.gov\u002Fcowc\u002F)).\n* [Microsoft Research for Horovod on Azure](https:\u002F\u002Fblogs.technet.microsoft.com\u002Fmachinelearning\u002F2018\u002F06\u002F20\u002Fhow-to-do-distributed-deep-learning-for-object-detection-using-horovod-on-azure\u002F). A research project by Microsoft, using keras-retinanet to distribute training over multiple GPUs using Horovod on Azure.\n* [Anno-Mage](https:\u002F\u002Fvirajmavani.github.io\u002Fsaiat\u002F). A tool that helps you annotate images, using input from the keras-retinanet COCO model as suggestions.\n* [Telenav.AI](https:\u002F\u002Fgithub.com\u002FTelenav\u002FTelenav.AI\u002Ftree\u002Fmaster\u002Fretinanet). For the detection of traffic signs using keras-retinanet.\n* [Towards Deep Placental Histology Phenotyping](https:\u002F\u002Fgithub.com\u002FNellaker-group\u002FTowardsDeepPhenotyping). This research project uses keras-retinanet for analysing the placenta at a cellular level.\n* [4k video example](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=KYueHEMGRos). This demo shows the use of keras-retinanet on a 4k input video.\n* [boring-detector](https:\u002F\u002Fgithub.com\u002Flexfridman\u002Fboring-detector). I suppose not all projects need to solve life's biggest questions. 
This project detects the \"The Boring Company\" hats in videos.\n* [comet.ml](https:\u002F\u002Ftowardsdatascience.com\u002Fhow-i-monitor-and-track-my-machine-learning-experiments-from-anywhere-described-in-13-tweets-ec3d0870af99). Using keras-retinanet in combination with [comet.ml](https:\u002F\u002Fcomet.ml) to interactively inspect and compare experiments.\n* [Weights and Biases](https:\u002F\u002Fapp.wandb.ai\u002Fsyllogismos\u002Fkeras-retinanet\u002Freports?view=carey%2FObject%20Detection%20with%20RetinaNet). Trained keras-retinanet on coco dataset from beginning on resnet50 and resnet101 backends.\n* [Google Open Images Challenge 2018 15th place solution](https:\u002F\u002Fgithub.com\u002FZFTurbo\u002FKeras-RetinaNet-for-Open-Images-Challenge-2018). Pretrained weights for keras-retinanet based on ResNet50, ResNet101 and ResNet152 trained on open images dataset.\n* [poke.AI](https:\u002F\u002Fgithub.com\u002FRaghav-B\u002Fpoke.AI). An experimental AI that attempts to master the 3rd Generation Pokemon games. Using keras-retinanet for in-game mapping and localization.\n* [retinanetjs](https:\u002F\u002Fgithub.com\u002Ffaustomorales\u002Fretinanetjs). A wrapper to run RetinaNet inference in the browser \u002F Node.js. You can also take a look at the [example app](https:\u002F\u002Ffaustomorales.github.io\u002Fretinanetjs-example-app\u002F).\n* [CRFNet](https:\u002F\u002Fgithub.com\u002FTUMFTM\u002FCameraRadarFusionNet). This network fuses radar and camera data to perform object detection for autonomous driving applications.\n* [LogoDet](https:\u002F\u002Fgithub.com\u002FnotAI-tech\u002FLogoDet). Project for detecting company logos in images.\n* [AIR: Aerial Inspection RetinaNet](https:\u002F\u002Fgithub.com\u002FAccenture\u002FAIR). 
A deep learning solution for supporting land search and rescue missions with drones.\n\n\nIf you have a project based on `keras-retinanet` and would like to have it published here, shoot me a message on Slack.\n\n### Notes\n* This repository requires Tensorflow 2.3.0 or higher.\n* This repository is [tested](https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet\u002Fblob\u002Fmaster\u002F.travis.yml) using OpenCV 3.4.\n* This repository is [tested](https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet\u002Fblob\u002Fmaster\u002F.travis.yml) using Python 2.7 and 3.6.\n\nContributions to this project are welcome.\n\n### Discussions\nFeel free to join the `#keras-retinanet` [Keras Slack](https:\u002F\u002Fkeras-slack-autojoin.herokuapp.com\u002F) channel for discussions and questions.\n\n## FAQ\n* **I get the warning `UserWarning: No training configuration found in save file: the model was not compiled. Compile it manually.`, should I be worried?** This warning can safely be ignored during inference.\n* **I get the error `ValueError: not enough values to unpack (expected 3, got 2)` during inference, what to do?**. This is because you are using a train model to do inference. See https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet#converting-a-training-model-to-inference-model for more information.\n* **How do I do transfer learning?** The easiest solution is to use the `--weights` argument when training. Keras will load models, even if the number of classes don't match (it will simply skip loading of weights when there is a mismatch). Run for example `retinanet-train --weights snapshots\u002Fsome_coco_model.h5 pascal \u002Fpath\u002Fto\u002Fpascal` to transfer weights from a COCO model to a PascalVOC training session. 
If your dataset is small, you can also use the `--freeze-backbone` argument to freeze the backbone layers.\n* **How do I change the number \u002F shape of the anchors?** The train tool allows to pass a configuration file, where the anchor parameters can be adjusted. Check [here](https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet-test-data\u002Fblob\u002Fmaster\u002Fconfig\u002Fconfig.ini) for an example config file.\n* **I get a loss of `0`, what is going on?** This mostly happens when none of the anchors \"fit\" on your objects, because they are most likely too small or elongated. You can verify this using the [debug](https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet#debugging) tool.\n* **I have an older model, can I use it after an update of keras-retinanet?** This depends on what has changed. If it is a change that doesn't affect the weights then you can \"update\" models by creating a new retinanet model, loading your old weights using `model.load_weights(weights_path, by_name=True)` and saving this model. If the change has been too significant, you should retrain your model (you can try to load in the weights from your old model when starting training, this might be a better starting position than ImageNet).\n* **I get the error `ModuleNotFoundError: No module named 'keras_retinanet.utils.compute_overlap'`, how do I fix this?** Most likely you are running the code from the cloned repository. This is fine, but you need to compile some extensions for this to work (`python setup.py build_ext --inplace`).\n* **How do I train on my own dataset?** The steps to train on your dataset are roughly as follows:\n* 1. Prepare your dataset in the CSV format (a training and validation split is advised).\n* 2. Check that your dataset is correct using `retinanet-debug`.\n* 3. Train retinanet, preferably using the pretrained COCO weights (this gives a **far** better starting point, making training much quicker and accurate). 
You can optionally perform evaluation of your validation set during training to keep track of how well it performs (advised).\n* 4. Convert your training model to an inference model.\n* 5. Evaluate your inference model on your test or validation set.\n* 6. Profit!\n","# Keras RetinaNet [![构建状态](https:\u002F\u002Ftravis-ci.org\u002Ffizyr\u002Fkeras-retinanet.svg?branch=master)](https:\u002F\u002Ftravis-ci.org\u002Ffizyr\u002Fkeras-retinanet) [![DOI](https:\u002F\u002Fzenodo.org\u002Fbadge\u002F100249425.svg)](https:\u002F\u002Fzenodo.org\u002Fbadge\u002Flatestdoi\u002F100249425)\n\nKeras 中实现的 RetinaNet 目标检测模型，如 Tsung-Yi Lin、Priya Goyal、Ross Girshick、Kaiming He 和 Piotr Dollár 在论文《用于密集目标检测的焦点损失》（[Focal Loss for Dense Object Detection](https:\u002F\u002Farxiv.org\u002Fabs\u002F1708.02002)）中所述。\n\n## :warning: 已弃用\n\n此仓库已被弃用，建议使用 [torchvision](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Fvision\u002F) 模块。  \n该项目应能与 Keras 2.4 和 TensorFlow 2.3.0 兼容，更高版本可能会导致支持中断。  \n更多信息请参阅 [此处](https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet\u002Fissues\u002F1471#issuecomment-704187205)。\n\n## 安装\n\n1) 克隆本仓库。  \n2) 在仓库目录下执行 `pip install . 
--user`。  \n   注意：由于 TensorFlow 的安装方式存在不一致性，本包未将 TensorFlow 定义为依赖项，因为这会导致尝试自动安装 TensorFlow（至少在 Arch Linux 上会导致错误安装）。请确保根据您的系统要求正确安装 TensorFlow。  \n3) 或者，您也可以直接从克隆的仓库运行代码，但需要先运行 `python setup.py build_ext --inplace` 编译 Cython 代码。  \n4) 如果希望在 MS COCO 数据集上进行训练或测试，可选择安装 `pycocotools`，方法是运行 `pip install --user git+https:\u002F\u002Fgithub.com\u002Fcocodataset\u002Fcocoapi.git#subdirectory=PythonAPI`。\n\n## 测试\n网络测试的示例可在 [此 Notebook](https:\u002F\u002Fgithub.com\u002Fdelftrobotics\u002Fkeras-retinanet\u002Fblob\u002Fmaster\u002Fexamples\u002FResNet50RetinaNet.ipynb) 中查看。  \n通常，网络推理的流程如下：\n```python\nboxes, scores, labels = model.predict_on_batch(inputs)\n```\n\n其中 `boxes` 的形状为 `(None, None, 4)`（表示 `(x1, y1, x2, y2)`），`scores` 的形状为 `(None, None)`（分类得分），`labels` 的形状为 `(None, None)`（与得分对应的标签）。在三个输出中，第一个维度表示批次大小，第二个维度索引检测结果列表。\n\n加载模型的方式如下：\n```python\nfrom keras_retinanet.models import load_model\nmodel = load_model('\u002Fpath\u002Fto\u002Fmodel.h5', backbone_name='resnet50')\n```\n\n在 NVIDIA Pascal Titan X 上，处理一张 `1000x800x3` 大小的图像大约需要 75 毫秒。\n\n### 将训练模型转换为推理模型\n`keras-retinanet` 的训练过程使用的是“训练模型”。这些模型相比“推理模型”进行了精简，仅包含训练所需的层（回归和分类分支）。如果您希望对模型进行推理（即在图像上执行目标检测），则需要将训练好的模型转换为推理模型。转换方法如下：\n\n```shell\n# 直接从仓库运行：\nkeras_retinanet\u002Fbin\u002Fconvert_model.py \u002Fpath\u002Fto\u002Ftraining\u002Fmodel.h5 \u002Fpath\u002Fto\u002Fsave\u002Finference\u002Fmodel.h5\n\n# 使用已安装的脚本：\nretinanet-convert-model \u002Fpath\u002Fto\u002Ftraining\u002Fmodel.h5 \u002Fpath\u002Fto\u002Fsave\u002Finference\u002Fmodel.h5\n```\n\n大多数脚本（如 `retinanet-evaluate`）也支持通过 `--convert-model` 参数即时转换。\n\n## 训练\n`keras-retinanet` 可以使用 [此脚本](https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet\u002Fblob\u002Fmaster\u002Fkeras_retinanet\u002Fbin\u002Ftrain.py) 进行训练。  \n请注意，该训练脚本使用相对导入，因为它位于 `keras_retinanet` 包内。  \n如果您希望在本仓库之外自定义使用该脚本，则需要将其改为使用绝对导入。\n\n如果正确安装了 `keras-retinanet`，训练脚本将被安装为 `retinanet-train`。  \n然而，如果您对 `keras-retinanet` 仓库进行了本地修改，则应直接从仓库运行该脚本，以确保训练脚本能够应用您的更改。\n\n默认骨干网络为 
`resnet50`。您可以通过运行脚本时添加 `--backbone=xxx` 参数来更改骨干网络。  \n`xxx` 可以是 ResNet 模型中的任一种骨干（`resnet50`、`resnet101`、`resnet152`）、MobileNet 模型（`mobilenet128_1.0`、`mobilenet128_0.75`、`mobilenet160_1.0` 等）、DenseNet 模型或 VGG 模型。不同选项由各自模型的 Python 脚本（如 `resnet.py`、`mobilenet.py` 等）定义。\n\n训练好的模型不能直接用于推理。要将训练模型转换为推理模型，请参阅 [此处](https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet#converting-a-training-model-to-inference-model)。\n\n### 使用方法\n#### 在 Pascal VOC 数据集上训练\n```shell\n# 直接从仓库运行：\nkeras_retinanet\u002Fbin\u002Ftrain.py pascal \u002Fpath\u002Fto\u002FVOCdevkit\u002FVOC2007\n\n# 使用已安装的脚本：\nretinanet-train pascal \u002Fpath\u002Fto\u002FVOCdevkit\u002FVOC2007\n```\n\n#### 在 MS COCO 数据集上训练\n```shell\n# 直接从仓库运行：\nkeras_retinanet\u002Fbin\u002Ftrain.py coco \u002Fpath\u002Fto\u002FMS\u002FCOCO\n\n# 使用已安装的脚本：\nretinanet-train coco \u002Fpath\u002Fto\u002FMS\u002FCOCO\n```\n\n#### 在 Open Images Dataset (OID) 上训练\n或参加 [OID 挑战赛](https:\u002F\u002Fstorage.googleapis.com\u002Fopenimages\u002Fweb\u002Fchallenge.html)，可以按以下方式训练：\n```shell\n# 直接从仓库运行：\nkeras_retinanet\u002Fbin\u002Ftrain.py oid \u002Fpath\u002Fto\u002FOID\n\n# 使用已安装的脚本：\nretinanet-train oid \u002Fpath\u002Fto\u002FOID\n\n# 如果只想在特定子集上训练，可以添加 `labels_filter` 参数指定标签列表：\nkeras_retinanet\u002Fbin\u002Ftrain.py oid \u002Fpath\u002Fto\u002FOID --labels-filter=Helmet,Tree\n\n# 如果想基于语义层次树的一个父节点及其所有子节点进行训练，可以添加 `parent-label` 参数：\nkeras_retinanet\u002Fbin\u002Ftrain.py oid \u002Fpath\u002Fto\u002FOID --parent-label=Boat\n```\n\n#### 在 KITTI 数据集上训练\n```shell\n# 直接从仓库运行：\nkeras_retinanet\u002Fbin\u002Ftrain.py kitti \u002Fpath\u002Fto\u002FKITTI\n\n# 使用已安装的脚本：\nretinanet-train kitti \u002Fpath\u002Fto\u002FKITTI\n\n# 如果需要准备数据集，可以使用以下脚本：\nhttps:\u002F\u002Fgithub.com\u002FNVIDIA\u002FDIGITS\u002Fblob\u002Fmaster\u002Fexamples\u002Fobject-detection\u002Fprepare_kitti_data.py\n```\n\n#### 在自定义数据集上训练\n可以使用 CSV 文件作为数据输入方式。  \n有关 CSV 文件格式的详细信息，请参见下文。  \n使用 CSV 文件训练的方法如下：\n```shell\n# 直接从仓库运行：\nkeras_retinanet\u002Fbin\u002Ftrain.py csv 
\u002Fpath\u002Fto\u002Fcsv\u002Ffile\u002Fcontaining\u002Fannotations \u002Fpath\u002Fto\u002Fcsv\u002Ffile\u002Fcontaining\u002Fclasses\n\n# 使用已安装的脚本：\nretinanet-train csv \u002Fpath\u002Fto\u002Fcsv\u002Ffile\u002Fcontaining\u002Fannotations \u002Fpath\u002Fto\u002Fcsv\u002Ffile\u002Fcontaining\u002Fclasses\n```\n\n一般来说，使用自己的数据集进行训练的步骤如下：\n1) 创建一个模型，例如通过调用 `keras_retinanet.models.backbone('resnet50').retinanet(num_classes=80)` 并对其进行编译。\n   根据经验，以下编译参数表现良好：\n```python\nmodel.compile(\n    loss={\n        'regression'    : keras_retinanet.losses.smooth_l1(),\n        'classification': keras_retinanet.losses.focal()\n    },\n    optimizer=keras.optimizers.Adam(lr=1e-5, clipnorm=0.001)\n)\n```\n2) 为训练和测试数据创建生成器（示例可见于 [`keras_retinanet.preprocessing.pascal_voc.PascalVocGenerator`](https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet\u002Fblob\u002Fmaster\u002Fkeras_retinanet\u002Fpreprocessing\u002Fpascal_voc.py)）。\n3) 使用 `model.fit_generator` 开始训练。\n\n## 预训练模型\n\n所有模型均可从 [releases 页面](https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet\u002Freleases) 下载。\n\n### MS COCO\n\n使用 `cocoapi` 得到的结果如下所示（注意：根据论文，此配置应达到 mAP 0.357）。\n\n```\n 平均精度 (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.350\n 平均精度 (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.537\n 平均精度 (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.374\n 平均精度 (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.191\n 平均精度 (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.383\n 平均精度 (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.472\n 平均召回率 (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.306\n 平均召回率 (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.491\n 平均召回率 (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.533\n 平均召回率 (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.345\n 平均召回率 (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.577\n 平均召回率 (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.681\n```\n\n### Open Images 
数据集\n基于 ResNet50、ResNet101 和 ResNet152 的 3 个 RetinaNet 模型已在 Open Images 数据集的所有 [500 个类别](https:\u002F\u002Fgithub.com\u002FZFTurbo\u002FKeras-RetinaNet-for-Open-Images-Challenge-2018\u002Fblob\u002Fmaster\u002Fa00_utils_and_constants.py#L130) 上进行了训练（感谢 @ZFTurbo）。\n\n| 主干网络  | 图像尺寸 (px) | 小验证集 mAP | LB (公开) |\n| --------- | --------------- | -------------------- | ----------- |\n| ResNet50  | 768 - 1024      | 0.4594               | 0.4223      |\n| ResNet101 | 768 - 1024      | 0.4986               | 0.4520      |\n| ResNet152 | 600 - 800       | 0.4991               | 0.4651      |\n\n更多信息请查看 [@ZFTurbo 的](https:\u002F\u002Fgithub.com\u002FZFTurbo\u002FKeras-RetinaNet-for-Open-Images-Challenge-2018)仓库。\n\n## CSV 数据集\n`CSVGenerator` 提供了一种简便的方式来定义您自己的数据集。\n它使用两个 CSV 文件：一个包含标注信息，另一个包含类别名称到 ID 的映射。\n\n### 标注格式\n包含标注信息的 CSV 文件应每行一条标注。\n对于有多个边界框的图像，每个边界框应占一行。\n请注意，像素值的索引从 0 开始。\n每行的预期格式为：\n```\npath\u002Fto\u002Fimage.jpg,x1,y1,x2,y2,class_name\n```\n默认情况下，CSV 生成器会相对于标注文件所在的目录查找图像。\n\n有些图像可能不包含任何标记的对象。\n要将这些图像作为负样本添加到数据集中，\n可以添加一条标注，其中 `x1`、`y1`、`x2`、`y2` 和 `class_name` 均为空：\n```\npath\u002Fto\u002Fimage.jpg,,,,,\n```\n\n完整示例：\n```\n\u002Fdata\u002Fimgs\u002Fimg_001.jpg,837,346,981,456,cow\n\u002Fdata\u002Fimgs\u002Fimg_002.jpg,215,312,279,391,cat\n\u002Fdata\u002Fimgs\u002Fimg_002.jpg,22,5,89,84,bird\n\u002Fdata\u002Fimgs\u002Fimg_003.jpg,,,,,\n```\n\n这定义了一个包含 3 张图像的数据集。\n`img_001.jpg` 包含一头牛。\n`img_002.jpg` 包含一只猫和一只鸟。\n`img_003.jpg` 不包含任何感兴趣的物体或动物。\n\n\n### 类别映射格式\n类别名称到 ID 的映射文件应每行一条映射。\n每行应采用以下格式：\n```\nclass_name,id\n```\n\n类别的索引从 0 开始。\n不要包含背景类，因为它默认存在。\n\n例如：\n```\ncow,0\ncat,1\nbird,2\n```\n\n## 锚点优化\n\n在某些情况下，默认的锚点配置不适合检测您数据集中的目标，例如当您的目标小于 32×32 像素（最小锚点的尺寸）时。在这种情况下，修改锚点配置可能是合适的，可以通过遵循 [anchor-optimization](https:\u002F\u002Fgithub.com\u002Fmartinzlocha\u002Fanchor-optimization\u002F) 仓库中的步骤自动完成。要使用生成的配置，请参阅 [此处](https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet-test-data\u002Fblob\u002Fmaster\u002Fconfig\u002Fconfig.ini) 的示例配置文件，然后使用 `--config` 参数将其传递给 
`train.py`。\n\n## 调试\n创建您自己的数据集并不总是能立即成功。有一个名为 [`debug.py`](https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet\u002Fblob\u002Fmaster\u002Fkeras_retinanet\u002Fbin\u002Fdebug.py) 的工具可以帮助您找到最常见的错误。\n特别是 `--annotations` 标志非常有用，它可以将您的标注显示在数据集中的图像上。当有可用的锚点时，标注会以绿色显示；如果没有可用的锚点，则以红色显示。如果某个标注没有可用的锚点，那么它将不会对训练产生贡献。少量标注显示为红色是正常的，但如果大多数或全部标注都显示为红色，则需要引起重视。最常见的问题是标注太小或形状过于奇怪（拉长变形）。\n\n## 结果\n\n### MS COCO\n\n## 状态\n下面展示了使用 `keras-retinanet` 的示例输出图像。\n\n\u003Cp align=\"center\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffizyr_keras-retinanet_readme_a533b1db172b.png\" alt=\"RetinaNet 在 MS COCO 上的示例结果\"\u002F>\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffizyr_keras-retinanet_readme_4a295a1841ec.png\" alt=\"RetinaNet 在 MS COCO 上的示例结果\"\u002F>\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffizyr_keras-retinanet_readme_4abc69c608a9.png\" alt=\"RetinaNet 在 MS COCO 上的示例结果\"\u002F>\n\u003C\u002Fp>\n\n### 使用 keras-retinanet 的项目\n* [使用 RetinaNet 改进苹果检测与计数](https:\u002F\u002Fgithub.com\u002Fnikostsagk\u002FApple-detection)。该项目旨在通过部署 Keras RetinaNet 来研究苹果检测问题。\n* [利用弱 RECIST 标注生成的密集掩码改进 CT 病灶检测的 RetinaNet](https:\u002F\u002Farxiv.org\u002Fabs\u002F1906.02283)。一项使用 keras-retinanet 进行 CT 图像病灶检测的研究项目。\n* [NudeNet](https:\u002F\u002Fgithub.com\u002Fbedapudi6788\u002FNudeNet)。一个专注于检测和屏蔽裸露内容的项目。\n* [基于自监督深度学习神经网络的 RGB 影像个体树冠检测](https:\u002F\u002Fwww.biorxiv.org\u002Fcontent\u002F10.1101\u002F532952v1)。一项专注于提升遥感树木调查性能的研究项目。\n* [ESRI 对象检测挑战赛 2019](https:\u002F\u002Fgithub.com\u002Fkunwar31\u002FESRI_Object_Detection)。ESRI 对象检测挑战赛 2019 的获奖实现。\n* [月球落石检测项目](https:\u002F\u002Fieeexplore.ieee.org\u002Fdocument\u002F8587120)。该项目旨在利用现有的超过 200 万张卫星图像，[在全球范围内绘制月球落石分布图](https:\u002F\u002Fwww.nature.com\u002Farticles\u002Fs41467-020-16653-3)。\n* [火星落石检测项目](https:\u002F\u002Fieeexplore.ieee.org\u002Fdocument\u002F9103997)。该项目的目标是绘制火星上的落石分布图。\n* 
[北约创新挑战赛](https:\u002F\u002Fmedium.com\u002Fdata-from-the-trenches\u002Fobject-detection-with-deep-learning-on-aerial-imagery-2465078db8a9)。北约创新挑战赛的获胜团队使用 keras-retinanet 在航拍图像中检测车辆（[COWC 数据集](https:\u002F\u002Fgdo152.llnl.gov\u002Fcowc\u002F)）。\n* [微软研究院关于 Azure 上 Horovod 的研究](https:\u002F\u002Fblogs.technet.microsoft.com\u002Fmachinelearning\u002F2018\u002F06\u002F20\u002Fhow-to-do-distributed-deep-learning-for-object-detection-using-horovod-on-azure\u002F)。微软的一项研究项目，利用 keras-retinanet 结合 Azure 上的 Horovod 将训练分布式到多块 GPU 上。\n* [Anno-Mage](https:\u002F\u002Fvirajmavani.github.io\u002Fsaiat\u002F)。一款借助 keras-retinanet COCO 模型的建议来帮助标注图像的工具。\n* [Telenav.AI](https:\u002F\u002Fgithub.com\u002FTelenav\u002FTelenav.AI\u002Ftree\u002Fmaster\u002Fretinanet)。用于使用 keras-retinanet 检测交通标志。\n* [迈向深度胎盘组织学表型分析](https:\u002F\u002Fgithub.com\u002FNellaker-group\u002FTowardsDeepPhenotyping)。该研究项目使用 keras-retinanet 在细胞水平上分析胎盘。\n* [4K 视频示例](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=KYueHEMGRos)。该演示展示了在 4K 输入视频上使用 keras-retinanet 的效果。\n* [boring-detector](https:\u002F\u002Fgithub.com\u002Flexfridman\u002Fboring-detector)。我想并非所有项目都需要解决人生的重大问题。该项目用于检测视频中的“The Boring Company”帽子。\n* [comet.ml](https:\u002F\u002Ftowardsdatascience.com\u002Fhow-i-monitor-and-track-my-machine-learning-experiments-from-anywhere-described-in-13-tweets-ec3d0870af99)。将 keras-retinanet 与 comet.ml 结合使用，以交互方式检查和比较实验。\n* [Weights and Biases](https:\u002F\u002Fapp.wandb.ai\u002Fsyllogismos\u002Fkeras-retinanet\u002Freports?view=carey%2FObject%20Detection%20with%20RetinaNet)。从头开始在 resnet50 和 resnet101 后端上训练了 keras-retinanet，并在 coco 数据集上进行微调。\n* [Google Open Images 挑战赛 2018 第 15 名解决方案](https:\u002F\u002Fgithub.com\u002FZFTurbo\u002FKeras-RetinaNet-for-Open-Images-Challenge-2018)。基于 ResNet50、ResNet101 和 ResNet152 预训练的 keras-retinanet 权重，这些权重是在 Open Images 数据集上训练得到的。\n* [poke.AI](https:\u002F\u002Fgithub.com\u002FRaghav-B\u002Fpoke.AI)。一款尝试掌握第三代宝可梦游戏的实验性 AI。使用 keras-retinanet 进行游戏内地图构建和定位。\n* 
[retinanetjs](https:\u002F\u002Fgithub.com\u002Ffaustomorales\u002Fretinanetjs)。一个用于在浏览器或 Node.js 中运行 RetinaNet 推理的封装库。你也可以查看其[示例应用](https:\u002F\u002Ffaustomorales.github.io\u002Fretinanetjs-example-app\u002F)。\n* [CRFNet](https:\u002F\u002Fgithub.com\u002FTUMFTM\u002FCameraRadarFusionNet)。该网络融合雷达和摄像头数据，用于自动驾驶场景下的目标检测。\n* [LogoDet](https:\u002F\u002Fgithub.com\u002FnotAI-tech\u002FLogoDet)。一个用于检测图像中公司 logo 的项目。\n* [AIR: 航空巡检 RetinaNet](https:\u002F\u002Fgithub.com\u002FAccenture\u002FAIR)。一种利用无人机支持陆地搜救任务的深度学习解决方案。\n\n\n如果你有一个基于 `keras-retinanet` 的项目，并希望在此处发布，请在 Slack 上给我发消息。\n\n### 注意事项\n* 本仓库需要 Tensorflow 2.3.0 或更高版本。\n* 本仓库已使用 OpenCV 3.4 进行[测试](https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet\u002Fblob\u002Fmaster\u002F.travis.yml)。\n* 本仓库已使用 Python 2.7 和 3.6 进行[测试](https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet\u002Fblob\u002Fmaster\u002F.travis.yml)。\n\n欢迎为该项目贡献代码。\n\n### 讨论\n欢迎加入 Keras Slack 的 `#keras-retinanet` 频道，参与讨论和提问。\n\n## 常见问题解答\n* **我收到警告 `UserWarning: No training configuration found in save file: the model was not compiled. Compile it manually.`，需要担心吗？** 在推理阶段，可以安全地忽略此警告。\n* **我在推理时遇到错误 `ValueError: not enough values to unpack (expected 3, got 2)`，该怎么办？** 这是因为您正在使用训练好的模型进行推理。更多信息请参阅：https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet#converting-a-training-model-to-inference-model。\n* **如何进行迁移学习？** 最简单的方法是在训练时使用 `--weights` 参数。Keras 可以加载模型，即使类别数不匹配（它会自动跳过不匹配部分的权重加载）。例如，运行 `retinanet-train --weights snapshots\u002Fsome_coco_model.h5 pascal \u002Fpath\u002Fto\u002Fpascal`，即可将 COCO 模型的权重迁移到 Pascal VOC 数据集的训练中。如果您的数据集较小，还可以使用 `--freeze-backbone` 参数冻结骨干网络层。\n* **如何更改锚框的数量或形状？** 训练工具允许传入一个配置文件，在其中可以调整锚框参数。示例配置文件请参见：[这里](https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet-test-data\u002Fblob\u002Fmaster\u002Fconfig\u002Fconfig.ini)。\n* **损失值为 `0`，这是怎么回事？** 这通常是因为没有锚框能够与您的目标对象匹配，很可能是因为目标太小或过于细长。您可以使用 [调试工具](https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet#debugging) 来验证这一点。\n* **我有一个旧模型，在更新 keras-retinanet 后还能使用吗？** 这取决于具体发生了哪些变化。如果这些变化不影响权重，则可以通过创建一个新的 RetinaNet 模型，使用 
`model.load_weights(weights_path, by_name=True)` 加载旧模型的权重，然后保存新模型来“更新”模型。如果变化较大，则建议重新训练模型（您可以在开始训练时尝试加载旧模型的权重，这可能比从 ImageNet 初始化有更好的起点）。\n* **我收到错误 `ModuleNotFoundError: 没有名为 'keras_retinanet.utils.compute_overlap' 的模块`，该如何解决？** 很可能是您直接从克隆的仓库中运行代码。这样做是可以的，但需要编译一些扩展才能正常工作（执行 `python setup.py build_ext --inplace`）。\n* **如何在我的数据集上训练模型？** 在自己的数据集上训练的步骤大致如下：\n* 1. 将数据集准备成 CSV 格式（建议划分训练集和验证集）。\n* 2. 使用 `retinanet-debug` 检查数据集是否正确。\n* 3. 训练 RetinaNet，最好使用预训练的 COCO 权重（这样可以获得**更好**的起点，使训练更快且更准确）。您也可以在训练过程中对验证集进行评估，以跟踪模型的表现（建议）。\n* 4. 将训练好的模型转换为推理模型。\n* 5. 在测试集或验证集上评估您的推理模型。\n* 6. 大功告成！","# Keras RetinaNet 快速上手指南\n\n> **⚠️ 重要提示**：本项目已停止维护（Deprecated），官方推荐使用 [torchvision](https:\u002F\u002Fgithub.com\u002Fpytorch\u002Fvision\u002F) 模块。本项目仅兼容 Keras 2.4 和 TensorFlow 2.3.0，更高版本可能导致运行失败。如需在生产环境使用，请谨慎评估或迁移至 PyTorch 版本。\n\n## 环境准备\n\n*   **操作系统**: Linux \u002F macOS (Windows 支持有限，需自行配置编译环境)\n*   **Python**: 3.6 - 3.8 (推荐)\n*   **核心依赖**:\n    *   TensorFlow == 2.3.0\n    *   Keras == 2.4.3\n    *   Cython\n    *   opencv-python\n    *   pillow\n    *   matplotlib\n*   **可选依赖**: `pycocotools` (如需使用 MS COCO 数据集)\n\n**国内加速建议**：\n在安装 Python 依赖时，建议使用清华或阿里镜像源以提升下载速度：\n```bash\npip install -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple \u003Cpackage_name>\n```\n\n## 安装步骤\n\n### 1. 克隆仓库\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Ffizyr\u002Fkeras-retinanet.git\ncd keras-retinanet\n```\n\n### 2. 安装 TensorFlow (前置条件)\n由于包内未强制指定 TensorFlow 版本以避免冲突，请先手动安装兼容版本：\n```bash\npip install -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple tensorflow==2.3.0 keras==2.4.3\n```\n\n### 3. 安装 keras-retinanet\n在项目根目录下执行以下命令进行安装（包含 Cython 扩展编译）：\n```bash\npip install . --user\n```\n\n*或者，如果您希望直接运行源码而不进行全局安装，需先编译扩展：*\n```bash\npython setup.py build_ext --inplace\n```\n\n### 4. 
(可选) 安装 COCO 工具\n如果需要训练或测试 MS COCO 数据集：\n```bash\npip install --user git+https:\u002F\u002Fgithub.com\u002Fcocodataset\u002Fcocoapi.git#subdirectory=PythonAPI\n```\n*注：国内网络若拉取 GitHub 失败，可尝试下载该仓库 zip 包解压后本地安装。*\n\n## 基本使用\n\n### 1. 模型推理 (Inference)\n训练好的模型不能直接用于推理，必须先转换为推理模型。\n\n**转换模型：**\n```bash\n# 语法：retinanet-convert-model \u003C训练模型路径> \u003C保存路径>\nretinanet-convert-model \u002Fpath\u002Fto\u002Ftraining\u002Fmodel.h5 \u002Fpath\u002Fto\u002Finference_model.h5\n```\n\n**执行预测：**\n您可以使用 Python 脚本加载模型并进行检测：\n\n```python\nimport cv2\nimport numpy as np\nfrom keras_retinanet.models import load_model\nfrom keras_retinanet.utils.image import preprocess_image, resize_image\n\n# 加载推理模型\nmodel = load_model('\u002Fpath\u002Fto\u002Finference_model.h5', backbone_name='resnet50')\n\n# 读取并预处理图片（cv2.imread 返回 BGR 图像，preprocess_image 默认按 BGR 通道顺序处理，无需转换为 RGB）\nimage = cv2.imread('test.jpg')\ninputs = preprocess_image(image)\ninputs, scale = resize_image(inputs)\n\n# 执行预测（模型输入需带 batch 维度）\nboxes, scores, labels = model.predict_on_batch(np.expand_dims(inputs, axis=0))\n\n# 结果处理 (boxes 格式为 x1, y1, x2, y2)\n# 将坐标还原到原图尺度，再根据 scores 过滤低置信度框\nboxes \u002F= scale\n```\n\n### 2. 快速训练 (以自定义 CSV 数据集为例)\nKeras RetinaNet 支持通过 CSV 文件定义自定义数据集。\n\n**数据格式准备：**\n1.  **annotations.csv**: 每行格式为 `图像路径,x1,y1,x2,y2,类别名`\n2.  
**classes.csv**: 每行格式为 `类别名,ID` (ID 从 0 开始)\n\n**启动训练：**\n```bash\n# 语法：retinanet-train csv \u003C标注文件> \u003C类别映射文件>\nretinanet-train csv \u002Fpath\u002Fto\u002Fannotations.csv \u002Fpath\u002Fto\u002Fclasses.csv --backbone=resnet50\n```\n\n**常用参数：**\n*   `--backbone`: 选择骨干网络 (resnet50, resnet101, mobilenet128_1.0 等)\n*   `--epochs`: 训练轮数\n*   `--snapshot-path`: 模型保存路径\n\n训练完成后，请使用上述“转换模型”步骤将生成的快照转换为推理模型即可使用。","某电商物流团队需要开发一套自动系统，从传送带监控视频中实时识别并统计不同尺寸的包裹，以优化分拣效率。\n\n### 没有 keras-retinanet 时\n- **小目标漏检严重**：传统检测算法难以捕捉远距离或堆叠在一起的小型包裹，导致计数准确率不足 70%。\n- **开发周期漫长**：团队需从零复现复杂的 Focal Loss 损失函数和锚框机制，耗费数周时间调试模型架构。\n- **推理速度不达标**：自行搭建的模型在普通 GPU 上处理单帧图像耗时超过 200 毫秒，无法满足流水线实时性要求。\n- **训练与部署脱节**：训练好的模型包含多余层级，缺乏标准转换流程，直接部署会导致显存占用过高且报错频繁。\n\n### 使用 keras-retinanet 后\n- **密集场景精准识别**：借助其核心的 Focal Loss 机制，有效解决了正负样本不平衡问题，小包裹漏检率大幅降低，准确率提升至 95% 以上。\n- **开箱即用效率高**：直接调用预训练的 ResNet50 骨干网络，仅需少量标注数据微调，两天内即可完成模型定制与验证。\n- **高性能实时推理**：在 NVIDIA Pascal Titan X 显卡上，处理 1000x800 分辨率图像仅需约 75 毫秒，完美匹配高速传送带节奏。\n- **平滑部署流程**：利用内置的 `convert_model` 脚本，一键将训练模型转换为精简的推理模型，无缝集成到现有生产环境中。\n\nkeras-retinanet 通过成熟的 Keras 实现和高效的工具链，让团队以最低成本解决了密集小目标检测难题，显著提升了物流分拣的自动化水平。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffizyr_keras-retinanet_21a17c9b.png","fizyr","Fizyr","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Ffizyr_a1bf1a66.png","Artificial intelligence for vision guided robotics. We're hiring! 
https:\u002F\u002Ffizyr.com\u002Fcareers\u002F",null,"info@fizyr.com","https:\u002F\u002Ffizyr.com","https:\u002F\u002Fgithub.com\u002Ffizyr",[85,89],{"name":86,"color":87,"percentage":88},"Python","#3572A5",99.5,{"name":90,"color":91,"percentage":92},"Cython","#fedf5b",0.5,4393,1930,"2026-03-30T04:01:25","Apache-2.0",4,"Linux, macOS, Windows","需要 GPU 以获得最佳性能（文中提及 NVIDIA Pascal Titan X），具体显存和 CUDA 版本未说明，需自行安装匹配的 TensorFlow","未说明",{"notes":102,"python":103,"dependencies":104},"该项目已弃用，官方推荐使用 torchvision 模块。代码仅确认支持 Keras 2.4 和 TensorFlow 2.3.0，更新版本可能导致不支持。安装时不会自动依赖 TensorFlow，需用户根据系统要求手动安装。训练模型需转换为推理模型后方可用于检测。支持 ResNet, MobileNet, DenseNet, VGG 等多种骨干网络。","未说明 (需兼容 Keras 2.4 和 TensorFlow 2.3.0)",[105,106,107],"keras==2.4","tensorflow==2.3.0","pycocotools (可选，用于 MS COCO 数据集)",[14,13],"2026-03-27T02:49:30.150509","2026-04-06T07:22:58.154320",[],[113,118,123,128,133,138],{"id":114,"version":115,"summary_zh":116,"released_at":117},98573,"0.5.1","自上一版本以来的变更：\n\n- 修复 VGG ImageNet 数据集下载问题。\n- 将 numpy 添加为依赖项。\n- 将生成器转换为 Keras 的 `Sequence` 类。\n- 支持 Float16 精度。\n- 公开学习率参数。\n- 增加验证损失作为可选步骤。","2019-06-20T09:44:28",{"id":119,"version":120,"summary_zh":121,"released_at":122},98574,"0.5.0","# 自上一版本以来的更改\n\n- 评估过程中使用进度条\n- 修正分类子模型的权重初始化\n- 修复类别存在缺失时评估出错的问题\n- 添加配置选项（目前仅支持锚框设置）\n- 重构生成器中注释的生成方式\n- 使用 CPU 进行模型转换\n- 升级至 Keras 2.2.4\n- 增加 NCHW 格式支持\n\n特别感谢：\n@adreo00  \n@borakrc  \n@yecharlie  \n@ddowling  \n@enricoliscio  \n@hgaiser  \n@baek-jinoo  \n@de-vri-es  \n@penguinmenac3  \nMorten Back Nielsen  \n@relh  \n@vcarpani","2018-10-17T11:28:11",{"id":124,"version":125,"summary_zh":126,"released_at":127},98575,"0.4.1","# 自上一版本以来的变更\n\n- 生成器优化\n- 文档改进。\n- 支持 OID Challenge 2018。\n- Keras 版本升级至 2.2.0。\n- 添加按类别筛选的选项（NMS）。\n- 引入 flake8 进行代码测试。\n- 合并 COCO 和非 COCO 评估脚本。\n- 修正 MobileNet 和 DenseNet 的图像预处理。\n\n感谢以下贡献者：\n@apacha \n@hgaiser\n@de-vri-es \n@lvaleriu \n@cclauss \n@HolyGuacamole \n@leonardvandriel \n@PhilippMarquardt \n@vcarpani  
","2018-07-18T13:33:43",{"id":129,"version":130,"summary_zh":131,"released_at":132},98576,"0.3.1","# 自上一版本以来的变更\r\n\r\n- 实现了 DenseNet 和 VGG 主干网络。\r\n- 增加了冻结主干层的选项。\r\n- 添加了将评估日志记录到 TensorBoard 的功能。\r\n- 为 80 个类别添加了更美观的颜色方案。\r\n- 修复了 batch_size > 1 时的问题。\r\n- 重构了模型输出（希望今后保持这种结构）。\r\n- 通过将模型拆分为“训练模型”和“推理模型”，简化了训练流程。\r\n- 增加了针对不同主干网络的专用函数结构（例如 `load_model`）。\r\n- 将回归任务编码为 x1\u002Fy1\u002Fx2\u002Fy2 偏移量（使 mAP 提升至 0.350，此前为 0.345）。\r\n- 使用 `nearest` 插值方式进行上采样。\r\n\r\n特别感谢：\r\n@vidosits  \r\n@cgratie  \r\n@DiegoAgher  \r\n@eduramiba  \r\n@GuillaumeErhard  \r\n@Muhannes  \r\n@hgaiser  \r\n@iver56  \r\n@jjiunlin  \r\n@srslynow  \r\n@de-vri-es  \r\n@Ori226  \r\n@pedroconceicao  \r\n@pderian  \r\n@rodrigo2019  \r\n@lvaleriu  \r\n@yhenon","2018-05-12T16:55:37",{"id":134,"version":135,"summary_zh":136,"released_at":137},98577,"0.2","# 自上一版本以来的更改\r\n\r\n- 按照论文修正了 FPN 架构。\r\n- 将默认图像尺寸设置为至少 800 像素。\r\n- 将 NMS 改为按类别分别执行 NMS。\r\n- 对边界框变换进行了小幅修正。\r\n- 添加了 OID 数据生成器。\r\n- 将默认 NMS 阈值改为 0.5。\r\n- 添加了 MobileNet 主干网络。\r\n- 添加了 TensorBoard 回调函数。\r\n- 添加了用于调试数据集的工具。\r\n- 提升了数据增强方法的速度。\r\n- 增加了恢复训练的功能。\r\n- 为自定义数据集添加了评估工具（目前仅计算 mAP）。\r\n- 在权重加载中增加了 `skip_mismatch` 参数，支持从预训练的 COCO 模型进行迁移学习。\r\n\r\n特别感谢：\r\n@awilliamson\r\n@hgaiser\r\n@de-vri-es\r\n@mxvs\r\n@wassname\r\n@mkocabas\r\n@lvaleriu","2018-03-03T09:53:05",{"id":139,"version":140,"summary_zh":80,"released_at":141},98578,"0.1","2018-01-15T14:32:13"]