[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-jsbroks--awesome-dataset-tools":3,"tool-jsbroks--awesome-dataset-tools":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",157379,2,"2026-04-15T23:32:42",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",108322,"2026-04-10T11:39:34",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[52,13,15,14],"插件",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":32,"last_commit_at":59,"category_tags":60,"status":17},4721,"markitdown","microsoft\u002Fmarkitdown","MarkItDown 是一款由微软 AutoGen 团队打造的轻量级 Python 工具，专为将各类文件高效转换为 Markdown 格式而设计。它支持 PDF、Word、Excel、PPT、图片（含 OCR）、音频（含语音转录）、HTML 乃至 YouTube 链接等多种格式的解析，能够精准提取文档中的标题、列表、表格和链接等关键结构信息。\n\n在人工智能应用日益普及的今天，大语言模型（LLM）虽擅长处理文本，却难以直接读取复杂的二进制办公文档。MarkItDown 恰好解决了这一痛点，它将非结构化或半结构化的文件转化为模型“原生理解”且 Token 效率极高的 Markdown 格式，成为连接本地文件与 AI 分析 pipeline 的理想桥梁。此外，它还提供了 MCP（模型上下文协议）服务器，可无缝集成到 Claude Desktop 等 LLM 应用中。\n\n这款工具特别适合开发者、数据科学家及 AI 研究人员使用，尤其是那些需要构建文档检索增强生成（RAG）系统、进行批量文本分析或希望让 AI 助手直接“阅读”本地文件的用户。虽然生成的内容也具备一定可读性，但其核心优势在于为机器",93400,"2026-04-06T19:52:38",[52,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":66,"readme_en":67,"readme_zh":68,"quickstart_zh":69,"use_case_zh":70,"hero_image_url":71,"owner_login":72,"owner_name":73,"owner_avatar_url":74,"owner_bio":75,"owner_company":76,"owner_location":77,"owner_email":78,"owner_twitter":72,"owner_website":79,"owner_url":80,"languages":75,"stars":81,"forks":82,"last_commit_at":83,"license":84,"difficulty_score":85,"env_os":86,"env_gpu":86,"env_ram":86,"env_deps":87,"category_tags":90,"github_topics":91,"view_count":32,"oss_zip_url":75,"oss_zip_packed_at":75,"status":17,"created_at":98,"updated_at":99,"faqs":100,"releases":101},8071,"jsbroks\u002Fawesome-dataset-tools","awesome-dataset-tools","🔧 A curated list of awesome dataset tools","awesome-dataset-tools 并非单一软件，而是一份精心整理的开源数据集工具清单，旨在为人工智能开发者提供一站式的资源导航。在机器学习项目中，高质量的数据标注往往是耗时最长、最棘手的环节，这份清单直击痛点，系统性地汇集了涵盖图像、音频、时间序列及文本等多模态数据的标注工具与处理库。\n\n无论是需要为计算机视觉项目标注边界框的研究人员，还是致力于构建端到端检测模型的工程师，都能在此找到契合需求的解决方案。清单中不仅收录了 CVAT、LabelImg、VoTT 等支持视频交互、3D 点云标注及跨平台协作的成熟工具，还包含了针对特定格式（如 YOLO、COCO）的高效标注器。其独特价值在于打破了信息孤岛，将分散在全球的优质开源项目按功能场景分类呈现，帮助用户快速对比并选择最适合自身技术栈的工具。通过利用这些经过社区验证的资源，团队能显著降低数据准备门槛，提升模型训练效率，让开发者能将更多精力聚焦于算法创新而非重复造轮子。","# Awesome Dataset Tools\n\n> A curated list of awesome dataset tools\n\n- [Labeling Tools](#labeling-tools)\n  - [Images](#images)\n  - [Audio](#audio)\n  - [Time Series](#time-series)\n  - [Text](#text)\n- [Libraries](#libraries)\n\n## Labeling Tools\n\n### Images\n\n- [LabeFlow](https:\u002F\u002Fgithub.com\u002Flabelflow\u002Flabelflow) - Open image annotation tool for machine learning projects\n- [CVAT](https:\u002F\u002Fgithub.com\u002Fopencv\u002Fcvat) - Online, interactive video and image annotation tool for computer vision\n- [COCO Annotator](https:\u002F\u002Fgithub.com\u002Fjsbroks\u002Fcoco-annotator) - Web-based image segmentation tool for object detection, localization and keypoints\n- [VoTT](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FVoTT) - Visual Object Tagging Tool: An electron app for building end to end object detection models from images and videos.\n- [Scalabel](https:\u002F\u002Fgithub.com\u002Fscalabel\u002Fscalabel) - Versatile and scalable tool that supports various kinds of annotations\n- [EVA](https:\u002F\u002Fgithub.com\u002FEricsson\u002Feva) - EVA is a web-based tool for efficient annotation of videos and image sequences and has an additional tracking capabilities\n- [LOST](https:\u002F\u002Fgithub.com\u002Fl3p-cv\u002Flost) - Design your own smart Image Annotation process in a web-based environment\n- [Boobs](https:\u002F\u002Fgithub.com\u002Fdrainingsun\u002Fboobs) - Fast and efficient BBox annotation for your images in YOLO, VOC\u002FCOCO formats\n- [MuViLab](https:\u002F\u002Fgithub.com\u002Fale152\u002Fmuvilab) - Tool to help you labelling videos for computer vision\n- [Turkey](https:\u002F\u002Fgithub.com\u002Fyanfengliu\u002Fturkey) - Web UI on Amazon Mechanical Turk to crowd-source image segmentation\n- [React Image Annotation](https:\u002F\u002Fgithub.com\u002FSecretmapper\u002Freact-image-annotation) - An infinitely customizable image tool built on React\n- [Point Cloud Annotation Tool](https:\u002F\u002Fgithub.com\u002Fspringzfx\u002Fpoint-cloud-annotation-tool) - Annotate 3D boxes in point cloud\n- [ImageTagger](https:\u002F\u002Fgithub.com\u002Fbit-bots\u002Fimagetagger) - Open source online platform for collaborative image labeling\n- [DeepLabel](https:\u002F\u002Fgithub.com\u002Fjveitchmichaelis\u002Fdeeplabel) - A cross-platform image annotation tool for machine learning\n- [Visual Object Tagging Tool](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FVoTT) - An electron app for building end to end Object Detection Models\n- [VGG Image Annotator](https:\u002F\u002Fgitlab.com\u002Fvgg\u002Fvia) - Standalone image annotator application packaged as a single HTML file\n- [SMART](https:\u002F\u002Fgithub.com\u002FRTIInternational\u002FSMART) - Efficiently build labeled training datasets for supervised machine learning tasks\n- [Pixel Annotation Tool](https:\u002F\u002Fgithub.com\u002Fabreheret\u002FPixelAnnotationTool) - Uses the algorithm watershed marked of OpenCV to annotate images in directories\n- [Pixie](https:\u002F\u002Fgithub.com\u002Fbuni-rock\u002FPixie) - GUI annotation tool which provides the bounding box, polygon, and semantic segmentation\n- [Turktool](https:\u002F\u002Fgithub.com\u002Fjaxony\u002Fturktool) - Modern React app for scalable bounding box annotation of images\n- [LabelD](https:\u002F\u002Fgithub.com\u002Fsweppner\u002Flabeld) - Simple image annotation tool to streamlining the overall process\n- [Comma Coloring](https:\u002F\u002Fgithub.com\u002Fcommaai\u002Fcommacoloring) - Adult coloring book for image segmentation\n- [LabelImg](https:\u002F\u002Fgithub.com\u002Ftzutalin\u002FlabelImg) - Graphical image annotation tool and label object bounding boxes in images\n- [LCs Finder](https:\u002F\u002Fgithub.com\u002Flc-soft\u002FLC-Finder) - Image annotation and object detection tool written in C\n- [js-segment-annotator](https:\u002F\u002Fgithub.com\u002Fkyamagu\u002Fjs-segment-annotator) - Javascript image annotation tool based on image segmentation\n- [Cytomine](https:\u002F\u002Fgithub.com\u002Fcytomine) - Analysis of multi-gigapixel images\n- [labelme](https:\u002F\u002Fgithub.com\u002Fwkentaro\u002Flabelme) - Image Polygonal Annotation with Python (polygon, rectangle, circle, line, point and image-level flag annotation)\n- [SimpleAnnotate](https:\u002F\u002Fgithub.com\u002Fjoelb92\u002FSimpleAnnotate) - Open source video and image annotation software for, currently only for OSX\n- [Sloth](https:\u002F\u002Fgithub.com\u002FcvhciKIT\u002Fsloth) - Labeling image and video data for computer vision research\n- [Fast Annotation Tool](https:\u002F\u002Fgithub.com\u002Fchristopher5106\u002FFastAnnotationTool) - Online platform for collaborative image annotation\n- [Anno-Mage](https:\u002F\u002Fgithub.com\u002Fvirajmavani\u002Fsemi-auto-image-annotation-tool) - Helps you in annotating images by suggesting you annotations for 80 object classes\n- [MedTagger](https:\u002F\u002Fgithub.com\u002Fmedtagger\u002FMedTagger) - Collaborative framework for annotating medical datasets using crowdsourcing\n- [OpenLabeling](https:\u002F\u002Fgithub.com\u002FCartucho\u002FOpenLabeling) - Labeling in multiple annotation formats\n- [Alturos.ImageAnnotation](https:\u002F\u002Fgithub.com\u002FAlturosDestinations\u002FAlturos.ImageAnnotation) - Collaborative tool for labeling image data for yolo\n- [Yolo_mark](https:\u002F\u002Fgithub.com\u002FAlexeyAB\u002FYolo_mark) - GUI for marking bounded boxes of objects in images\n- [imglab](https:\u002F\u002Fgithub.com\u002FNaturalIntelligence\u002Fimglab) - peedup and simplify image labeling\u002F annotation process with multiple supported formats\n- [OpenLabeler](https:\u002F\u002Fgithub.com\u002Fkinhong\u002FOpenLabeler) - Open source desktop application for annotating objects\n- [UltimateLabeling](https:\u002F\u002Fgithub.com\u002Falexandre01\u002FUltimateLabeling) - A multi-purpose Video Labeling GUI with integrated SOTA detector and tracker\n- [DataGym.ai](https:\u002F\u002Fgithub.com\u002Fdatagym-ai\u002Fdatagym-core) - Open source annotation and labeling tool for image and video assets\n\n#### Closed Source\n\n- [DataTorch](https:\u002F\u002Fdatatorch.io\u002F) - Platform for creating and shareing datasets.\n- [Labelbox](https:\u002F\u002Flabelbox.com\u002F) - Platform for data labeling, data management, and data science. Its features include image annotation, bounding boxes, text classification, and more\n- [Supervise.ly](https:\u002F\u002Fsupervise.ly\u002F) - Image annotation and data management tool that you can use create image and video datasets\n- [Prodigy](https:\u002F\u002Fprodi.gy\u002F) - Various machine learning models such as image classification, entity recognition and intent detection\n- [RectLabel](https:\u002F\u002Fgithub.com\u002Fryouchinsa\u002FRectlabel-support) - Label images for bounding box object detection and segmentation\n- [Lionbridge AI](https:\u002F\u002Flionbridge.ai\u002Fservices\u002Fimage-annotation\u002F) - Quickly annotate thousands of images and videos with relevant tags\n- [TrainingData.io](https:\u002F\u002Fwww.trainingdata.io\u002F) - Medical image annotation tool for data labeling. Spports DICOM image format for radiology AI\n- [Spare5](https:\u002F\u002Fapp.spare5.com\u002Ffives) - Crowdsourcing service for tasks such as data and image annotation, language assessment, and more\n- [Hive](https:\u002F\u002Fthehive.ai\u002F) - Text and image annotation service that helps you create training datasets\n- [Figure Eight](https:\u002F\u002Fwww.figure-eight.com\u002F) - Supports audio , computer vision, natural language processing, and other data tasks\n- [Dataturks](https:\u002F\u002Fdataturks.com\u002F) - Image segmentation, named entity recognition (NER) tagging in documents, and POS tagging\n- [UBIAI](https:\u002F\u002Fubiai.tools\u002F) - Easy-to-use text annotation tool for teams with most comprehensive auto-annotation features. Supports NER, relations and document classification as well as OCR annotation for invoice labeling\n- [Playment](https:\u002F\u002Fplayment.io\u002F) - Services offered include bounding boxes, points and lines, polygons, semantic segmentation, and more\n- [Cogito Tech](https:\u002F\u002Fwww.cogitotech.com\u002F) - Image annotation, content moderation, sentiment analysis, chatbot training\n- [OCLAVI](https:\u002F\u002Foclavi.com\u002F) - Annotate Bounding Box, Polygon, Circle, Point and Cuboidal annotations with precision\n- [Humans in the Loop](https:\u002F\u002Fhumansintheloop.org\u002F) - Use cases include face recognition, autonomous vehicles, and figure detection\n- [WorkAround](https:\u002F\u002Fworkaround.online\u002F) - Host and annotate data, manage projects, and build datasets alongside top companies\n- [TaQadam](https:\u002F\u002Ftaqadam.io\u002F) - On-demand annotation with agents-in-the-loop\n- [Zillin](https:\u002F\u002Fzillin.io) - Image annotation service for classification, object detection and segmentation with API access and georeferenced images support.\n- [IBM Cloud Annotations](https:\u002F\u002Fcloud.annotations.ai\u002F) - Simple and collaborative image annotation tool for teams and individuals inside ibm cloud environment.\n- [TrainingSet.AI](https:\u002F\u002Ftrainingset.ai\u002F) - Platform to solve the data labelling step in the AI Development for images, video and point cloud data (automatic labeling, ground truth, annotation tools, web dataset creation, s3, teams and statistics tools)\n- [MedSeg](https:\u002F\u002Fmedseg.ai\u002F) - Free online medical annotation (segmentation) with AI models.\n- [MVTec Deep Learning Tool](https:\u002F\u002Fwww.mvtec.com\u002Fproducts\u002Fdeep-learning-tool\u002F) - Provides labeling functionalities for [HALCON](https:\u002F\u002Fwww.mvtec.com\u002Fproducts\u002Fhalcon\u002F)'s deep-learning-based object detection and classification.\n- [Amazon SageMaker Ground Truth](https:\u002F\u002Faws.amazon.com\u002Fsagemaker\u002Fgroundtruth\u002F) - annotate data using MTurk, vendor workforces, or your own private workteams. Use Ground Truth's built-in UIs (video, point cloud, image, text, document processing) or bring your own custom UI\n\n### Audio\n\n- [Audio Annotator](https:\u002F\u002Fgithub.com\u002FCrowdCurio\u002Faudio-annotator) - JavaScript interface for annotating and labeling audio files\n- [Dynitag](https:\u002F\u002Fgithub.com\u002Fdynilib\u002Fdynitag) - Web-based collaborative audio annotator tool\n- [EchoML](https:\u002F\u002Fgithub.com\u002Fritazh\u002FEchoML) - play, visualize, and annotate your audio files for machine learning\n\n#### Closed Source\n\n- [Figure Eight](https:\u002F\u002Fwww.figure-eight.com\u002F) - Supports audio , computer vision, natural language processing, and other data tasks\n\n### Time Series\n\n- [Curve](https:\u002F\u002Fgithub.com\u002Fbaidu\u002FCurve) - An integrated experimental platform for time series data anomaly detection\n- [TagAnomaly](https:\u002F\u002Fgithub.com\u002FMicrosoft\u002FTagAnomaly) - Anomaly detection analysis and labeling tool, specifically for multiple time series\n- [time-series-annotator](https:\u002F\u002Fgithub.com\u002FCrowdCurio\u002Ftime-series-annotator) - Implements classification tasks for time series.\n- [WDK](https:\u002F\u002Fgithub.com\u002Favenix\u002FWDK) - Tools to facilitate the development of activity recognition applications with wearable devices\n\n### Text\n\n- [brat](https:\u002F\u002Fgithub.com\u002Fnlplab\u002Fbrat) - For all your textual annotation needs\n- [doccano](https:\u002F\u002Fgithub.com\u002Fchakki-works\u002Fdoccano) - Open source text annotation tool for machine learning practitioner.\n- [Inception](https:\u002F\u002Fgithub.com\u002Finception-project\u002Finception) - A semantic annotation platform offering intelligent annotation assistance\n- [NeuroNER](https:\u002F\u002Fgithub.com\u002FFranck-Dernoncourt\u002FNeuroNER) - Named-entity recognition using neural networks\n- [YEDDA](https:\u002F\u002Fgithub.com\u002Fjiesutd\u002FYEDDA) - For annotating chunk\u002Fentity\u002Fevent on text, symbol and even emoji\n- [TALEN](https:\u002F\u002Fgithub.com\u002FCogComp\u002Ftalen) - Web-based tool for annotating word sequences\n- [WebAnno](https:\u002F\u002Fgithub.com\u002Fwebanno\u002Fwebanno) - Web-based annotation tool for a wide range of linguistic annotations\n- [MAE](https:\u002F\u002Fgithub.com\u002Fkeighrim\u002Fmae-annotation) - Lightweight, general-purpose natural language annotation tool\n- [Anafora](https:\u002F\u002Fgithub.com\u002Fweitechen\u002Fanafora) - Web-based raw text annotation tool\n- [TagEditor](https:\u002F\u002Fgithub.com\u002Fd5555\u002FTagEditor) - Label dependencies, parts of speech, Named entities, and text categories\n- [ML-Annotate](https:\u002F\u002Fgithub.com\u002Fplanbrothers\u002Fml-annotate) - Supports binary, multi-label and multi-class labeling of text\n\n#### Closed Source\n\n- [Hive](https:\u002F\u002Fthehive.ai\u002F) - Text and image annotation service that helps you create training datasets\n- [Figure Eight](https:\u002F\u002Fwww.figure-eight.com\u002F) - Supports audio , computer vision, natural language processing, and other data tasks\n- [LightTag](https:\u002F\u002Flighttag.io) Text Annotation Tool for Teams.\n\n## Libraries\n\n### Audio\n\n- [Muda](https:\u002F\u002Fgithub.com\u002Fbmcfee\u002Fmuda) - Python library for augmenting annotated audio data\n\n### Text\n\n- [DataProfiler](https:\u002F\u002Fgithub.com\u002Fcapitalone\u002FDataProfiler) - A Python library to facilitate data analysis, monitoring, and data identification\n","# 优秀的数据集工具\n\n> 精选的优秀数据集工具列表\n\n- [标注工具](#标注工具)\n  - [图像](#图像)\n  - [音频](#音频)\n  - [时间序列](#时间序列)\n  - [文本](#文本)\n- [库](#库)\n\n## 标注工具\n\n### 图像\n\n- [LabeFlow](https:\u002F\u002Fgithub.com\u002Flabelflow\u002Flabelflow) - 用于机器学习项目的开源图像标注工具\n- [CVAT](https:\u002F\u002Fgithub.com\u002Fopencv\u002Fcvat) - 面向计算机视觉的在线交互式视频和图像标注工具\n- [COCO Annotator](https:\u002F\u002Fgithub.com\u002Fjsbroks\u002Fcoco-annotator) - 基于Web的图像分割工具，适用于目标检测、定位和关键点标注\n- [VoTT](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FVoTT) - 视觉对象标注工具：一款基于Electron的应用程序，用于从图像和视频中构建端到端的目标检测模型。\n- [Scalabel](https:\u002F\u002Fgithub.com\u002Fscalabel\u002Fscalabel) - 功能多样且可扩展的工具，支持多种类型的标注\n- [EVA](https:\u002F\u002Fgithub.com\u002FEricsson\u002Feva) - EVA 是一个基于 Web 的工具，用于高效地标注视频和图像序列，并具备额外的跟踪功能\n- [LOST](https:\u002F\u002Fgithub.com\u002Fl3p-cv\u002Flost) - 在基于 Web 的环境中设计您自己的智能图像标注流程\n- [Boobs](https:\u002F\u002Fgithub.com\u002Fdrainingsun\u002Fboobs) - 快速高效的 BBox 标注工具，支持 YOLO、VOC\u002FCOCO 格式\n- [MuViLab](https:\u002F\u002Fgithub.com\u002Fale152\u002Fmuvilab) - 帮助您为计算机视觉任务标注视频的工具\n- [Turkey](https:\u002F\u002Fgithub.com\u002Fyanfengliu\u002Fturkey) - 基于 Amazon Mechanical Turk 的 Web UI，用于众包图像分割\n- [React Image Annotation](https:\u002F\u002Fgithub.com\u002FSecretmapper\u002Freact-image-annotation) - 基于 React 构建的可无限定制的图像标注工具\n- [Point Cloud Annotation Tool](https:\u002F\u002Fgithub.com\u002Fspringzfx\u002Fpoint-cloud-annotation-tool) - 用于点云中 3D 框的标注\n- [ImageTagger](https:\u002F\u002Fgithub.com\u002Fbit-bots\u002Fimagetagger) - 开源在线协作式图像标注平台\n- [DeepLabel](https:\u002F\u002Fgithub.com\u002Fjveitchmichaelis\u002Fdeeplabel) - 跨平台的机器学习图像标注工具\n- [Visual Object Tagging Tool](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002FVoTT) - 用于构建端到端目标检测模型的 Electron 应用程序\n- [VGG Image Annotator](https:\u002F\u002Fgitlab.com\u002Fvgg\u002Fvia) - 以单个 HTML 文件打包的独立图像标注应用程序\n- [SMART](https:\u002F\u002Fgithub.com\u002FRTIInternational\u002FSMART) - 高效构建用于监督式机器学习任务的标注训练数据集\n- [Pixel Annotation Tool](https:\u002F\u002Fgithub.com\u002Fabreheret\u002FPixelAnnotationTool) - 使用 OpenCV 的标记分水岭算法对目录中的图像进行标注\n- [Pixie](https:\u002F\u002Fgithub.com\u002Fbuni-rock\u002FPixie) - 提供边界框、多边形和语义分割功能的 GUI 标注工具\n- [Turktool](https:\u002F\u002Fgithub.com\u002Fjaxony\u002Fturktool) - 现代化的 React 应用程序，用于大规模图像边界框标注\n- [LabelD](https:\u002F\u002Fgithub.com\u002Fsweppner\u002Flabeld) - 简单的图像标注工具，旨在简化整个流程\n- [Comma Coloring](https:\u002F\u002Fgithub.com\u002Fcommaai\u002Fcommacoloring) - 成人填色书形式的图像分割工具\n- [LabelImg](https:\u002F\u002Fgithub.com\u002Ftzutalin\u002FlabelImg) - 图形化图像标注工具，用于在图像中绘制目标边界框\n- [LCs Finder](https:\u002F\u002Fgithub.com\u002Flc-soft\u002FLC-Finder) - 用 C 语言编写的图像标注与目标检测工具\n- [js-segment-annotator](https:\u002F\u002Fgithub.com\u002Fkyamagu\u002Fjs-segment-annotator) - 基于图像分割的 JavaScript 图像标注工具\n- [Cytomine](https:\u002F\u002Fgithub.com\u002Fcytomine) - 多千兆像素图像分析工具\n- [labelme](https:\u002F\u002Fgithub.com\u002Fwkentaro\u002Flabelme) - 使用 Python 进行图像多边形标注（包括多边形、矩形、圆形、直线、点以及图像级别的标记）\n- [SimpleAnnotate](https:\u002F\u002Fgithub.com\u002Fjoelb92\u002FSimpleAnnotate) - 开源的视频和图像标注软件，目前仅适用于 OSX\n- [Sloth](https:\u002F\u002Fgithub.com\u002FcvhciKIT\u002Fsloth) - 用于计算机视觉研究的图像和视频数据标注工具\n- [Fast Annotation Tool](https:\u002F\u002Fgithub.com\u002Fchristopher5106\u002FFastAnnotationTool) - 协作式在线图像标注平台\n- [Anno-Mage](https:\u002F\u002Fgithub.com\u002Fvirajmavani\u002Fsemi-auto-image-annotation-tool) - 通过为 80 种物体类别提供标注建议来帮助您标注图像\n- [MedTagger](https:\u002F\u002Fgithub.com\u002Fmedtagger\u002FMedTagger) - 利用众包方式标注医疗数据集的协作框架\n- [OpenLabeling](https:\u002F\u002Fgithub.com\u002FCartucho\u002FOpenLabeling) - 支持多种标注格式的标注工具\n- [Alturos.ImageAnnotation](https:\u002F\u002Fgithub.com\u002FAlturosDestinations\u002FAlturos.ImageAnnotation) - 用于 YOLO 数据标注的协作工具\n- [Yolo_mark](https:\u002F\u002Fgithub.com\u002FAlexeyAB\u002FYolo_mark) - 用于在图像中标记目标边界框的 GUI 工具\n- [imglab](https:\u002F\u002Fgithub.com\u002FNaturalIntelligence\u002Fimglab) - 加快并简化图像标注流程，支持多种格式\n- [OpenLabeler](https:\u002F\u002Fgithub.com\u002Fkinhong\u002FOpenLabeler) - 开源桌面应用，用于标注目标\n- [UltimateLabeling](https:\u002F\u002Fgithub.com\u002Falexandre01\u002FUltimateLabeling) - 多功能视频标注 GUI，集成最先进的检测器和跟踪器\n- [DataGym.ai](https:\u002F\u002Fgithub.com\u002Fdatagym-ai\u002Fdatagym-core) - 开源的图像和视频资产标注工具\n\n#### 封闭源\n\n- [DataTorch](https:\u002F\u002Fdatatorch.io\u002F) - 用于创建和共享数据集的平台。\n- [Labelbox](https:\u002F\u002Flabelbox.com\u002F) - 数据标注、数据管理和数据科学平台。其功能包括图像标注、边界框、文本分类等。\n- [Supervise.ly](https:\u002F\u002Fsupervise.ly\u002F) - 图像标注和数据管理工具，可用于创建图像和视频数据集。\n- [Prodigy](https:\u002F\u002Fprodi.gy\u002F) - 提供多种机器学习模型，如图像分类、实体识别和意图检测。\n- [RectLabel](https:\u002F\u002Fgithub.com\u002Fryouchinsa\u002FRectlabel-support) - 用于边界框目标检测和分割的图像标注工具。\n- [Lionbridge AI](https:\u002F\u002Flionbridge.ai\u002Fservices\u002Fimage-annotation\u002F) - 可快速为数千张图像和视频添加相关标签。\n- [TrainingData.io](https:\u002F\u002Fwww.trainingdata.io\u002F) - 医学图像标注工具，用于数据标注。支持放射科AI使用的DICOM图像格式。\n- [Spare5](https:\u002F\u002Fapp.spare5.com\u002Ffives) - 众包服务，适用于数据和图像标注、语言评估等任务。\n- [Hive](https:\u002F\u002Fthehive.ai\u002F) - 文本和图像标注服务，帮助您创建训练数据集。\n- [Figure Eight](https:\u002F\u002Fwww.figure-eight.com\u002F) - 支持音频、计算机视觉、自然语言处理及其他数据任务。\n- [Dataturks](https:\u002F\u002Fdataturks.com\u002F) - 提供图像分割、文档中的命名实体识别（NER）标注以及词性标注等功能。\n- [UBIAI](https:\u002F\u002Fubiai.tools\u002F) - 易于使用的文本标注工具，专为团队设计，具备最全面的自动标注功能。支持NER、关系抽取、文档分类以及发票标注的OCR标注。\n- [Playment](https:\u002F\u002Fplayment.io\u002F) - 提供的服务包括边界框、点线、多边形、语义分割等。\n- [Cogito Tech](https:\u002F\u002Fwww.cogitotech.com\u002F) - 图像标注、内容审核、情感分析、聊天机器人训练。\n- [OCLAVI](https:\u002F\u002Foclavi.com\u002F) - 精确标注边界框、多边形、圆形、点和长方体注释。\n- [Humans in the Loop](https:\u002F\u002Fhumansintheloop.org\u002F) - 应用场景包括人脸识别、自动驾驶汽车和人物检测。\n- [WorkAround](https:\u002F\u002Fworkaround.online\u002F) - 与顶尖公司合作，托管和标注数据、管理项目并构建数据集。\n- [TaQadam](https:\u002F\u002Ftaqadam.io\u002F) - 基于代理闭环的按需标注服务。\n- [Zillin](https:\u002F\u002Fzillin.io) - 图像标注服务，支持分类、目标检测和分割，并提供API接入及地理参考图像支持。\n- [IBM Cloud Annotations](https:\u002F\u002Fcloud.annotations.ai\u002F) - 简单且协作式的图像标注工具，适用于IBM云环境中的团队和个人。\n- [TrainingSet.AI](https:\u002F\u002Ftrainingset.ai\u002F) - 一个平台，旨在解决AI开发中针对图像、视频和点云数据的数据标注环节问题（自动标注、真值生成、标注工具、Web数据集创建、S3集成、团队协作及统计工具）。\n- [MedSeg](https:\u002F\u002Fmedseg.ai\u002F) - 免费的在线医学标注（分割）服务，结合AI模型使用。\n- [MVTec深度学习工具](https:\u002F\u002Fwww.mvtec.com\u002Fproducts\u002Fdeep-learning-tool\u002F) - 为[MHALCON](https:\u002F\u002Fwww.mvtec.com\u002Fproducts\u002Fhalcon\u002F)的基于深度学习的对象检测和分类提供标注功能。\n- [Amazon SageMaker Ground Truth](https:\u002F\u002Faws.amazon.com\u002Fsagemaker\u002Fgroundtruth\u002F) - 可通过MTurk、供应商员工或您自己的私有团队进行数据标注。可使用Ground Truth内置的UI（视频、点云、图像、文本、文档处理）或自定义UI。\n\n\n\n### 音频\n\n- [Audio Annotator](https:\u002F\u002Fgithub.com\u002FCrowdCurio\u002Faudio-annotator) - 用于标注和标记音频文件的JavaScript界面。\n- [Dynitag](https:\u002F\u002Fgithub.com\u002Fdynilib\u002Fdynitag) - 基于Web的协作式音频标注工具。\n- [EchoML](https:\u002F\u002Fgithub.com\u002Fritazh\u002FEchoML) - 播放、可视化并标注您的音频文件，以用于机器学习。\n\n#### 封闭源\n\n- [Figure Eight](https:\u002F\u002Fwww.figure-eight.com\u002F) - 支持音频、计算机视觉、自然语言处理及其他数据任务。\n\n### 时间序列\n\n- [Curve](https:\u002F\u002Fgithub.com\u002Fbaidu\u002FCurve) - 集成式的时间序列数据异常检测实验平台。\n- [TagAnomaly](https:\u002F\u002Fgithub.com\u002FMicrosoft\u002FTagAnomaly) - 异常检测分析与标注工具，专门用于多时间序列。\n- [time-series-annotator](https:\u002F\u002Fgithub.com\u002FCrowdCurio\u002Ftime-series-annotator) - 实现时间序列的分类任务。\n- [WDK](https:\u002F\u002Fgithub.com\u002Favenix\u002FWDK) - 用于促进可穿戴设备活动识别应用开发的工具。\n\n### 文本\n\n- [brat](https:\u002F\u002Fgithub.com\u002Fnlplab\u002Fbrat) - 满足您所有的文本标注需求。\n- [doccano](https:\u002F\u002Fgithub.com\u002Fchakki-works\u002Fdoccano) - 开源文本标注工具，专为机器学习从业者设计。\n- [Inception](https:\u002F\u002Fgithub.com\u002Finception-project\u002Finception) - 语义标注平台，提供智能标注辅助。\n- [NeuroNER](https:\u002F\u002Fgithub.com\u002FFranck-Dernoncourt\u002FNeuroNER) - 使用神经网络进行命名实体识别。\n- [YEDDA](https:\u002F\u002Fgithub.com\u002Fjiesutd\u002FYEDDA) - 用于标注文本、符号甚至表情符号中的片段\u002F实体\u002F事件。\n- [TALEN](https:\u002F\u002Fgithub.com\u002FCogComp\u002Ftalen) - 基于Web的词语序列标注工具。\n- [WebAnno](https:\u002F\u002Fgithub.com\u002Fwebanno\u002Fwebanno) - 广泛应用于各种语言学标注的基于Web的标注工具。\n- [MAE](https:\u002F\u002Fgithub.com\u002Fkeighrim\u002Fmae-annotation) - 轻量级、通用型自然语言标注工具。\n- [Anafora](https:\u002F\u002Fgithub.com\u002Fweitechen\u002Fanafora) - 基于Web的原始文本标注工具。\n- [TagEditor](https:\u002F\u002Fgithub.com\u002Fd5555\u002FTagEditor) - 标注依存关系、词性、命名实体和文本类别。\n- [ML-Annotate](https:\u002F\u002Fgithub.com\u002Fplanbrothers\u002Fml-annotate) - 支持文本的二元、多标签和多分类标注。\n\n#### 封闭源\n\n- [Hive](https:\u002F\u002Fthehive.ai\u002F) - 文本和图像标注服务，帮助您创建训练数据集。\n- [Figure Eight](https:\u002F\u002Fwww.figure-eight.com\u002F) - 支持音频、计算机视觉、自然语言处理及其他数据任务。\n- [LightTag](https:\u002F\u002Flighttag.io) 团队用文本标注工具。\n\n## 库\n\n### 音频\n\n- [Muda](https:\u002F\u002Fgithub.com\u002Fbmcfee\u002Fmuda) - 用于增强标注后音频数据的Python库。\n\n### 文本\n\n- [DataProfiler](https:\u002F\u002Fgithub.com\u002Fcapitalone\u002FDataProfiler) - 一个Python库，用于促进数据分析、监控和数据识别。","# Awesome Dataset Tools 快速上手指南\n\n`awesome-dataset-tools` 并非单一的软件工具，而是一个精选的开源数据集工具列表（Awesome List），涵盖了图像、音频、时间序列和文本等多种数据类型的标注工具及相关库。本指南将指导您如何浏览该列表，并快速启动其中几个最具代表性的开源标注工具。\n\n## 环境准备\n\n由于列表中包含多种不同技术栈的工具，环境要求取决于您选择的具体工具。以下是通用前置依赖：\n\n*   **操作系统**：Linux (Ubuntu\u002FCentOS), macOS, 或 Windows (部分工具推荐 WSL2)。\n*   **基础软件**：\n    *   `Git`: 用于克隆代码仓库。\n    *   `Python 3.8+`: 大多数现代标注工具基于 Python。\n    *   `Node.js & npm`: 部分基于 Web 的工具需要此环境。\n    *   `Docker` (可选但推荐): 许多工具提供 Docker 镜像，可避免环境配置冲突。\n*   **网络加速**：\n    *   克隆 GitHub 仓库时，若速度较慢，可使用国内镜像源（如 Gitee 镜像）或配置 Git 代理。\n    *   Python 包安装推荐使用清华源或阿里源。\n\n## 安装步骤\n\n以下选取列表中三个最流行且覆盖不同场景的开源工具作为示例：**LabelImg** (图像)、**Doccano** (文本) 和 **CVAT** (视频\u002F图像，Docker 部署)。\n\n### 1. 图像标注：LabelImg\n适用于目标检测（Bounding Box）任务。\n\n```bash\n# 配置 pip 国内源 (可选)\npip config set global.index-url https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n\n# 直接通过 pip 安装\npip install labelImg\n\n# 或者从源码安装\ngit clone https:\u002F\u002Fgithub.com\u002Ftzutalin\u002FlabelImg.git\ncd labelImg\npip install -r requirements.txt\npython setup.py build_ext --inplace\n```\n\n### 2. 文本标注：Doccano\n适用于命名实体识别 (NER)、情感分析等文本任务。\n\n```bash\n# 使用 Docker 部署 (推荐，最简单)\ndocker pull doccano\u002Fdoccano\n\n# 启动容器\ndocker run --rm -it -p 8000:8000 doccano\u002Fdoccano\n```\n\n*若不使用 Docker，可通过 pip 安装：*\n```bash\npip install doccano\ndoccano init\ndoccano createuser --username admin --password password\ndoccano webserver --port 8000\n```\n\n### 3. 高级图像\u002F视频标注：CVAT\n适用于复杂的计算机视觉任务，支持视频插值和多用户协作。\n\n```bash\n# 克隆仓库\ngit clone https:\u002F\u002Fgithub.com\u002Fopencv\u002Fcvat.git\ncd cvat\n\n# 使用 Docker Compose 构建并启动 (需确保已安装 docker-compose)\ndocker compose up -d\n```\n\n## 基本使用\n\n### LabelImg (图像)\n1.  **启动**：在终端输入 `labelImg` 命令。\n2.  **加载图片**：点击左侧 \"Open Dir\" 选择包含图片的文件夹。\n3.  **标注**：\n    *   选择左侧工具栏的矩形工具 (Rectangle)。\n    *   在图片上框选物体，弹出对话框输入标签名称（如 `person`, `car`）。\n4.  **保存**：点击 \"Save\" 或设置自动保存，生成对应的 `.xml` (VOC 格式) 或 `.txt` (YOLO 格式) 文件。\n\n### Doccano (文本)\n1.  **访问界面**：浏览器打开 `http:\u002F\u002Flocalhost:8000`。\n2.  **登录**：使用创建的用户名和密码登录（默认 admin\u002Fpassword，若用 Docker 首次运行需查看日志获取初始密码或通过命令创建）。\n3.  **创建项目**：\n    *   点击 \"Create Project\"。\n    *   选择任务类型（如 Sequence labeling for NER）。\n4.  **导入数据**：在 \"Dataset\" 页面上传 JSONL 或 TXT 格式的文本数据。\n5.  **开始标注**：进入 \"Annotation\" 页面，选中文本片段并分配标签，点击 \"Submit\" 保存。\n\n### CVAT (图像\u002F视频)\n1.  **访问界面**：浏览器打开 `http:\u002F\u002Flocalhost:8080`。\n2.  **创建任务**：\n    *   点击 \"Create new task\"。\n    *   填写名称，选择 Labels（标签类别）。\n    *   在 \"Data\" 选项卡上传图片或视频文件。\n3.  **标注作业**：\n    *   进入任务详情页，点击 \"Job #1\"。\n    *   使用左侧工具栏绘制形状（矩形、多边形等）。\n    *   对于视频，可利用插值功能自动填充中间帧。\n4.  **导出数据**：完成标注后，在任务页面点击 \"Export dataset\" 下载 COCO、YOLO、VOC 等格式的数据集。\n\n> **提示**：更多工具（如音频标注的 `EchoML`、时间序列的 `TagAnomaly`）的使用方式类似，通常遵循 `安装 -> 启动服务\u002F程序 -> 导入数据 -> 标注 -> 导出` 的流程。请访问 `awesome-dataset-tools` 仓库中对应工具的链接获取详细文档。","某自动驾驶初创团队正急需构建高质量的道路障碍物检测数据集，以训练其核心感知模型。\n\n### 没有 awesome-dataset-tools 时\n- 团队成员分散使用各类零散脚本或付费封闭软件，导致标注格式（如 YOLO、COCO、VOC）转换频繁且极易出错。\n- 缺乏统一的协作平台，多人同时处理同一批视频数据时版本混乱，重复劳动严重，沟通成本极高。\n- 面对海量点云和连续视频帧，手动逐帧绘制边界框效率低下，项目进度因数据准备不足而严重滞后。\n- 难以针对特定场景（如恶劣天气下的行人检测）定制灵活的标注流程，导致模型在长尾场景下表现不佳。\n\n### 使用 awesome-dataset-tools 后\n- 团队直接从列表中选用 LabelImg、CVAT 或 Scalabel 等工具，原生支持多种标准格式，彻底消除了格式转换的痛点。\n- 利用 CVAT 或 ImageTagger 等在线协作工具，实现了任务自动分发与实时进度监控，多人并行作业井然有序。\n- 借助 EVA 的追踪功能或 Point Cloud Annotation Tool 的 3D 标注能力，大幅提升了视频序列和激光雷达数据的处理效率。\n- 通过 LOST 或 SMART 自定义智能标注工作流，快速聚焦高价值难例数据，显著提升了模型在复杂场景的鲁棒性。\n\nawesome-dataset-tools 通过提供一站式精选工具清单，帮助团队将数据准备周期缩短了 60%，让算法工程师能更专注于模型优化而非数据清洗。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fjsbroks_awesome-dataset-tools_7e732b09.png","jsbroks","Justin Brooks","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fjsbroks_5c767d0f.png",null,"@wandb ","Canada","jsbroks@gmail.com","https:\u002F\u002Fwww.linkedin.com\u002Fin\u002Fjsbroks","https:\u002F\u002Fgithub.com\u002Fjsbroks",936,134,"2026-04-10T08:55:27","MIT",5,"未说明",{"notes":88,"python":86,"dependencies":89},"该仓库（awesome-dataset-tools）是一个 curated list（精选列表），仅汇总了各类数据集标注工具和库的链接，本身不是一个可运行的单一软件工具。因此，README 中不包含具体的操作系统、GPU、内存、Python 版本或依赖库的安装需求。用户需点击列表中具体工具的链接（如 LabelImg, CVAT, doccano 等）去查看各自独立的运行环境要求。部分工具为基于 Web 的服务或闭源商业平台，无需本地部署。",[],[14,16],[92,93,94,95,96,97],"datasets","awsome","awsome-list","machine-learning","annotations","annotation-tool","2026-03-27T02:49:30.150509","2026-04-16T16:18:35.926018",[],[]]