[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-visual-layer--fastdup":3,"tool-visual-layer--fastdup":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 
代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",158594,2,"2026-04-16T23:34:05",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",108322,"2026-04-10T11:39:34",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 
协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[52,13,15,14],"插件",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":32,"last_commit_at":59,"category_tags":60,"status":17},4721,"markitdown","microsoft\u002Fmarkitdown","MarkItDown 是一款由微软 AutoGen 团队打造的轻量级 Python 工具，专为将各类文件高效转换为 Markdown 格式而设计。它支持 PDF、Word、Excel、PPT、图片（含 OCR）、音频（含语音转录）、HTML 乃至 YouTube 链接等多种格式的解析，能够精准提取文档中的标题、列表、表格和链接等关键结构信息。\n\n在人工智能应用日益普及的今天，大语言模型（LLM）虽擅长处理文本，却难以直接读取复杂的二进制办公文档。MarkItDown 恰好解决了这一痛点，它将非结构化或半结构化的文件转化为模型“原生理解”且 Token 效率极高的 Markdown 格式，成为连接本地文件与 AI 分析 pipeline 的理想桥梁。此外，它还提供了 MCP（模型上下文协议）服务器，可无缝集成到 Claude Desktop 等 LLM 应用中。\n\n这款工具特别适合开发者、数据科学家及 AI 研究人员使用，尤其是那些需要构建文档检索增强生成（RAG）系统、进行批量文本分析或希望让 AI 助手直接“阅读”本地文件的用户。虽然生成的内容也具备一定可读性，但其核心优势在于为机器",93400,"2026-04-06T19:52:38",[52,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":66,"readme_en":67,"readme_zh":68,"quickstart_zh":69,"use_case_zh":70,"hero_image_url":71,"owner_login":72,"owner_name":73,"owner_avatar_url":74,"owner_bio":75,"owner_company":76,"owner_location":76,"owner_email":77,"owner_twitter":76,"owner_website":78,"owner_url":79,"languages":80,"stars":89,"forks":90,"last_commit_at":91,"license":92,"difficulty_score":93,"env_os":94,"env_gpu":95,"env_ram":96,"env_deps":97,"category_tags":101,"github_topics":102,"view_count":32,"oss_zip_url":76,"oss_zip_packed_at":76,"status":17,"created_at":122,"updated_at":123,"faqs":124,"releases":153},8286,"visual-layer\u002Ffastdup","fastdup","fastdup is a powerful, free tool designed to rapidly generate valuable insights from image and video datasets. 
It helps enhance the quality of both images and labels, while significantly reducing data operation costs, all with unmatched scalability.","fastdup 是一款专为图像和视频数据集打造的高效开源分析工具，旨在帮助用户快速从海量视觉数据中挖掘价值。它主要解决了数据集中常见的重复图片、异常样本、标签错误以及数据分布不均等痛点，通过自动化清洗和策展流程，显著提升数据质量，同时大幅降低数据存储与计算成本。\n\n这款工具特别适合 AI 开发者、数据科学家以及机器学习研究人员使用，尤其是在处理大规模数据集需要兼顾效率与精度的场景下。无论是训练前的数据预处理，还是模型迭代中的问题排查，fastdup 都能提供强有力的支持。\n\n其独特的技术亮点在于卓越的扩展性与运行速度，能够轻松应对百万级甚至更大规模的数据集。值得一提的是，fastdup 由多位顶尖技术专家联合创立，核心团队成员曾主导或参与了 XGBoost、Apache TVM 以及 Apple Turi Create 等知名开源项目的开发，深厚的技术底蕴确保了工具在算法优化与工程落地上的专业性。通过简单的命令行操作或 Python 接口，用户即可生成详尽的数据洞察报告，让数据管理工作变得更加轻松高效。","\u003C!-- PROJECT LOGO -->\n\u003Cbr \u002F>\n\u003Cdiv align=\"left\">\n  \u003Ca href=\"https:\u002F\u002Fwww.visual-layer.com\" target=\"_blank\" rel=\"noopener noreferrer\" name=\"top\">\n    \u003Cpicture>\n    \u003Csource media=\"(prefers-color-scheme: dark)\" srcset=\".\u002Fgallery\u002Flogo_dark_mode.png\" width=600>\n    \u003Csource media=\"(prefers-color-scheme: light)\" srcset=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_bb34762eae88.png\" width=600>\n    \u003Cimg alt=\"fastdup logo.\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_bb34762eae88.png\">\n    \u003C\u002Fpicture>\n  \u003C\u002Fa>\n  \u003Cbr>\n  \u003Cbr>\n  \u003C\u002Fdiv>\n\n\u003C!-- \u003Ch3 align=\"left\">Manage, Clean & Curate Visual Data - Fast and at Scale.\u003C\u002Fh3> -->\n\n[![PyPi][pypi-shield]][pypi-url]\n[![PyPi][pypiversion-shield]][pypi-url]\n[![PyPi][downloads-shield]][downloads-url]\n[![Contributors][contributors-shield]][contributors-url]\n[![License][license-shield]][license-url]\n[![OS][os-shield]][os-url]\n\n\n\u003C!-- MARKDOWN LINKS & IMAGES -->\n\u003C!-- https:\u002F\u002Fwww.markdownguide.org\u002Fbasic-syntax\u002F#reference-style-links -->\n[pypi-shield]: 
https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FPython-3.8%20|%203.9%20|%203.10%20|%203.11-blue?style=for-the-badge\n[pypi-url]: https:\u002F\u002Fpypi.org\u002Fproject\u002Ffastdup\u002F\n[pypiversion-shield]: https:\u002F\u002Fimg.shields.io\u002Fpypi\u002Fv\u002Ffastdup?style=for-the-badge&color=lightblue\n[downloads-shield]: https:\u002F\u002Fimg.shields.io\u002Fpepy\u002Fdt\u002Ffastdup?style=for-the-badge&color=success\n[downloads-url]: https:\u002F\u002Fpypi.org\u002Fproject\u002Ffastdup\u002F\n[contributors-shield]: https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fcontributors\u002Fvisual-layer\u002Ffastdup?style=for-the-badge&color=orange\n[contributors-url]: https:\u002F\u002Fgithub.com\u002Fothneildrew\u002FBest-README-Template\u002Fgraphs\u002Fcontributors\n[license-shield]: https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FLicense-CC%20BY--NC--ND%204.0-purple.svg?style=for-the-badge\n[license-url]: https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002FLICENSE\n[os-shield]: https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FSupported%20OS-macOS%20%7C%20Linux%20%7C%20Windows(WSL2)%20-yellow?style=for-the-badge\n[os-url]: https:\u002F\u002Fdocs.visual-layer.com\u002Fdocs\u002Finstalling-fastdup\n\n\n\n\n\u003Cp align=\"left\">\n  A powerful open-source tool for analyzing image and video datasets founded by the authors of \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fapache\u002Ftvm\">XGBoost\u003C\u002Fa>, \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fapache\u002Ftvm\">Apache TVM\u003C\u002Fa> & \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fapple\u002Fturicreate\">Turi Create\u003C\u002Fa> - \u003Ca href=\"https:\u002F\u002Fwww.linkedin.com\u002Fin\u002Fdr-danny-bickson-835b32\">Danny Bickson\u003C\u002Fa>, \u003Ca href=\"https:\u002F\u002Fwww.linkedin.com\u002Fin\u002Fcarlos-guestrin-5352a869\">Carlos Guestrin\u003C\u002Fa> and \u003Ca href=\"https:\u002F\u002Fwww.linkedin.com\u002Fin\u002Famiralush\">Amir 
Alush\u003C\u002Fa>.\u003C\u002Fp>\n  \u003Chr>\n    \u003Ca href=\"https:\u002F\u002Fvisual-layer.readme.io\u002F\" target=\"_blank\" rel=\"noopener noreferrer\">Documentation\u003C\u002Fa>\n    ·\n    \u003Ca href=\"#features--advantages\" target=\"_blank\" rel=\"noopener noreferrer\">Features\u003C\u002Fa>\n    ·\n    \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fissues\u002Fnew\u002Fchoose\" target=\"_blank\" rel=\"noopener noreferrer\">Report Bug\u003C\u002Fa>\n    ·\n    \u003Ca href=\"https:\u002F\u002Fmedium.com\u002Fvisual-layer\" target=\"_blank\" rel=\"noopener noreferrer\">Blog\u003C\u002Fa>\n    ·\n    \u003Ca href=\"#getting-started\" target=\"_blank\" rel=\"noopener noreferrer\">Quickstart\u003C\u002Fa>\n    ·\n    \u003Ca href=\"#visual-layer-cloud\" target=\"_blank\" rel=\"noopener noreferrer\">Visual Layer Cloud\u003C\u002Fa>\n    \u003Chr>\n\u003C\u002Fp>\n    \u003C!-- \u003Cbr \u002F>\n    \u003Cbr \u002F> \n    \u003Ca href=\"https:\u002F\u002Fdiscord.gg\u002FtkYHJCA7mb\" target=\"_blank\" rel=\"noopener noreferrer\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FDISCORD%20COMMUNITY-5865F2?style=for-the-badge&logo=discord&logoColor=white\" alt=\"Logo\">\n    \u003C\u002Fa>\n    \u003Ca href=\"https:\u002F\u002Fvisual-layer.readme.io\u002Fdiscuss\" target=\"_blank\" rel=\"noopener noreferrer\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FDISCUSSION%20FORUM-slateblue?style=for-the-badge&logo=discourse&logoWidth=20\" alt=\"Logo\">\n    \u003C\u002Fa>\n    \u003Ca href=\"https:\u002F\u002Fwww.linkedin.com\u002Fcompany\u002Fvisual-layer\u002F\" target=\"_blank\" rel=\"noopener noreferrer\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FLinkedIn-0077B5?style=for-the-badge&logo=linkedin&logoColor=white\" alt=\"Logo\">\n    \u003C\u002Fa>\n    \u003Ca href=\"https:\u002F\u002Ftwitter.com\u002Fvisual_layer\" target=\"_blank\" rel=\"noopener 
noreferrer\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FX%20(TWITTER)-000000?style=for-the-badge&logo=x&logoColor=white\" alt=\"Logo\">\n    \u003C\u002Fa>\n    \u003Ca href=\"https:\u002F\u002Fwww.youtube.com\u002F@visual-layer\" target=\"_blank\" rel=\"noopener noreferrer\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F-YouTube-black.svg?style=for-the-badge&logo=youtube&colorB=red\" alt=\"Logo\">\n    \u003C\u002Fa>\n  \u003Cbr \u002F>\n  \u003Cbr \u002F> -->\n\n## Getting Started\n\n`pip` install fastdup from [PyPI](https:\u002F\u002Fpypi.org\u002Fproject\u002Ffastdup\u002F):\n\n```bash\npip install fastdup\n```\n\nMore installation options are available [here](https:\u002F\u002Fvisual-layer.readme.io\u002Fdocs\u002Finstallation).\n\nInitialize and run fastdup:\n```python\nimport fastdup\n\nfd = fastdup.create(input_dir=\"IMAGE_FOLDER\u002F\")\nfd.run()\n```\n\nRemove duplicates from your dataset in a single call:\n\n```python\nimport fastdup\nfastdup.remove_duplicates(\"IMAGE_FOLDER\u002F\")\n```\nThis finds and deletes duplicate images (similarity > 0.96 by default) directly from disk. Use `dry_run=True` to preview which files would be removed before deleting, and adjust the `distance` parameter to control the similarity threshold.\n\n![run](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_0909bfd6639c.gif)\n\nVisualize the results in a static gallery:\n\n```python\nfd.vis.duplicates_gallery()    # gallery of duplicates\nfd.vis.outliers_gallery()      # gallery of outliers\nfd.vis.component_gallery()     # gallery of connected components\nfd.vis.stats_gallery()         # gallery of image statistics (e.g. 
blur, brightness, etc.)\nfd.vis.similarity_gallery()    # gallery of similar images\n```\n\n## Check this [quickstart tutorial](https:\u002F\u002Fyoutu.be\u002FGt46ciEIxtw) for more info\nhttps:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Fassets\u002F738a329d-8063-4515-a961-f2527934a0ca\n\n\n## Features & Advantages\nfastdup handles labeled\u002Funlabeled datasets in image or video format, providing a range of features:\n\n\u003Cdiv align=\"center\" style=\"display:flex;flex-direction:column;\">\n  \u003Ca href=\"https:\u002F\u002Fwww.visual-layer.com\" target=\"_blank\" rel=\"noopener noreferrer\">\n    \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_91b8f5267035.png\" alt=\"fastdup\" width=\"1000\">\n  \u003C\u002Fa>\n \u003C\u002Fdiv>\n\n\nWhat sets fastdup apart from other similar tools: \n\n+ **Quality**: High-quality analysis to identify duplicates\u002Fnear-duplicates, outliers, mislabels, broken images, and low-quality images.\n+ **Scale**: Highly scalable, capable of processing 400M images on a single CPU machine. Scales up to billions of images.\n+ **Speed**: Optimized C++ engine enables high performance even on low-resource CPU machines.\n+ **Privacy**: Runs locally or on your cloud infrastructure. Your data stays where it is.\n+ **Ease of use**: Works on labeled or unlabeled datasets in image or video format with support for major operating systems like MacOS, Linux and Windows.\n\n\n## Learn from Examples\nLearn the basics of fastdup through interactive examples. View the notebooks on GitHub or nbviewer. 
Even better, run them on Google Colab or Kaggle, for free.\n\n\n\n\u003Ctable>\n   \u003Ctr>\n      \u003Ctd rowspan=\"4\" width=\"160\">\n         \u003Ca href=\"https:\u002F\u002Fvisual-layer.readme.io\u002Fdocs\u002Fquickstart\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_38b7d3ddfff8.jpg\" width=\"200\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n      \u003Ctd rowspan=\"4\">\n         \u003Cb>⚡ Quickstart:\u003C\u002Fb> Learn how to install fastdup, load a dataset and analyze it for potential issues such as duplicates\u002Fnear-duplicates, broken images, outliers, dark\u002Fbright\u002Fblurry images, and view visually similar image clusters. If you're new, start here!\n         \u003Cbr>\n         \u003Cbr>\n         \u003Cb>📌 Dataset:\u003C\u002Fb> \u003Ca href=\"https:\u002F\u002Fwww.robots.ox.ac.uk\u002F~vgg\u002Fdata\u002Fpets\u002F\">Oxford-IIIT Pet\u003C\u002Fa>.\n      \u003C\u002Ftd>\n      \u003Ctd align=\"center\" width=\"80\">\n         \u003Ca href=\"https:\u002F\u002Fnbviewer.org\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fquickstart.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_751f8ff96c7f.png\" height=\"30\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fquickstart.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_438c17272c5f.png\" height=\"25\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca 
href=\"https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fquickstart.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_5a89a23c2924.png\" height=\"20\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fkaggle.com\u002Fkernels\u002Fwelcome?src=https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fquickstart.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_cc78b4156f67.png\" height=\"25\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003C!-- ------------------------------------------------------------------- -->\n   \u003Ctr>\n      \u003Ctd rowspan=\"4\" width=\"160\">\n         \u003Ca href=\"https:\u002F\u002Fvisual-layer.readme.io\u002Fdocs\u002Ffinding-removing-duplicates\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_61c836e24253.jpg\" width=\"200\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n      \u003Ctd rowspan=\"4\">\n         \u003Cb>🧹 Finding and Removing Duplicates:\u003C\u002Fb> Learn how to how to analyze an image dataset for duplicates and near-duplicates.\n         \u003Cbr>\n         \u003Cbr>\n         \u003Cb>📌 Dataset:\u003C\u002Fb> \u003Ca href=\"https:\u002F\u002Fwww.robots.ox.ac.uk\u002F~vgg\u002Fdata\u002Fpets\u002F\">Oxford-IIIT Pet\u003C\u002Fa>.\n      \u003C\u002Ftd>\n      \u003Ctd align=\"center\" width=\"80\">\n         \u003Ca href=\"https:\u002F\u002Fnbviewer.org\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Ffinding-removing-duplicates.ipynb\">\n         \u003Cimg 
src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_751f8ff96c7f.png\" height=\"30\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Ffinding-removing-duplicates.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_438c17272c5f.png\" height=\"25\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Ffinding-removing-duplicates.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_5a89a23c2924.png\" height=\"20\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fkaggle.com\u002Fkernels\u002Fwelcome?src=https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Ffinding-removing-duplicates.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_cc78b4156f67.png\" height=\"25\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003C!-- ------------------------------------------------------------------- -->\n   \u003Ctr>\n      \u003Ctd rowspan=\"4\" width=\"160\">\n         \u003Ca href=\"https:\u002F\u002Fvisual-layer.readme.io\u002Fdocs\u002Ffinding-removing-mislabels\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_a57099c3d42d.jpg\" width=\"200\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n      \u003Ctd 
rowspan=\"4\">\n         \u003Cb>🖼 Finding and Removing Mislabels:\u003C\u002Fb> Learn how to analyze an image dataset for potential image mislabels and export the list of mislabeled images for further inspection.\n         \u003Cbr>\n         \u003Cbr>\n         \u003Cb>📌 Dataset:\u003C\u002Fb> \u003Ca href=\"https:\u002F\u002Fdata.vision.ee.ethz.ch\u002Fcvl\u002Fdatasets_extra\u002Ffood-101\u002F\">Food-101\u003C\u002Fa>.\n      \u003C\u002Ftd>\n      \u003Ctd align=\"center\" width=\"80\">\n         \u003Ca href=\"https:\u002F\u002Fnbviewer.org\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Ffinding-removing-mislabels.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_751f8ff96c7f.png\" height=\"30\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Ffinding-removing-mislabels.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_438c17272c5f.png\" height=\"25\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Ffinding-removing-mislabels.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_5a89a23c2924.png\" height=\"20\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca 
href=\"https:\u002F\u002Fkaggle.com\u002Fkernels\u002Fwelcome?src=https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Ffinding-removing-mislabels.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_cc78b4156f67.png\" height=\"25\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003C!-- ------------------------------------------------------------------- -->\n   \u003Ctr>\n      \u003Ctd rowspan=\"4\" width=\"160\">\n         \u003Ca href=\"https:\u002F\u002Fvisual-layer.readme.io\u002Fdocs\u002Fimage-search\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_73be08df577d.jpg\" width=\"200\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n      \u003Ctd rowspan=\"4\">\n         \u003Cb>🎁 Image Similarity Search:\u003C\u002Fb> Perform image search in a large dataset of images.\n         \u003Cbr>\n         \u003Cbr>\n         \u003Cb>📌 Dataset:\u003C\u002Fb> \u003Ca href=\"https:\u002F\u002Fwww.kaggle.com\u002Fcompetitions\u002Fshopee-product-matching\u002Fdata\">Shopee Product Matching\u003C\u002Fa>.\n      \u003C\u002Ftd>\n      \u003Ctd align=\"center\" width=\"80\">\n         \u003Ca href=\"https:\u002F\u002Fnbviewer.org\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fimage-search.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_751f8ff96c7f.png\" height=\"30\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fimage-search.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_438c17272c5f.png\" height=\"25\">\n         
\u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fimage-search.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_5a89a23c2924.png\" height=\"20\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fkaggle.com\u002Fkernels\u002Fwelcome?src=https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fimage-search.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_cc78b4156f67.png\" height=\"25\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003C!-- ------------------------------------------------------------------- -->\n   \u003Ctr>\n        \u003Ctd rowspan=\"4\" width=\"160\">\n            \u003Ca href=\"https:\u002F\u002Fvisual-layer.readme.io\u002Fdocs\u002Fhugging-face-datasets\">\n                \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_0a64090f5f6c.jpg\" width=\"200\" \u002F>\n            \u003C\u002Fa>\n        \u003C\u002Ftd>\n        \u003Ctd rowspan=\"4\">\u003Cb>🤗 Hugging Face Datasets:\u003C\u002Fb> Load and analyze datasets from \u003Ca href=\"https:\u002F\u002Fhuggingface.co\u002Fdatasets\">Hugging Face Datasets\u003C\u002Fa>. 
Perfect if you already have a dataset hosted on Hugging Face hub.\n        \u003C\u002Ftd>\n        \u003Ctd align=\"center\" width=\"80\">\n            \u003Ca href=\"https:\u002F\u002Fnbviewer.org\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fanalyzing-hf-datasets.ipynb\">\n                \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_751f8ff96c7f.png\" height=\"30\" \u002F>\n            \u003C\u002Fa>\n        \u003C\u002Ftd>\n    \u003C\u002Ftr>\n    \u003Ctr>\n        \u003Ctd align=\"center\">\n            \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fanalyzing-hf-datasets.ipynb\">\n                \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_438c17272c5f.png\" height=\"25\" \u002F>\n            \u003C\u002Fa>\n        \u003C\u002Ftd>\n    \u003C\u002Ftr>\n    \u003Ctr>\n        \u003Ctd align=\"center\">\n            \u003Ca href=\"https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fanalyzing-hf-datasets.ipynb\">\n                \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_5a89a23c2924.png\" height=\"20\" \u002F>\n            \u003C\u002Fa>\n        \u003C\u002Ftd>\n    \u003C\u002Ftr>\n    \u003Ctr>\n        \u003Ctd align=\"center\">\n            \u003Ca href=\"https:\u002F\u002Fkaggle.com\u002Fkernels\u002Fwelcome?src=https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fanalyzing-hf-datasets.ipynb\">\n                \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_cc78b4156f67.png\" height=\"25\" \u002F>\n            \u003C\u002Fa>\n        \u003C\u002Ftd>\n    \u003C\u002Ftr>\n    \u003C!-- 
------------------------------------------------------------------- -->\n    \u003Ctr>\n      \u003Ctd rowspan=\"4\" width=\"160\">\n         \u003Ca href=\"https:\u002F\u002Fvisual-layer.readme.io\u002Fdocs\u002Fembeddings-timm\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_e17a45b17521.jpg\" width=\"200\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n      \u003Ctd rowspan=\"4\">\n         \u003Cb> 🧠 TIMM Embeddings:\u003C\u002Fb> Compute dataset embeddings using \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fpytorch-image-models\">TIMM (PyTorch Image Models)\u003C\u002Fa> and run fastdup over the them to surface dataset issues. Runs on CPU and GPU.\n      \u003C\u002Ftd>\n      \u003Ctd align=\"center\" width=\"80\">\n         \u003Ca href=\"https:\u002F\u002Fnbviewer.org\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fembeddings-timm.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_751f8ff96c7f.png\" height=\"30\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fembeddings-timm.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_438c17272c5f.png\" height=\"25\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fembeddings-timm.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_5a89a23c2924.png\" height=\"20\">\n         \u003C\u002Fa>\n      
\u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fkaggle.com\u002Fkernels\u002Fwelcome?src=https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fembeddings-timm.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_cc78b4156f67.png\" height=\"25\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003C!-- ------------------------------------------------------------------- -->\n   \u003Ctr>\n      \u003Ctd rowspan=\"4\" width=\"160\">\n         \u003Ca href=\"https:\u002F\u002Fvisual-layer.readme.io\u002Fdocs\u002Fgetting-started\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_771349d50d86.jpg\" width=\"200\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n      \u003Ctd rowspan=\"4\">\n         \u003Cb>🦖 ONNX Embeddings:\u003C\u002Fb> Bring your own ONNX model. In this example we extract feature vectors of your images using \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fdinov2\">DINOv2\u003C\u002Fa> model. 
Runs on CPU.\n      \u003C\u002Ftd>\n      \u003Ctd align=\"center\" width=\"80\">\n         \u003Ca href=\"https:\u002F\u002Fnbviewer.org\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fembeddings-onnx-dinov2.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_751f8ff96c7f.png\" height=\"30\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fembeddings-onnx-dinov2.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_438c17272c5f.png\" height=\"25\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fembeddings-onnx-dinov2.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_5a89a23c2924.png\" height=\"20\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fkaggle.com\u002Fkernels\u002Fwelcome?src=https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fembeddings-onnx-dinov2.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_cc78b4156f67.png\" height=\"25\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003C!-- ------------------------------------------------------------------- -->\n\u003C\u002Ftable>\n\nSee more [examples](EXAMPLES.md).\n\n\n## Join the Community\n\nGet help from the fastdup team or 
community members via the following channels:\n\n\u003Ca href=\"https:\u002F\u002Fdiscord.gg\u002FtkYHJCA7mb\" target=\"_blank\" rel=\"noopener noreferrer\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FDISCORD%20COMMUNITY-5865F2?style=for-the-badge&logo=discord&logoColor=white\" alt=\"Logo\">\n    \u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fvisual-layer.readme.io\u002Fdiscuss\" target=\"_blank\" rel=\"noopener noreferrer\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FDISCUSSION%20FORUM-slateblue?style=for-the-badge&logo=discourse&logoWidth=20\" alt=\"Logo\">\n    \u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fissues\u002Fnew\u002Fchoose\" target=\"_blank\" rel=\"noopener noreferrer\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fissues\u002Fvisual-layer\u002Ffastdup?style=for-the-badge&logo=github&logoColor=white\" alt=\"GitHub Issues\">\n\u003C\u002Fa>\n\n\nCommunity-contributed blog posts on fastdup:\n\n\u003Ctable>\n  \u003Ctr>\n    \u003Ctd>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_e1f35466a4c1.jpg\" width=\"200\">\u003C\u002Ftd>\n    \u003Ctd>\n      \u003Ca href=\"https:\u002F\u002Fmedium.com\u002F@atahanbulus.w\u002Fdeploying-aws-lambda-functions-with-docker-container-by-using-custom-base-image-2d110d307f9b\">Deploying AWS Lambda functions with Docker Container by using Custom Base Image\u003C\u002Fa>\u003Cbr>\n      🖋️ \u003Ca href=\"https:\u002F\u002Fmedium.com\u002F@atahanbulus.w\">atahan bulus\u003C\u002Fa> &nbsp;&nbsp;&nbsp;•&nbsp;&nbsp;&nbsp; 🗓 16 September 2023\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_07bb9a439a5f.jpg\" width=\"200\">\u003C\u002Ftd>\n    \u003Ctd>\n      \u003Ca 
href=\"https:\u002F\u002Fmedium.com\u002F@daniel-klitzke\u002Fcleaning-image-classification-datasets-with-fastdup-and-renumics-spotlight-e68deb4730a3\">Renumics: Cleaning Image Classification Datasets With fastdup and Renumics Spotlight\u003C\u002Fa>\u003Cbr>\n      🖋️ \u003Ca href=\"https:\u002F\u002Fmedium.com\u002F@daniel-klitzke\">Daniel Klitzke\u003C\u002Fa> &nbsp;&nbsp;&nbsp;•&nbsp;&nbsp;&nbsp; 🗓 4 September 2023\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_ae6d132b0778.jpg\" width=\"200\">\u003C\u002Ftd>\n    \u003Ctd>\n      \u003Ca href=\"https:\u002F\u002Fblog.roboflow.com\u002Fhow-to-reduce-dataset-size-computer-vision\u002F\">Roboflow: How to Reduce Dataset Size Without Losing Accuracy\u003C\u002Fa>\u003Cbr>\n      🖋️ \u003Ca href=\"https:\u002F\u002Fblog.roboflow.com\u002Fauthor\u002Farty\u002F\">Arty Ariuntuya\u003C\u002Fa> &nbsp;&nbsp;&nbsp;•&nbsp;&nbsp;&nbsp; 🗓 9 August 2023\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_6916e98019e1.jpg\" width=\"200\">\u003C\u002Ftd>\n    \u003Ctd>\n      \u003Ca href=\"https:\u002F\u002Falexlanseedoo.medium.com\u002Fthe-weighty-significance-of-data-cleanliness-eb03dce1d0f8\">The weighty significance of data cleanliness — or as I like to call it, “cleanliness is next to model-ness” — cannot be overstated.\u003C\u002Fa>\u003Cbr>\n      🖋️ \u003Ca href=\"https:\u002F\u002Falexlanseedoo.medium.com\u002F\">Alexander Lan\u003C\u002Fa> &nbsp;&nbsp;&nbsp;•&nbsp;&nbsp;&nbsp; 🗓 9 March 2023\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_f20aa0dd00d8.gif\" width=\"200\">\u003C\u002Ftd>\n    \u003Ctd>\n      \u003Ca 
href=\"https:\u002F\u002Fdicksonneoh.com\u002Fblog\u002Fclean_up_your_digital_life\u002F\">Clean Up Your Digital Life: How I Found 1929 Fully Identical Images, Dark, Bright and Blurry Shots in Minutes, For Free.\u003C\u002Fa>\u003Cbr>\n      🖋️ \u003Ca href=\"https:\u002F\u002Fmedium.com\u002F@dickson.neoh\">Dickson Neoh\u003C\u002Fa> &nbsp;&nbsp;&nbsp;•&nbsp;&nbsp;&nbsp; 🗓 23 February 2023\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_bb407b08e310.gif\" width=\"200\">\u003C\u002Ftd>\n    \u003Ctd>\n      \u003Ca href=\"https:\u002F\u002Fdicksonneoh.com\u002Fportfolio\u002Ffastdup_manage_clean_curate\u002F\">fastdup: A Powerful Tool to Manage, Clean & Curate Visual Data at Scale on Your CPU - For Free.\u003C\u002Fa>\u003Cbr>\n      🖋️ \u003Ca href=\"https:\u002F\u002Fmedium.com\u002F@dickson.neoh\">Dickson Neoh\u003C\u002Fa> &nbsp;&nbsp;&nbsp;•&nbsp;&nbsp;&nbsp; 🗓 3 January 2023\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_061fbcf2487a.jpg\" width=\"200\">\u003C\u002Ftd>\n    \u003Ctd>\n      \u003Ca href=\"https:\u002F\u002Ftowardsdatascience.com\u002Fmaster-data-integrity-to-clean-your-computer-vision-datasets-df432cf9e596\">Master Data Integrity to Clean Your Computer Vision Datasets.\u003C\u002Fa>\u003Cbr>\n      🖋️ \u003Ca href=\"https:\u002F\u002Fpauliusztin.medium.com\u002F\">Paul lusztin\u003C\u002Fa> &nbsp;&nbsp;&nbsp;•&nbsp;&nbsp;&nbsp; 🗓 19 December 2022\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n\u003C\u002Ftable>\n\n\nWhat our users say:\n\n![feedback](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_a7b80c8dd97f.jpg)\n\n![feedback2](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_635eecb37bc6.png)\n\n## Visual Layer Cloud\nVisual Layer offers commercial 
services for managing, cleaning, and curating visual data at scale. \n\n[Sign-up](https:\u002F\u002Fapp.visual-layer.com?utm_source=fastdup_readme) for free. \n\n\nhttps:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fassets\u002F6821286\u002F57f13d77-0ac4-4c74-8031-07fae87c5b00\n\nNot convinced? Interact with Visual Layer Cloud [public dataset](https:\u002F\u002Fapp.visual-layer.com\u002Fvl-datasets?utm_source=fastdup_readme) with no sign-up required.\n\n## Disclaimer\n\u003Cdetails>\n  \u003Csummary>\u003Cb>Usage Tracking\u003C\u002Fb>\u003C\u002Fsummary>\n\nWe have added an experimental crash report collection using [Sentry](https:\u002F\u002Fgithub.com\u002Fgetsentry\u002F). \n\nWe **DO NOT** collect user-specific information such as folder names, user names, image names, image content, etc. \nWe do collect data related to fastdup's internal operations and performance statistics such as total number of images, average runtime per image, total free memory, total free disk space, number of cores, etc. \n\nThis helps us identify and resolve stability issues, thereby improving the overall reliability of fastdup.\nThe code for the data collection is found [here](.\u002Ffastdup\u002Fsentry.py). On macOS we use [Google crashpad](https:\u002F\u002Fchromium.googlesource.com\u002Fcrashpad\u002Fcrashpad) to report crashes.\n\nUsers have the option to opt out of the experimental crash reporting system through one of the following methods:\n- Define an environment variable called `SENTRY_OPT_OUT`\n- or `run()` with `turi_param='run_sentry=0'`\n\n\u003C\u002Fdetails>\n\n## License\nfastdup is licensed under [Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International](https:\u002F\u002Fcreativecommons.org\u002Flicenses\u002Fby-nc-nd\u002F4.0\u002F) Public License. 
\n\nFor any more information or inquiries regarding the license, please contact us at info@visual-layer.com or see the [LICENSE](.\u002FLICENSE) file.\n\n\n\u003Cdiv align=\"right\">\u003Ca href=\"#top\">🔝 Back to Top\u003C\u002Fa>\u003C\u002Fdiv>\n\n","\u003C!-- 项目Logo -->\n\u003Cbr \u002F>\n\u003Cdiv align=\"left\">\n  \u003Ca href=\"https:\u002F\u002Fwww.visual-layer.com\" target=\"_blank\" rel=\"noopener noreferrer\" name=\"top\">\n    \u003Cpicture>\n    \u003Csource media=\"(prefers-color-scheme: dark)\" srcset=\".\u002Fgallery\u002Flogo_dark_mode.png\" width=600>\n    \u003Csource media=\"(prefers-color-scheme: light)\" srcset=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_bb34762eae88.png\" width=600>\n    \u003Cimg alt=\"fastdup logo.\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_bb34762eae88.png\">\n    \u003C\u002Fpicture>\n  \u003C\u002Fa>\n  \u003Cbr>\n  \u003Cbr>\n  \u003C\u002Fdiv>\n\n\u003C!-- \u003Ch3 align=\"left\">管理、清洗与精选视觉数据——快速且大规模。\u003C\u002Fh3> -->\n\n[![PyPi][pypi-shield]][pypi-url]\n[![PyPi][pypiversion-shield]][pypi-url]\n[![PyPi][downloads-shield]][downloads-url]\n[![Contributors][contributors-shield]][contributors-url]\n[![License][license-shield]][license-url]\n[![OS][os-shield]][os-url]\n\n\n\u003C!-- Markdown链接与图片 -->\n\u003C!-- https:\u002F\u002Fwww.markdownguide.org\u002Fbasic-syntax\u002F#reference-style-links -->\n[pypi-shield]: https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FPython-3.8%20|%203.9%20|%203.10%20|%203.11-blue?style=for-the-badge\n[pypi-url]: https:\u002F\u002Fpypi.org\u002Fproject\u002Ffastdup\u002F\n[pypiversion-shield]: https:\u002F\u002Fimg.shields.io\u002Fpypi\u002Fv\u002Ffastdup?style=for-the-badge&color=lightblue\n[downloads-shield]: https:\u002F\u002Fimg.shields.io\u002Fpepy\u002Fdt\u002Ffastdup?style=for-the-badge&color=success\n[downloads-url]: https:\u002F\u002Fpypi.org\u002Fproject\u002Ffastdup\u002F\n[contributors-shield]: 
https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fcontributors\u002Fvisual-layer\u002Ffastdup?style=for-the-badge&color=orange\n[contributors-url]: https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fgraphs\u002Fcontributors\n[license-shield]: https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FLicense-CC%20BY--NC--ND%204.0-purple.svg?style=for-the-badge\n[license-url]: https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002FLICENSE\n[os-shield]: https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FSupported%20OS-macOS%20%7C%20Linux%20%7C%20Windows(WSL2)%20-yellow?style=for-the-badge\n[os-url]: https:\u002F\u002Fdocs.visual-layer.com\u002Fdocs\u002Finstalling-fastdup\n\n\n\n\n\u003Cp align=\"left\">\n  一款功能强大的开源工具，用于分析图像和视频数据集，由\u003Ccode>XGBoost\u003C\u002Fcode>、\u003Ccode>Apache TVM\u003C\u002Fcode> 和 \u003Ccode>Turi Create\u003C\u002Fcode> 的作者创立——\u003Ca href=\"https:\u002F\u002Fwww.linkedin.com\u002Fin\u002Fdr-danny-bickson-835b32\">Danny Bickson\u003C\u002Fa>、\u003Ca href=\"https:\u002F\u002Fwww.linkedin.com\u002Fin\u002Fcarlos-guestrin-5352a869\">Carlos Guestrin\u003C\u002Fa> 和 \u003Ca href=\"https:\u002F\u002Fwww.linkedin.com\u002Fin\u002Famiralush\">Amir Alush\u003C\u002Fa>。\u003C\u002Fp>\n  \u003Chr>\n    \u003Ca href=\"https:\u002F\u002Fvisual-layer.readme.io\u002F\" target=\"_blank\" rel=\"noopener noreferrer\">文档\u003C\u002Fa>\n    ·\n    \u003Ca href=\"#features--advantages\" target=\"_blank\" rel=\"noopener noreferrer\">功能与优势\u003C\u002Fa>\n    ·\n    \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fissues\u002Fnew\u002Fchoose\" target=\"_blank\" rel=\"noopener noreferrer\">报告Bug\u003C\u002Fa>\n    ·\n    \u003Ca href=\"https:\u002F\u002Fmedium.com\u002Fvisual-layer\" target=\"_blank\" rel=\"noopener noreferrer\">博客\u003C\u002Fa>\n    ·\n    \u003Ca href=\"#getting-started\" target=\"_blank\" rel=\"noopener noreferrer\">快速入门\u003C\u002Fa>\n    ·\n    \u003Ca 
href=\"#visual-layer-cloud\" target=\"_blank\" rel=\"noopener noreferrer\">Visual Layer Cloud\u003C\u002Fa>\n    \u003Chr>\n\u003C\u002Fp>\n    \u003C!-- \u003Cbr \u002F>\n    \u003Cbr \u002F> \n    \u003Ca href=\"https:\u002F\u002Fdiscord.gg\u002FtkYHJCA7mb\" target=\"_blank\" rel=\"noopener noreferrer\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FDISCORD%20COMMUNITY-5865F2?style=for-the-badge&logo=discord&logoColor=white\" alt=\"Logo\">\n    \u003C\u002Fa>\n    \u003Ca href=\"https:\u002F\u002Fvisual-layer.readme.io\u002Fdiscuss\" target=\"_blank\" rel=\"noopener noreferrer\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FDISCUSSION%20FORUM-slateblue?style=for-the-badge&logo=discourse&logoWidth=20\" alt=\"Logo\">\n    \u003C\u002Fa>\n    \u003Ca href=\"https:\u002F\u002Fwww.linkedin.com\u002Fcompany\u002Fvisual-layer\u002F\" target=\"_blank\" rel=\"noopener noreferrer\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FLinkedIn-0077B5?style=for-the-badge&logo=linkedin&logoColor=white\" alt=\"Logo\">\n    \u003C\u002Fa>\n    \u003Ca href=\"https:\u002F\u002Ftwitter.com\u002Fvisual_layer\" target=\"_blank\" rel=\"noopener noreferrer\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FX%20(TWITTER)-000000?style=for-the-badge&logo=x&logoColor=white\" alt=\"Logo\">\n    \u003C\u002Fa>\n    \u003Ca href=\"https:\u002F\u002Fwww.youtube.com\u002F@visual-layer\" target=\"_blank\" rel=\"noopener noreferrer\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F-YouTube-black.svg?style=for-the-badge&logo=youtube&colorB=red\" alt=\"Logo\">\n    \u003C\u002Fa>\n  \u003Cbr \u002F>\n  \u003Cbr \u002F> -->\n\n## 快速入门\n\n通过 [PyPI](https:\u002F\u002Fpypi.org\u002Fproject\u002Ffastdup\u002F) 使用 `pip` 安装 fastdup：\n\n```bash\npip install fastdup\n```\n\n更多安装选项请参阅 [此处](https:\u002F\u002Fvisual-layer.readme.io\u002Fdocs\u002Finstallation)。\n\n初始化并运行 fastdup：\n```python\nimport 
fastdup\n\nfd = fastdup.create(input_dir=\"IMAGE_FOLDER\u002F\")\nfd.run()\n```\n\n只需一次调用即可从数据集中移除重复项：\n\n```python\nimport fastdup\nfastdup.remove_duplicates(\"IMAGE_FOLDER\u002F\")\n```\n此命令会直接在磁盘上查找并删除相似度超过默认阈值 0.96 的重复图像。使用 `dry_run=True` 可以预览将被删除的文件，并调整 `distance` 参数来控制相似度阈值。\n\n![run](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_0909bfd6639c.gif)\n\n在静态图库中可视化结果：\n\n```python\nfd.vis.duplicates_gallery()    # 重复图像图库\nfd.vis.outliers_gallery()      # 异常值图库\nfd.vis.component_gallery()     # 连通组件图库\nfd.vis.stats_gallery()         # 图像统计信息图库（如模糊度、亮度等）\nfd.vis.similarity_gallery()    # 相似图像图库\n```\n\n## 更多信息请查看此[快速入门教程](https:\u002F\u002Fyoutu.be\u002FGt46ciEIxtw)\nhttps:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Fassets\u002F738a329d-8063-4515-a961-f2527934a0ca\n\n\n## 功能与优势\nfastdup 可处理带标签或不带标签的图像或视频数据集，并提供一系列功能：\n\n\u003Cdiv align=\"center\" style=\"display:flex;flex-direction:column;\">\n  \u003Ca href=\"https:\u002F\u002Fwww.visual-layer.com\" target=\"_blank\" rel=\"noopener noreferrer\">\n    \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_91b8f5267035.png\" alt=\"fastdup\" width=\"1000\">\n  \u003C\u002Fa>\n \u003C\u002Fdiv>\n\n\nfastdup 与其他类似工具相比的独特之处在于：\n\n+ **质量**：高质量的分析能力，可识别重复\u002F近似重复、异常值、错误标签、损坏图像以及低质量图像。\n+ **规模**：高度可扩展，单台 CPU 机器即可处理 4 亿张图像，甚至可扩展至数十亿张。\n+ **速度**：优化的 C++ 引擎即使在资源有限的 CPU 设备上也能实现高性能。\n+ **隐私**：可在本地或您的云基础设施上运行，确保数据始终保留在您指定的位置。\n+ **易用性**：支持带标签或不带标签的图像及视频数据集，并兼容主流操作系统，如 macOS、Linux 和 Windows。\n\n\n## 通过示例学习\n通过交互式示例学习 fastdup 的基础知识。您可以在 GitHub 或 nbviewer 上查看这些笔记本，更好的是，您可以免费在 Google Colab 或 Kaggle 上运行它们。\n\n\u003Ctable>\n   \u003Ctr>\n      \u003Ctd rowspan=\"4\" width=\"160\">\n         \u003Ca href=\"https:\u002F\u002Fvisual-layer.readme.io\u002Fdocs\u002Fquickstart\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_38b7d3ddfff8.jpg\" width=\"200\">\n         \u003C\u002Fa>\n      
\u003C\u002Ftd>\n      \u003Ctd rowspan=\"4\">\n         \u003Cb>⚡ 快速入门：\u003C\u002Fb>了解如何安装 fastdup、加载数据集，并分析其中可能存在的问题，例如重复\u002F近似重复样本、损坏的图像、异常值、过暗\u002F过亮\u002F模糊的图像，以及查看视觉上相似的图像簇。如果您是新手，请从这里开始！\n         \u003Cbr>\n         \u003Cbr>\n         \u003Cb>📌 数据集：\u003C\u002Fb> \u003Ca href=\"https:\u002F\u002Fwww.robots.ox.ac.uk\u002F~vgg\u002Fdata\u002Fpets\u002F\">牛津-IIIT 宠物数据集\u003C\u002Fa>。\n      \u003C\u002Ftd>\n      \u003Ctd align=\"center\" width=\"80\">\n         \u003Ca href=\"https:\u002F\u002Fnbviewer.org\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fquickstart.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_751f8ff96c7f.png\" height=\"30\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fquickstart.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_438c17272c5f.png\" height=\"25\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fquickstart.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_5a89a23c2924.png\" height=\"20\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fkaggle.com\u002Fkernels\u002Fwelcome?src=https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fquickstart.ipynb\">\n         \u003Cimg 
src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_cc78b4156f67.png\" height=\"25\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003C!-- ------------------------------------------------------------------- -->\n   \u003Ctr>\n      \u003Ctd rowspan=\"4\" width=\"160\">\n         \u003Ca href=\"https:\u002F\u002Fvisual-layer.readme.io\u002Fdocs\u002Ffinding-removing-duplicates\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_61c836e24253.jpg\" width=\"200\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n      \u003Ctd rowspan=\"4\">\n         \u003Cb>🧹 查找并移除重复项：\u003C\u002Fb>学习如何分析图像数据集中的重复和近似重复样本。\n         \u003Cbr>\n         \u003Cbr>\n         \u003Cb>📌 数据集：\u003C\u002Fb> \u003Ca href=\"https:\u002F\u002Fwww.robots.ox.ac.uk\u002F~vgg\u002Fdata\u002Fpets\u002F\">牛津-IIIT 宠物数据集\u003C\u002Fa>。\n      \u003C\u002Ftd>\n      \u003Ctd align=\"center\" width=\"80\">\n         \u003Ca href=\"https:\u002F\u002Fnbviewer.org\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Ffinding-removing-duplicates.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_751f8ff96c7f.png\" height=\"30\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Ffinding-removing-duplicates.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_438c17272c5f.png\" height=\"25\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca 
href=\"https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Ffinding-removing-duplicates.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_5a89a23c2924.png\" height=\"20\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fkaggle.com\u002Fkernels\u002Fwelcome?src=https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Ffinding-removing-duplicates.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_cc78b4156f67.png\" height=\"25\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003C!-- ------------------------------------------------------------------- -->\n   \u003Ctr>\n      \u003Ctd rowspan=\"4\" width=\"160\">\n         \u003Ca href=\"https:\u002F\u002Fvisual-layer.readme.io\u002Fdocs\u002Ffinding-removing-mislabels\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_a57099c3d42d.jpg\" width=\"200\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n      \u003Ctd rowspan=\"4\">\n         \u003Cb>🖼 查找并移除错误标签：\u003C\u002Fb>学习如何分析图像数据集中可能存在的标签错误，并导出错误标签图像列表以便进一步检查。\n         \u003Cbr>\n         \u003Cbr>\n         \u003Cb>📌 数据集：\u003C\u002Fb> \u003Ca href=\"https:\u002F\u002Fdata.vision.ee.ethz.ch\u002Fcvl\u002Fdatasets_extra\u002Ffood-101\u002F\">Food-101 数据集\u003C\u002Fa>。\n      \u003C\u002Ftd>\n      \u003Ctd align=\"center\" width=\"80\">\n         \u003Ca href=\"https:\u002F\u002Fnbviewer.org\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Ffinding-removing-mislabels.ipynb\">\n         \u003Cimg 
src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_751f8ff96c7f.png\" height=\"30\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Ffinding-removing-mislabels.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_438c17272c5f.png\" height=\"25\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Ffinding-removing-mislabels.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_5a89a23c2924.png\" height=\"20\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fkaggle.com\u002Fkernels\u002Fwelcome?src=https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Ffinding-removing-mislabels.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_cc78b4156f67.png\" height=\"25\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003C!-- ------------------------------------------------------------------- -->\n   \u003Ctr>\n      \u003Ctd rowspan=\"4\" width=\"160\">\n         \u003Ca href=\"https:\u002F\u002Fvisual-layer.readme.io\u002Fdocs\u002Fimage-search\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_73be08df577d.jpg\" width=\"200\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n      \u003Ctd rowspan=\"4\">\n      
   \u003Cb>🎁 图像相似性搜索：\u003C\u002Fb>在大型图像数据集中执行图像搜索。\n         \u003Cbr>\n         \u003Cbr>\n         \u003Cb>📌 数据集：\u003C\u002Fb> \u003Ca href=\"https:\u002F\u002Fwww.kaggle.com\u002Fcompetitions\u002Fshopee-product-matching\u002Fdata\">Shopee 产品匹配数据集\u003C\u002Fa>。\n      \u003C\u002Ftd>\n      \u003Ctd align=\"center\" width=\"80\">\n         \u003Ca href=\"https:\u002F\u002Fnbviewer.org\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fimage-search.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_751f8ff96c7f.png\" height=\"30\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fimage-search.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_438c17272c5f.png\" height=\"25\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fimage-search.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_5a89a23c2924.png\" height=\"20\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fkaggle.com\u002Fkernels\u002Fwelcome?src=https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fimage-search.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_cc78b4156f67.png\" height=\"25\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   
\u003C\u002Ftr>\n   \u003C!-- ------------------------------------------------------------------- -->\n   \u003Ctr>\n        \u003Ctd rowspan=\"4\" width=\"160\">\n            \u003Ca href=\"https:\u002F\u002Fvisual-layer.readme.io\u002Fdocs\u002Fhugging-face-datasets\">\n                \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_0a64090f5f6c.jpg\" width=\"200\" \u002F>\n            \u003C\u002Fa>\n        \u003C\u002Ftd>\n        \u003Ctd rowspan=\"4\">\u003Cb>🤗 Hugging Face 数据集：\u003C\u002Fb>加载并分析来自 \u003Ca href=\"https:\u002F\u002Fhuggingface.co\u002Fdatasets\">Hugging Face 数据集\u003C\u002Fa> 的数据集。如果您已经在 Hugging Face 中心托管了数据集，这将非常理想。\n        \u003C\u002Ftd>\n        \u003Ctd align=\"center\" width=\"80\">\n            \u003Ca href=\"https:\u002F\u002Fnbviewer.org\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fanalyzing-hf-datasets.ipynb\">\n                \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_751f8ff96c7f.png\" height=\"30\" \u002F>\n            \u003C\u002Fa>\n        \u003C\u002Ftd>\n    \u003C\u002Ftr>\n    \u003Ctr>\n        \u003Ctd align=\"center\">\n            \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fanalyzing-hf-datasets.ipynb\">\n                \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_438c17272c5f.png\" height=\"25\" \u002F>\n            \u003C\u002Fa>\n        \u003C\u002Ftd>\n    \u003C\u002Ftr>\n    \u003Ctr>\n        \u003Ctd align=\"center\">\n            \u003Ca href=\"https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fanalyzing-hf-datasets.ipynb\">\n                \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_5a89a23c2924.png\" height=\"20\" 
\u002F>\n            \u003C\u002Fa>\n        \u003C\u002Ftd>\n    \u003C\u002Ftr>\n    \u003Ctr>\n        \u003Ctd align=\"center\">\n            \u003Ca href=\"https:\u002F\u002Fkaggle.com\u002Fkernels\u002Fwelcome?src=https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fanalyzing-hf-datasets.ipynb\">\n                \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_cc78b4156f67.png\" height=\"25\" \u002F>\n            \u003C\u002Fa>\n        \u003C\u002Ftd>\n    \u003C\u002Ftr>\n    \u003C!-- ------------------------------------------------------------------- -->\n   \u003Ctr>\n      \u003Ctd rowspan=\"4\" width=\"160\">\n         \u003Ca href=\"https:\u002F\u002Fvisual-layer.readme.io\u002Fdocs\u002Fembeddings-timm\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_e17a45b17521.jpg\" width=\"200\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n      \u003Ctd rowspan=\"4\">\n         \u003Cb> 🧠 TIMM 嵌入：\u003C\u002Fb>使用 \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fhuggingface\u002Fpytorch-image-models\">TIMM（PyTorch 图像模型）\u003C\u002Fa> 计算数据集嵌入，并在其上运行 fastdup 以发现数据集中的问题。支持 CPU 和 GPU。\n      \u003C\u002Ftd>\n      \u003Ctd align=\"center\" width=\"80\">\n         \u003Ca href=\"https:\u002F\u002Fnbviewer.org\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fembeddings-timm.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_751f8ff96c7f.png\" height=\"30\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fembeddings-timm.ipynb\">\n         \u003Cimg src=
\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_438c17272c5f.png\" height=\"25\" \u002F>\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fembeddings-timm.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_5a89a23c2924.png\" height=\"20\" \u002F>\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fkaggle.com\u002Fkernels\u002Fwelcome?src=https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fembeddings-timm.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_cc78b4156f67.png\" height=\"25\" \u002F>\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003C!-- ------------------------------------------------------------------- -->\n   \u003Ctr>\n      \u003Ctd rowspan=\"4\" width=\"160\">\n         \u003Ca href=\"https:\u002F\u002Fvisual-layer.readme.io\u002Fdocs\u002Fgetting-started\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_771349d50d86.jpg\" width=\"200\">\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n      \u003Ctd rowspan=\"4\">\n         \u003Cb>🦖 ONNX 嵌入：\u003C\u002Fb>使用您自己的 ONNX 模型。在本示例中，我们使用 \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002Fdinov2\">DINOv2\u003C\u002Fa> 模型提取图像的特征向量。仅支持 CPU。\n      \u003C\u002Ftd>\n      \u003Ctd align=\"center\" width=\"80\">\n         \u003Ca 
href=\"https:\u002F\u002Fnbviewer.org\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fembeddings-onnx-dinov2.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_751f8ff96c7f.png\" height=\"30\" \u002F>\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fembeddings-onnx-dinov2.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_438c17272c5f.png\" height=\"25\" \u002F>\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fcolab.research.google.com\u002Fgithub\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fembeddings-onnx-dinov2.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_5a89a23c2924.png\" height=\"20\" \u002F>\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003Ctr>\n      \u003Ctd align=\"center\">\n         \u003Ca href=\"https:\u002F\u002Fkaggle.com\u002Fkernels\u002Fwelcome?src=https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fblob\u002Fmain\u002Fexamples\u002Fembeddings-onnx-dinov2.ipynb\">\n         \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_cc78b4156f67.png\" height=\"25\" \u002F>\n         \u003C\u002Fa>\n      \u003C\u002Ftd>\n   \u003C\u002Ftr>\n   \u003C!-- ------------------------------------------------------------------- -->\n\u003C\u002Ftable>\n\n查看更多[示例](EXAMPLES.md)。\n\n\n\n\n## 加入社区\n\n通过以下渠道从 fastdup 团队或社区成员处获得帮助：\n\n\u003Ca href=\"https:\u002F\u002Fdiscord.gg\u002FtkYHJCA7mb\" target=\"_blank\" rel=\"noopener 
noreferrer\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FDISCORD%20COMMUNITY-5865F2?style=for-the-badge&logo=discord&logoColor=white\" alt=\"Logo\">\n\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fvisual-layer.readme.io\u002Fdiscuss\" target=\"_blank\" rel=\"noopener noreferrer\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FDISCUSSION%20FORUM-slateblue?style=for-the-badge&logo=discourse&logoWidth=20\" alt=\"Logo\">\n\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fissues\u002Fnew\u002Fchoose\" target=\"_blank\" rel=\"noopener noreferrer\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Fissues\u002Fvisual-layer\u002Ffastdup?style=for-the-badge&logo=github&logoColor=white\" alt=\"GitHub Issues\">\n\u003C\u002Fa>\n\n\n社区贡献的关于 fastdup 的博客文章：\n\n\u003Ctable>\n  \u003Ctr>\n    \u003Ctd>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_e1f35466a4c1.jpg\" width=\"200\">\u003C\u002Ftd>\n    \u003Ctd>\n      \u003Ca href=\"https:\u002F\u002Fmedium.com\u002F@atahanbulus.w\u002Fdeploying-aws-lambda-functions-with-docker-container-by-using-custom-base-image-2d110d307f9b\">使用自定义基础镜像通过 Docker 容器部署 AWS Lambda 函数\u003C\u002Fa>\u003Cbr>\n      🖋️ \u003Ca href=\"https:\u002F\u002Fmedium.com\u002F@atahanbulus.w\">atahan bulus\u003C\u002Fa> &nbsp;&nbsp;&nbsp;•&nbsp;&nbsp;&nbsp; 🗓 16 September 2023\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_07bb9a439a5f.jpg\" width=\"200\">\u003C\u002Ftd>\n    \u003Ctd>\n      \u003Ca href=\"https:\u002F\u002Fmedium.com\u002F@daniel-klitzke\u002Fcleaning-image-classification-datasets-with-fastdup-and-renumics-spotlight-e68deb4730a3\">Renumics：使用 fastdup 和 Renumics Spotlight 清理图像分类数据集\u003C\u002Fa>\u003Cbr>\n      🖋️ \u003Ca 
href=\"https:\u002F\u002Fmedium.com\u002F@daniel-klitzke\">Daniel Klitzke\u003C\u002Fa> &nbsp;&nbsp;&nbsp;•&nbsp;&nbsp;&nbsp; 🗓 4 September 2023\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_ae6d132b0778.jpg\" width=\"200\">\u003C\u002Ftd>\n    \u003Ctd>\n      \u003Ca href=\"https:\u002F\u002Fblog.roboflow.com\u002Fhow-to-reduce-dataset-size-computer-vision\u002F\">Roboflow：如何在不损失精度的情况下减少数据集大小\u003C\u002Fa>\u003Cbr>\n      🖋️ \u003Ca href=\"https:\u002F\u002Fblog.roboflow.com\u002Fauthor\u002Farty\u002F\">Arty Ariuntuya\u003C\u002Fa> &nbsp;&nbsp;&nbsp;•&nbsp;&nbsp;&nbsp; 🗓 9 August 2023\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_6916e98019e1.jpg\" width=\"200\">\u003C\u002Ftd>\n    \u003Ctd>\n      \u003Ca href=\"https:\u002F\u002Falexlanseedoo.medium.com\u002Fthe-weighty-significance-of-data-cleanliness-eb03dce1d0f8\">数据清洁的重要性——或者正如我所喜欢称呼的那样，“清洁度仅次于模型性能”——怎么强调都不为过。\u003C\u002Fa>\u003Cbr>\n      🖋️ \u003Ca href=\"https:\u002F\u002Falexlanseedoo.medium.com\u002F\">Alexander Lan\u003C\u002Fa> &nbsp;&nbsp;&nbsp;•&nbsp;&nbsp;&nbsp; 🗓 9 March 2023\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_f20aa0dd00d8.gif\" width=\"200\">\u003C\u002Ftd>\n    \u003Ctd>\n      \u003Ca href=\"https:\u002F\u002Fdicksonneoh.com\u002Fblog\u002Fclean_up_your_digital_life\u002F\">清理你的数字生活：我如何在几分钟内免费找到 1929 张完全相同的图片、暗光、过亮和模糊的照片。\u003C\u002Fa>\u003Cbr>\n      🖋️ \u003Ca href=\"https:\u002F\u002Fmedium.com\u002F@dickson.neoh\">Dickson Neoh\u003C\u002Fa> &nbsp;&nbsp;&nbsp;•&nbsp;&nbsp;&nbsp; 🗓 23 February 2023\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd>\u003Cimg 
src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_bb407b08e310.gif\" width=\"200\">\u003C\u002Ftd>\n    \u003Ctd>\n      \u003Ca href=\"https:\u002F\u002Fdicksonneoh.com\u002Fportfolio\u002Ffastdup_manage_clean_curate\u002F\">fastdup：一款强大的工具，可在您的 CPU 上免费大规模管理、清洗和整理视觉数据。\u003C\u002Fa>\u003Cbr>\n      🖋️ \u003Ca href=\"https:\u002F\u002Fmedium.com\u002F@dickson.neoh\">Dickson Neoh\u003C\u002Fa> &nbsp;&nbsp;&nbsp;•&nbsp;&nbsp;&nbsp; 🗓 3 January 2023\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n  \u003Ctr>\n    \u003Ctd>\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_061fbcf2487a.jpg\" width=\"200\">\u003C\u002Ftd>\n    \u003Ctd>\n      \u003Ca href=\"https:\u002F\u002Ftowardsdatascience.com\u002Fmaster-data-integrity-to-clean-your-computer-vision-datasets-df432cf9e596\">掌握数据完整性，清理你的计算机视觉数据集。\u003C\u002Fa>\u003Cbr>\n      🖋️ \u003Ca href=\"https:\u002F\u002Fpauliusztin.medium.com\u002F\">Paul Iusztin\u003C\u002Fa> &nbsp;&nbsp;&nbsp;•&nbsp;&nbsp;&nbsp; 🗓 19 December 2022\n    \u003C\u002Ftd>\n  \u003C\u002Ftr>\n\u003C\u002Ftable>\n\n\n我们的用户怎么说：\n\n![feedback](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_a7b80c8dd97f.jpg)\n\n![feedback2](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_readme_635eecb37bc6.png)\n\n## Visual Layer Cloud\nVisual Layer 提供用于大规模管理、清洗和整理视觉数据的商业服务。\n\n[免费注册](https:\u002F\u002Fapp.visual-layer.com?utm_source=fastdup_readme)。\n\n\nhttps:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fassets\u002F6821286\u002F57f13d77-0ac4-4c74-8031-07fae87c5b00\n\n还不太相信？无需注册即可与 Visual Layer Cloud 的[公共数据集](https:\u002F\u002Fapp.visual-layer.com\u002Fvl-datasets?utm_source=fastdup_readme)互动。\n\n## 免责声明\n\u003Cdetails>\n  \u003Csummary>\u003Cb>使用情况跟踪\u003C\u002Fb>\u003C\u002Fsummary>\n\n我们已添加了一个基于 [Sentry](https:\u002F\u002Fgithub.com\u002Fgetsentry\u002F) 
的实验性崩溃报告收集功能。\n\n我们**不会**收集用户特定的信息，例如文件夹名称、用户名、图像名称、图像内容等。我们会收集与 fastdup 内部运行和性能统计相关的信息，如图像总数、每张图像的平均运行时间、总可用内存、总可用磁盘空间、核心数等。\n\n这有助于我们识别并解决稳定性问题，从而提高 fastdup 的整体可靠性。数据收集代码位于[此处](.\u002Ffastdup\u002Fsentry.py)。在 MAC 系统上，我们使用 [Google crashpad](https:\u002F\u002Fchromium.googlesource.com\u002Fcrashpad\u002Fcrashpad) 来报告崩溃。\n\n用户可以通过以下任一方式选择退出实验性崩溃报告系统：\n- 定义名为 `SENTRY_OPT_OUT` 的环境变量\n- 或者在调用 `run()` 时设置 `turi_param='run_sentry=0'`\n\n\u003C\u002Fdetails>\n\n## 许可证\nfastdup 根据 [知识共享署名-非商业性使用-禁止演绎 4.0 国际](https:\u002F\u002Fcreativecommons.org\u002Flicenses\u002Fby-nc-nd\u002F4.0\u002F) 公共许可证进行许可。\n\n如需了解更多关于许可证的信息或有任何疑问，请发送邮件至 info@visual-layer.com 或参阅 [LICENSE](.\u002FLICENSE) 文件。\n\n\n\u003Cdiv align=\"right\">\u003Ca href=\"#top\">🔝 返回顶部\u003C\u002Fa>\u003C\u002Fdiv>","# fastdup 快速上手指南\n\nfastdup 是一款由 XGBoost、Apache TVM 和 Turi Create 作者团队开发的高性能开源工具，专为大规模图像和视频数据集的分析、清洗和管理而设计。它能快速识别重复项、异常值、错误标签及低质量图片。\n\n## 环境准备\n\n在开始之前，请确保您的开发环境满足以下要求：\n\n*   **操作系统**：支持 macOS、Linux 或 Windows (需使用 WSL2)。\n*   **Python 版本**：3.8, 3.9, 3.10 或 3.11。\n*   **硬件建议**：基于优化的 C++ 引擎，fastdup 即使在低资源 CPU 机器上也能高效运行，支持单机处理数亿级图像。\n\n## 安装步骤\n\n推荐使用 `pip` 进行安装。国内开发者若遇到下载速度慢的问题，可使用清华或阿里云镜像源加速安装。\n\n**标准安装：**\n```bash\npip install fastdup\n```\n\n**使用国内镜像源加速安装（推荐）：**\n```bash\npip install fastdup -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n```\n\n## 基本使用\n\n### 1. 初始化并运行分析\n\n创建一个 Python 脚本或直接在交互式环境中运行以下代码，将 `IMAGE_FOLDER\u002F` 替换为您的数据集路径：\n\n```python\nimport fastdup\n\n# 创建 fastdup 对象并指定输入目录\nfd = fastdup.create(input_dir=\"IMAGE_FOLDER\u002F\")\n\n# 执行分析\nfd.run()\n```\n\n### 2. 一键去重\n\n如果您只想快速查找并删除数据集中的重复图片（默认相似度阈值 > 0.96），可以使用单行命令：\n\n```python\nimport fastdup\n# 直接删除重复文件\nfastdup.remove_duplicates(\"IMAGE_FOLDER\u002F\")\n```\n\n**安全预览模式**：在实际删除前，建议先使用 `dry_run=True` 预览将被删除的文件列表：\n\n```python\n# 仅预览，不实际删除\nfastdup.remove_duplicates(\"IMAGE_FOLDER\u002F\", dry_run=True)\n```\n\n您还可以通过调整 `distance` 参数来控制相似度阈值。\n\n### 3. 
可视化分析结果\n\n分析完成后，fastdup 可以生成多种静态画廊以直观展示数据问题：\n\n```python\n# 查看重复图片画廊\nfd.vis.duplicates_gallery()\n\n# 查看异常值画廊\nfd.vis.outliers_gallery()\n\n# 查看连通分量画廊\nfd.vis.component_gallery()\n\n# 查看图像统计信息画廊（如模糊度、亮度等）\nfd.vis.stats_gallery()\n\n# 查看相似图片聚类画廊\nfd.vis.similarity_gallery()\n```\n\n运行上述代码后，您将在本地看到生成的 HTML 报告，方便进一步审查和清洗数据。","某自动驾驶初创公司的数据团队正面临从路测车辆回传的 50 万张道路图像数据的清洗难题，急需在模型训练前剔除无效样本并修正错误标注。\n\n### 没有 fastdup 时\n- 工程师需编写繁琐的脚本逐张抽检，耗时两周才勉强发现部分模糊或过曝的废片，大量低质数据混入训练集导致模型收敛缓慢。\n- 标注团队人工复核成本极高，难以察觉那些“看起来正常但标签错误”的隐蔽样本（如将卡车误标为轿车），严重干扰模型判断逻辑。\n- 数据集中存在大量近乎重复的连续帧，不仅浪费了宝贵的 GPU 算力，还导致模型对特定场景过拟合，泛化能力差。\n- 缺乏全局数据洞察，团队无法快速定位长尾分布问题，只能凭经验盲目扩充数据，迭代周期被无限拉长。\n\n### 使用 fastdup 后\n- fastdup 在数小时内自动扫描全量数据，精准识别并隔离了模糊、黑暗及损坏图像，使训练集纯净度提升 40%，模型训练速度显著加快。\n- 通过异常检测功能，fastdup 迅速锁定了数千张标签与图像内容不符的样本，团队得以针对性修正，大幅提升了模型的识别准确率。\n- 工具自动聚类并去除了高度相似的冗余帧，在保持数据多样性的同时将数据集体积缩减 30%，直接降低了云端的存储与计算成本。\n- 生成的可视化报告清晰展示了数据分布短板，指导团队有的放矢地采集稀缺场景数据，将算法迭代周期从数周缩短至几天。\n\nfastdup 将原本需要数周的人工数据治理工作压缩至小时级，让团队能以最低成本构建高质量视觉数据集，从而加速核心算法的落地进程。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fvisual-layer_fastdup_91b8f526.png","visual-layer","Visual Layer","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fvisual-layer_c1c75061.png","",null,"github@visual-layer.com","www.visual-layer.com","https:\u002F\u002Fgithub.com\u002Fvisual-layer",[81,85],{"name":82,"color":83,"percentage":84},"Python","#3572A5",99.9,{"name":86,"color":87,"percentage":88},"Dockerfile","#384d54",0.1,1848,88,"2026-04-16T11:17:29","NOASSERTION",1,"Linux, macOS, Windows (WSL2)","非必需。工具基于优化的 C++ 引擎，专为在低资源 CPU 机器上高性能运行而设计。","未说明（文档提及单台 CPU 机器可处理 4 亿张图像，暗示内存效率极高）",{"notes":98,"python":99,"dependencies":100},"该工具由 XGBoost、Apache TVM 和 Turi Create 的作者开发。支持本地运行或部署在用户自己的云基础设施上以保护数据隐私。可通过 pip 直接安装。能够处理标记或未标记的图像及视频数据集，用于查找重复项、异常值、错误标签和低质量图像。","3.8, 3.9, 3.10, 
3.11",[],[15,16,14],[103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121],"data-curation","dataset","deep-learning","image-duplicate-detection","machine-learning","novelty-detection","object-detection","outlier-detection","python","visual-search","data-augmentation","image-classification","image","image-classfication","image-processing","visualization-tools","image-analysis","visualization","image-similarity","2026-03-27T02:49:30.150509","2026-04-17T08:25:18.618447",[125,130,135,140,145,149],{"id":126,"question_zh":127,"answer_zh":128,"source_url":129},37103,"在 CentOS 系统上安装 fastdup 失败，提示找不到匹配的版本或共享库，如何解决？","这通常是因为系统无法自动找到 fastdup 的共享库路径。解决方法是手动设置 LD_LIBRARY_PATH 环境变量。请在终端执行以下命令（请根据实际安装路径调整）：\nexport LD_LIBRARY_PATH=\u002Fpath\u002Fto\u002Fvenv\u002Flib\u002Fpython3.8\u002Fsite-packages\u002Ffastdup\u002Flib\n设置完成后，在同一终端窗口中重新运行 Python 脚本即可正常加载库文件。","https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fissues\u002F8",{"id":131,"question_zh":132,"answer_zh":133,"source_url":134},37104,"运行 fastdup 时出现 'Failed to initialize ort model' 错误或内核崩溃（Kernel Crash），该怎么办？","如果在日志中看到关于 ONNX 模型初始化失败的错误，或者遇到类似 'Error: failed to set cpu affinity error code 2' 的警告，通常可以忽略。维护者已确认这些错误在某些环境下已被改为警告信息，不影响程序正常运行。如果程序在报错后继续执行并输出结果，则无需担心；若程序完全卡死，请尝试减少线程数或在纯终端环境中运行以排查是否是 Jupyter 内核兼容性问题。","https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fissues\u002F317",{"id":136,"question_zh":137,"answer_zh":138,"source_url":139},37105,"在 macOS 13.x (Ventura) 的 Intel CPU 机器上无法通过 pip 安装 fastdup 怎么办？","目前 fastdup 已不再支持 macOS 上的 Intel CPU 架构。如果您使用的是 Intel 芯片的 Mac，将无法通过 pip 直接安装该库。建议更换为 Apple Silicon (M1\u002FM2\u002FM3) 设备的 Mac，或使用 Linux\u002FWindows 环境进行部署。","https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fissues\u002F214",{"id":141,"question_zh":142,"answer_zh":143,"source_url":144},37106,"处理大量图片（如 10 万 +）时出现 'buffer overflow detected' 或 'Canceled future' 错误，如何修复？","在处理大规模数据集时出现此类内存或线程错误，可以通过限制并发线程数来解决。在调用 fd.run() 时，显式设置 num_threads 
参数为较小的值（例如 4 或更低）：\nfd.run(num_threads=4, threshold=0.96, compute=\"cpu\", verbose=True)\n降低线程数可以减少内存峰值占用，避免缓冲区溢出导致的程序崩溃。","https:\u002F\u002Fgithub.com\u002Fvisual-layer\u002Ffastdup\u002Fissues\u002F94",{"id":146,"question_zh":147,"answer_zh":148,"source_url":144},37107,"运行时提示 'the system cannot find the path' 是否意味着程序出错了？","不需要担心，'the system cannot find the path' 只是一个警告信息而非致命错误。程序在打印该警告后通常会继续正常运行。您可以忽略此消息，继续执行后续的图片去重或分析任务。",{"id":150,"question_zh":151,"answer_zh":152,"source_url":129},37108,"fastdup 是否支持 Python 3.10 版本？","是的，fastdup 支持 Python 3.10。如果在 CentOS 等特定系统上遇到问题，请确保使用了正确的虚拟环境，并参考相关 Issue 中关于设置 LD_LIBRARY_PATH 的解决方案。维护者也在持续发布适配新 Python 版本的安装包。",[154,159,163,168,172,176,180,184,188,192,196,201,205,209,213,217,221,225,229,233],{"id":155,"version":156,"summary_zh":157,"released_at":158},297590,"v2.2_3.7","当前","2024-06-14T09:05:59",{"id":160,"version":161,"summary_zh":157,"released_at":162},297591,"v2.2_3.8","2024-06-14T09:05:57",{"id":164,"version":165,"summary_zh":166,"released_at":167},297592,"v1.119","fastdup 1.119","2024-04-04T03:50:42",{"id":169,"version":170,"summary_zh":157,"released_at":171},297593,"v1.65_3.7","2023-11-03T08:26:54",{"id":173,"version":174,"summary_zh":157,"released_at":175},297594,"v1.65_3.8","2023-11-03T08:26:51",{"id":177,"version":178,"summary_zh":157,"released_at":179},297595,"v1.53_3.7","2023-10-21T17:32:29",{"id":181,"version":182,"summary_zh":157,"released_at":183},297596,"v1.53_3.8","2023-10-21T17:32:26",{"id":185,"version":186,"summary_zh":157,"released_at":187},297597,"v1.44_3.7","2023-09-30T08:13:40",{"id":189,"version":190,"summary_zh":157,"released_at":191},297598,"v1.44_3.8","2023-09-30T08:13:37",{"id":193,"version":194,"summary_zh":157,"released_at":195},297599,"v1.43_3.7","2023-09-21T04:29:06",{"id":197,"version":198,"summary_zh":199,"released_at":200},297600,"v1.43_3.8","当前","2023-09-21T04:29:03",{"id":202,"version":203,"summary_zh":199,"released_at":204},297601,"v1.41_3.7","2023-09-17T04:33:38",{"id":206,"version":207,"
summary_zh":199,"released_at":208},297602,"v1.41_3.8","2023-09-17T04:33:35",{"id":210,"version":211,"summary_zh":199,"released_at":212},297603,"v1.39_3.7","2023-09-10T08:27:52",{"id":214,"version":215,"summary_zh":199,"released_at":216},297604,"v1.39_3.8","2023-09-10T08:27:49",{"id":218,"version":219,"summary_zh":199,"released_at":220},297605,"v1.38_3.7","2023-09-02T04:51:36",{"id":222,"version":223,"summary_zh":199,"released_at":224},297606,"v1.38_3.8","2023-09-02T04:51:33",{"id":226,"version":227,"summary_zh":199,"released_at":228},297607,"v1.36_3.7","2023-08-16T11:07:33",{"id":230,"version":231,"summary_zh":199,"released_at":232},297608,"v1.36_3.8","2023-08-16T11:07:30",{"id":234,"version":235,"summary_zh":199,"released_at":236},297609,"v1.34_3.7","2023-08-07T18:56:59"]