[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-Hello-SimpleAI--chatgpt-comparison-detection":3,"tool-Hello-SimpleAI--chatgpt-comparison-detection":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",143909,2,"2026-04-07T11:33:18",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",107888,"2026-04-06T11:32:50",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},4721,"markitdown","microsoft\u002Fmarkitdown","MarkItDown 是一款由微软 AutoGen 团队打造的轻量级 Python 工具，专为将各类文件高效转换为 Markdown 格式而设计。它支持 PDF、Word、Excel、PPT、图片（含 OCR）、音频（含语音转录）、HTML 乃至 YouTube 链接等多种格式的解析，能够精准提取文档中的标题、列表、表格和链接等关键结构信息。\n\n在人工智能应用日益普及的今天，大语言模型（LLM）虽擅长处理文本，却难以直接读取复杂的二进制办公文档。MarkItDown 恰好解决了这一痛点，它将非结构化或半结构化的文件转化为模型“原生理解”且 Token 效率极高的 Markdown 格式，成为连接本地文件与 AI 分析 pipeline 的理想桥梁。此外，它还提供了 MCP（模型上下文协议）服务器，可无缝集成到 Claude Desktop 等 LLM 应用中。\n\n这款工具特别适合开发者、数据科学家及 AI 研究人员使用，尤其是那些需要构建文档检索增强生成（RAG）系统、进行批量文本分析或希望让 AI 助手直接“阅读”本地文件的用户。虽然生成的内容也具备一定可读性，但其核心优势在于为机器",93400,"2026-04-06T19:52:38",[52,14],"插件",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":10,"last_commit_at":59,"category_tags":60,"status":17},4487,"LLMs-from-scratch","rasbt\u002FLLMs-from-scratch","LLMs-from-scratch 是一个基于 PyTorch 的开源教育项目，旨在引导用户从零开始一步步构建一个类似 ChatGPT 的大型语言模型（LLM）。它不仅是同名技术著作的官方代码库，更提供了一套完整的实践方案，涵盖模型开发、预训练及微调的全过程。\n\n该项目主要解决了大模型领域“黑盒化”的学习痛点。许多开发者虽能调用现成模型，却难以深入理解其内部架构与训练机制。通过亲手编写每一行核心代码，用户能够透彻掌握 Transformer 架构、注意力机制等关键原理，从而真正理解大模型是如何“思考”的。此外，项目还包含了加载大型预训练权重进行微调的代码，帮助用户将理论知识延伸至实际应用。\n\nLLMs-from-scratch 特别适合希望深入底层原理的 AI 开发者、研究人员以及计算机专业的学生。对于不满足于仅使用 API，而是渴望探究模型构建细节的技术人员而言，这是极佳的学习资源。其独特的技术亮点在于“循序渐进”的教学设计：将复杂的系统工程拆解为清晰的步骤，配合详细的图表与示例，让构建一个虽小但功能完备的大模型变得触手可及。无论你是想夯实理论基础，还是为未来研发更大规模的模型做准备",90106,"2026-04-06T11:19:32",[35,15,13,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":67,"readme_en":68,"readme_zh":69,"quickstart_zh":70,"use_case_zh":71,"hero_image_url":72,"owner_login":73,"owner_name":74,"owner_avatar_url":75,"owner_bio":76,"owner_company":77,"owner_location":77,"owner_email":77,"owner_twitter":77,"owner_website":77,"owner_url":78,"languages":79,"stars":88,"forks":89,"last_commit_at":90,"license":77,"difficulty_score":91,"env_os":92,"env_gpu":93,"env_ram":93,"env_deps":94,"category_tags":99,"github_topics":100,"view_count":32,"oss_zip_url":77,"oss_zip_packed_at":77,"status":17,"created_at":115,"updated_at":116,"faqs":117,"releases":118},5229,"Hello-SimpleAI\u002Fchatgpt-comparison-detection","chatgpt-comparison-detection","Human ChatGPT Comparison Corpus (HC3), Detectors, and more! 🔥","chatgpt-comparison-detection 是一个专注于识别 AI 生成内容并促进人机对比研究的开源项目。它核心解决了“如何区分文本是由人类专家撰写还是由 ChatGPT 生成”这一难题，为应对日益逼真的 AI 内容提供了技术依据。\n\n该项目最大的亮点是构建了首个“人类 vs. ChatGPT\"对比语料集（HC3），涵盖英文和中文双语种，涉及医疗、金融、法律等多个专业领域。基于这套高质量数据，项目提供了三种不同场景的检测器模型：包括针对特定问题的问答版检测、无需上下文的独立文本检测，以及多答案对比检测。这些工具均支持中英文输入，并能通过 Hugging Face 空间直接体验。\n\nchatgpt-comparison-detection 非常适合人工智能研究人员、自然语言处理开发者以及对内容真实性有鉴别需求的专业人士使用。研究者可以利用 HC3 数据集深入分析大模型与人类表达的差异，训练更鲁棒的分类模型；开发者则能调用其检测接口，集成到内容审核或教育辅助系统中。对于普通用户而言，它也是一个直观了解当前 AI 写作水平与局限性的窗口。作为一个学术导向的开源项目，它不仅提供了","chatgpt-comparison-detection 是一个专注于识别 AI 生成内容并促进人机对比研究的开源项目。它核心解决了“如何区分文本是由人类专家撰写还是由 ChatGPT 生成”这一难题，为应对日益逼真的 AI 内容提供了技术依据。\n\n该项目最大的亮点是构建了首个“人类 vs. ChatGPT\"对比语料集（HC3），涵盖英文和中文双语种，涉及医疗、金融、法律等多个专业领域。基于这套高质量数据，项目提供了三种不同场景的检测器模型：包括针对特定问题的问答版检测、无需上下文的独立文本检测，以及多答案对比检测。这些工具均支持中英文输入，并能通过 Hugging Face 空间直接体验。\n\nchatgpt-comparison-detection 非常适合人工智能研究人员、自然语言处理开发者以及对内容真实性有鉴别需求的专业人士使用。研究者可以利用 HC3 数据集深入分析大模型与人类表达的差异，训练更鲁棒的分类模型；开发者则能调用其检测接口，集成到内容审核或教育辅助系统中。对于普通用户而言，它也是一个直观了解当前 AI 写作水平与局限性的窗口。作为一个学术导向的开源项目，它不仅提供了实用的检测工具，更推动了社区对 AI 生成内容伦理与技术的透明化探讨。","# ChatGPT-Comparison-Detection Project 🔬\n\n![](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FLanguages-%20English%2C%20Chinese-brightgreen) \n![](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FChatGPT-Corpus%2C%20Detector-blue)\n\nOfficial repository of paper [\"How Close is ChatGPT to Human Experts? Comparison Corpus, Evaluation, and Detection\"](https:\u002F\u002Farxiv.org\u002Fabs\u002F2301.07597). Please star, watch, and fork our repo for the active updates!\n\nSee also→([📢 Feedback Space for Detectors](https:\u002F\u002Fgithub.com\u002FHello-SimpleAI\u002Fchatgpt-comparison-detection\u002Fdiscussions\u002F2) please feel free to leave your feedback here! 请留下您宝贵的意见！)\n\n\n\n\u003Cimg width=\"600\" alt=\"image\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FHello-SimpleAI_chatgpt-comparison-detection_readme_c74c0ac754b1.png\">\n\n---\n### Human ChatGPT Comparison Corpus (HC3) \u002F 人类-ChatGPT 问答对比语料集\nYes, we propose the first **Human vs. ChatGPT** comparison corpus, named **HC3**.\n\n我们提出了第一个 **Human vs. ChatGPT** 对比语料, 叫做 **HC3**.\n\n\u003Cimg width=\"520\" alt=\"image\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FHello-SimpleAI_chatgpt-comparison-detection_readme_0c7627b646c0.png\">\n\nThe first version of the HC3 datasets are now available on 🤗 Huggingface Datasets:\n- [HC3-English](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FHello-SimpleAI\u002FHC3)\n- [HC3-Chinese](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FHello-SimpleAI\u002FHC3-Chinese)\n\n\n在中文社区，HC3 数据集也已在 ModelScope 上可用:\n- [HC3-English](https:\u002F\u002Fwww.modelscope.cn\u002Fdatasets\u002Fsimpleai\u002FHC3)\n- [HC3-Chinese](https:\u002F\u002Fwww.modelscope.cn\u002Fdatasets\u002Fsimpleai\u002FHC3-Chinese)\n\n\n> Train\u002FTest splits & filtered versions of the paper, ref to Google Drive links in [HC3\u002FREADME.md](HC3\u002FREADME.md).\n\n### Dataset Copyright\n\nIf the source datasets used in this corpus has a specific license which is stricter than CC-BY-SA, our products follow the same.\nIf not, they follow CC-BY-SA license.\n\n| English Split       | Source | Source License | Note |\n|----------|-------------|--------|-------------|\n| reddit_eli5 | [ELI5](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002FELI5)   | BSD License    |     |\n| open_qa  | [WikiQA](https:\u002F\u002Fwww.microsoft.com\u002Fen-us\u002Fdownload\u002Fdetails.aspx?id=52419)  | [PWC Custom](https:\u002F\u002Fpaperswithcode.com\u002Fdatasets\u002Flicense)   |      |\n| wiki_csai   | Wikipedia | CC-BY-SA |   | [Wiki FAQ](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FWikipedia:FAQ\u002FCopyright) |\n| medicine    | [Medical Dialog](https:\u002F\u002Fgithub.com\u002FUCSD-AI4H\u002FMedical-Dialogue-System) | Unknown|  [Asking](https:\u002F\u002Fgithub.com\u002FUCSD-AI4H\u002FMedical-Dialogue-System\u002Fissues\u002F10)|\n| finance     | [FiQA](https:\u002F\u002Fpaperswithcode.com\u002Fdataset\u002Ffiqa-1) | Unknown |  Asking by 📧  |\n\n| Chinese Split       | Source | Source License  | Note |\n|----------|-------------|-----------|-------------|\n| open_qa  | [WebTextQA & BaikeQA](https:\u002F\u002Fgithub.com\u002Fbrightmart\u002Fnlp_chinese_corpus) | MIT license |  |  |\n| baike     | Baidu Baike  | None   |    |   |\n| nlpcc_dbqa  | [NLPCC-DBQA](https:\u002F\u002Fgithub.com\u002Fmsra-nlc\u002FChineseDBQA) | Unknown |   [Asking](https:\u002F\u002Fgithub.com\u002FUCSD-AI4H\u002FMedical-Dialogue-System\u002Fissues\u002F10) |\n| medicine    | [Chinese Medical Dialogue](https:\u002F\u002Ftianchi.aliyun.com\u002Fdataset\u002F90163) |  CC-BY-NC 4.0 | \n| finance     | [FinanceZhidao](https:\u002F\u002Fwww.heywhale.com\u002Fmw\u002Fdataset\u002F5e9588f8e7ec38002d0331b1\u002Fcontent) | CC-BY 4.0 |  |\n| psychology  | [On Baidu AI Studio](https:\u002F\u002Faistudio.baidu.com\u002Faistudio\u002Fdatasetdetail\u002F38489) | CC0  | |\n|law          | [LegalQA](https:\u002F\u002Fgithub.com\u002Fsiatnlp\u002FLegalQA) | Unknown | [Asking](https:\u002F\u002Fgithub.com\u002Fsiatnlp\u002FLegalQA\u002Fissues\u002F2) |\n\n\n---\n\n### ChatGPT detectors \u002F 内容检测器\n![image](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FHello-SimpleAI_chatgpt-comparison-detection_readme_5695a28ab9a6.png)\n(Hosted on 🤗 Hugging Face Spaces)\n\n\nWe provide three kinds of detectors, all in Bilingual \u002F 我们提供了三个版本的检测器，且都支持中英文:\n- [QA version \u002F 问答版](https:\u002F\u002Fhuggingface.co\u002Fspaces\u002FHello-SimpleAI\u002Fchatgpt-detector-qa): detect whether an **answer** is generated by ChatGPT for certain **question**, using PLM-based classifiers \u002F 判断某个**问题的回答**是否由ChatGPT生成，使用基于PTM的分类器来开发;\n- [Sinlge-text version \u002F 独立文本版](https:\u002F\u002Fhuggingface.co\u002Fspaces\u002FHello-SimpleAI\u002Fchatgpt-detector-single): detect whether a piece of text is ChatGPT generated, using PLM-based classifiers \u002F 判断**单条文本**是否由ChatGPT生成，使用基于PTM的分类器来开发;\n- [Linguistic version \u002F 语言学版](https:\u002F\u002Fhuggingface.co\u002Fspaces\u002FHello-SimpleAI\u002Fchatgpt-detector-ling): detect whether a piece of text is ChatGPT generated, using linguistic features \u002F 判断**单条文本**是否由ChatGPT生成，使用基于语言学特征的模型来开发;\n\n\n在 modelscope 中文社区平台，三个版本的检测器也都可用:\n- [QA version \u002F 问答版](https:\u002F\u002Fwww.modelscope.cn\u002Fstudios\u002Fsimpleai\u002Fchatgpt-detector-qa)\n- [Sinlge-text version \u002F 独立文本版](https:\u002F\u002Fwww.modelscope.cn\u002Fstudios\u002Fsimpleai\u002Fchatgpt-detector-single)\n- [Linguistic version \u002F 语言学版](https:\u002F\u002Fwww.modelscope.cn\u002Fstudios\u002Fsimpleai\u002Fchatgpt-detector-ling)\n\n\nThe model weights are all available at 🤗 Hugging Face Models:\n\n| Model Checkpoints              | Comment      |\n|-----------------------|------------|\n|[chatgpt-detector-roberta](https:\u002F\u002Fhuggingface.co\u002FHello-SimpleAI\u002Fchatgpt-detector-roberta)|To detect a single piece of text|\n|[chatgpt-qa-detector-roberta](https:\u002F\u002Fhuggingface.co\u002FHello-SimpleAI\u002Fchatgpt-qa-detector-roberta)|To detect a question-answer pair|\n|[chatgpt-detector-roberta-chinese](https:\u002F\u002Fhuggingface.co\u002FHello-SimpleAI\u002Fchatgpt-detector-roberta-chinese)|检测单条文本，中文版|\n|[chatgpt-qa-detector-roberta-chinese](https:\u002F\u002Fhuggingface.co\u002FHello-SimpleAI\u002Fchatgpt-qa-detector-roberta-chinese)|检测一对QA文本，中文版|\n\nThe English models are based on [roberta-base](https:\u002F\u002Fhuggingface.co\u002Froberta-base).\nThe Chinese models are based on [hfl\u002Fchinese-roberta-wwm-ext](https:\u002F\u002Fhuggingface.co\u002Fhfl\u002Fchinese-roberta-wwm-ext).\n\n\n---\n\n### Important Dates \u002F 重要节点:\n\n| Events                | Dates      |\n|-----------------------|------------|\n| Project Launch \u002F 项目启动        | 2022-12-09 ✅ |\n| Comparison Data Collection \u002F 对比数据收集        | 2022-12-11 to Now 🏎️|\n| Release ChatGPT Detector (Demo) \u002F 检测器 Demo 发布 | 2023-01-11 ✅|\n| Models Release \u002F 模型开源 | 2023-01-18 ✅|\n| Comparison Corpus Release \u002F 语料集开源 | 2023-01-18 ✅|\n| Research Paper \u002F 研究论文发布 | 2023-01-19 ✅|\n|...|...|\n\n\n\n---\n\n### Citation\n\nCheckout this paper [arxiv: 2301.07597](https:\u002F\u002Farxiv.org\u002Fabs\u002F2301.07597)\n\n```\n@article{guo-etal-2023-hc3,\n    title = \"How Close is ChatGPT to Human Experts? Comparison Corpus, Evaluation, and Detection\",\n    author = \"Guo, Biyang  and\n      Zhang, Xin  and\n      Wang, Ziyuan  and\n      Jiang, Minqi  and\n      Nie, Jinran  and\n      Ding, Yuxuan  and\n      Yue, Jianwei  and\n      Wu, Yupeng\",\n    journal={arXiv preprint arxiv:2301.07597}\n    year = \"2023\",\n}\n```\n\n\n\n---\n### Our Story... \u002F 背景故事\n\nOn December 9, 2022, which is 10 days after the launch of [ChatGPT](https:\u002F\u002Fopenai.com\u002Fblog\u002Fchatgpt\u002F), we started this project, for two purposes: \n1. To create some **open-source models** for efficiently detecting ChatGPT-generated content; \n2. To collect a valuable **human-ChatGPT comparison Q&A corpus**, to facilitate releated research.\n\n2022 年 12 月 9 日，也就是 [ChatGPT](https:\u002F\u002Fopenai.com\u002Fblog\u002Fchatgpt\u002F) 推出的第 10 天，我们开始了这个项目，为了两个目的：\n1. 做出一些**开源**模型工具来高效检测 ChatGPT 生成的内容；\n2. 收集一批有价值的**人类和 ChatGPT 对比**的中英双语问答语料，来助力相关学术研究。\n\nWelcome to follow our project! We have released a preview of our ChatGPT detectors, and the **models, dataset will be open-sourced** in about a week. We look forward to receiving feedback from the community to help improve the models and make contributions to **open** academic research together:)\u003Cbr>\n欢迎关注我们项目，我们目前已经发布ChatGPT检测器预览版，并将于约**一周内发布开源模型、数据集**。期待得到广大群众的反馈，来帮助我们改进模型，为**开放**的学术研究一起做贡献！\n\n### About Us \u002F 关于我们\n\nWe are a group of insignificant researchers (in the shadow of ChatGPT) hoping to do some significant work for the community. The team for this projects consists of PhD students and engineers from 6 universities\u002Fcompanies.\u003Cbr>\n我们是一群（在 ChatGPT 的阴影下）渺小的研究人员，但希望为社区做一些有意义的事。这个项目的团队由来自6所大学\u002F公司的博士生和工程师组成。\n\n|   |   |   |   |\n|:-:|:-:|:-:|:-:|\n| [Biyang Guo](https:\u002F\u002Fgithub.com\u002Fbeyondguo) | [Minqi Jiang](https:\u002F\u002Fgithub.com\u002FMinqi824) | [Ziyuan Wang](https:\u002F\u002Fgithub.com\u002FSUFEHeisenberg) | [Xin Zhang](https:\u002F\u002Fgithub.com\u002Fizhx) |\n|\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FHello-SimpleAI_chatgpt-comparison-detection_readme_753d5ad36689.png\" alt=\"\" width=\"40\"\u002F>|\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FHello-SimpleAI_chatgpt-comparison-detection_readme_7cc0077db6d9.png\" alt=\"\" width=\"40\"\u002F>|\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FHello-SimpleAI_chatgpt-comparison-detection_readme_ac0420f718ae.png\" alt=\"\" width=\"40\"\u002F>|\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FHello-SimpleAI_chatgpt-comparison-detection_readme_f264c7a0b923.png\" alt=\"\" width=\"40\"\u002F>|\n| [Jinran Nie](https:\u002F\u002Fgithub.com\u002FNJRBarry) | [Yuxuan Ding](https:\u002F\u002Fgithub.com\u002Fyxding95) | [Jianwei Yue](https:\u002F\u002Fgithub.com\u002FTurquoiseA) | [Yupeng Wu](https:\u002F\u002Fgithub.com\u002FrealRoc) |\n|\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FHello-SimpleAI_chatgpt-comparison-detection_readme_f268c0dbfa6e.png\" alt=\"\" width=\"40\"\u002F>|\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FHello-SimpleAI_chatgpt-comparison-detection_readme_96e4bd36e9b9.png\" alt=\"\" width=\"40\"\u002F>|  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FHello-SimpleAI_chatgpt-comparison-detection_readme_2172528a51bc.png\" alt=\"\" width=\"40\"\u002F> | \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FHello-SimpleAI_chatgpt-comparison-detection_readme_0c819e5a639b.png\" alt=\"\" width=\"40\"\u002F>  |\n\n\n\n\n\n\n\n\n\n","# ChatGPT-比较-检测 项目 🔬\n\n![](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FLanguages-%20English%2C%20Chinese-brightgreen) \n![](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FChatGPT-Corpus%2C%20Detector-blue)\n\n论文 [\"ChatGPT 跟人类专家有多接近？对比语料、评估与检测\"](https:\u002F\u002Farxiv.org\u002Fabs\u002F2301.07597) 的官方仓库。请为我们的仓库点赞、关注并 fork，以获取最新更新！\n\n相关链接→([📢 检测器反馈区](https:\u002F\u002Fgithub.com\u002FHello-SimpleAI\u002Fchatgpt-comparison-detection\u002Fdiscussions\u002F2) 欢迎在此留下您的宝贵意见！ 请留下您宝贵的意见！)\n\n\n\n\u003Cimg width=\"600\" alt=\"image\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FHello-SimpleAI_chatgpt-comparison-detection_readme_c74c0ac754b1.png\">\n\n---\n### 人类-ChatGPT 问答对比语料集 (HC3)\n是的，我们提出了首个 **Human vs. ChatGPT** 对比语料，命名为 **HC3**。\n\n我们提出了第一个 **Human vs. ChatGPT** 对比语料, 叫做 **HC3**.\n\n\u003Cimg width=\"520\" alt=\"image\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FHello-SimpleAI_chatgpt-comparison-detection_readme_0c7627b646c0.png\">\n\nHC3 数据集的第一个版本现已在 🤗 Huggingface Datasets 上发布：\n- [HC3-English](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FHello-SimpleAI\u002FHC3)\n- [HC3-Chinese](https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FHello-SimpleAI\u002FHC3-Chinese)\n\n\n在中文社区，HC3 数据集也已在 ModelScope 上可用:\n- [HC3-English](https:\u002F\u002Fwww.modelscope.cn\u002Fdatasets\u002Fsimpleai\u002FHC3)\n- [HC3-Chinese](https:\u002F\u002Fwww.modelscope.cn\u002Fdatasets\u002Fsimpleai\u002FHC3-Chinese)\n\n\n> 训练\u002F测试划分及论文中使用的过滤版本，请参考 [HC3\u002FREADME.md](HC3\u002FREADME.md) 中的 Google Drive 链接。\n\n### 数据集版权\n\n如果本语料集中所使用的源数据集具有比 CC-BY-SA 更严格的特定许可，则我们的产品也将遵循相同的许可。否则，将遵循 CC-BY-SA 许可。\n\n| 英文划分       | 来源 | 来源许可 | 备注 |\n|----------|-------------|--------|-------------|\n| reddit_eli5 | [ELI5](https:\u002F\u002Fgithub.com\u002Ffacebookresearch\u002FELI5)   | BSD License    |     |\n| open_qa  | [WikiQA](https:\u002F\u002Fwww.microsoft.com\u002Fen-us\u002Fdownload\u002Fdetails.aspx?id=52419)  | [PWC Custom](https:\u002F\u002Fpaperswithcode.com\u002Fdatasets\u002Flicense)   |      |\n| wiki_csai   | Wikipedia | CC-BY-SA |   | [Wiki FAQ](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FWikipedia:FAQ\u002FCopyright) |\n| medicine    | [Medical Dialog](https:\u002F\u002Fgithub.com\u002FUCSD-AI4H\u002FMedical-Dialogue-System) | Unknown|  [Asking](https:\u002F\u002Fgithub.com\u002FUCSD-AI4H\u002FMedical-Dialogue-System\u002Fissues\u002F10)|\n| finance     | [FiQA](https:\u002F\u002Fpaperswithcode.com\u002Fdataset\u002Ffiqa-1) | Unknown |  Asking by 📧  |\n\n| 中文划分       | 来源 | 来源许可  | 备注 |\n|----------|-------------|-----------|-------------|\n| open_qa  | [WebTextQA & BaikeQA](https:\u002F\u002Fgithub.com\u002Fbrightmart\u002Fnlp_chinese_corpus) | MIT license |  |  |\n| baike     | Baidu Baike  | None   |    |   |\n| nlpcc_dbqa  | [NLPCC-DBQA](https:\u002F\u002Fgithub.com\u002Fmsra-nlc\u002FChineseDBQA) | Unknown |   [Asking](https:\u002F\u002Fgithub.com\u002FUCSD-AI4H\u002FMedical-Dialogue-System\u002Fissues\u002F10) |\n| medicine    | [Chinese Medical Dialogue](https:\u002F\u002Ftianchi.aliyun.com\u002Fdataset\u002F90163) |  CC-BY-NC 4.0 | \n| finance     | [FinanceZhidao](https:\u002F\u002Fwww.heywhale.com\u002Fmw\u002Fdataset\u002F5e9588f8e7ec38002d0331b1\u002Fcontent) | CC-BY 4.0 |  |\n| psychology  | [On Baidu AI Studio](https:\u002F\u002Faistudio.baidu.com\u002Faistudio\u002Fdatasetdetail\u002F38489) | CC0  | |\n|law          | [LegalQA](https:\u002F\u002Fgithub.com\u002Fsiatnlp\u002FLegalQA) | Unknown | [Asking](https:\u002F\u002Fgithub.com\u002Fsiatnlp\u002FLegalQA\u002Fissues\u002F2) |\n\n\n---\n\n### ChatGPT 内容检测器\n![image](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FHello-SimpleAI_chatgpt-comparison-detection_readme_5695a28ab9a6.png)\n(托管于 🤗 Hugging Face Spaces)\n\n\n我们提供了三种类型的检测器，全部支持中英文：\n- [问答版](https:\u002F\u002Fhuggingface.co\u002Fspaces\u002FHello-SimpleAI\u002Fchatgpt-detector-qa): 判断某个**问题的回答**是否由ChatGPT生成，使用基于PTM的分类器来开发;\n- [独立文本版](https:\u002F\u002Fhuggingface.co\u002Fspaces\u002FHello-SimpleAI\u002Fchatgpt-detector-single): 判断**单条文本**是否由ChatGPT生成，使用基于PTM的分类器来开发;\n- [语言学版](https:\u002F\u002Fhuggingface.co\u002Fspaces\u002FHello-SimpleAI\u002Fchatgpt-detector-ling): 判断**单条文本**是否由ChatGPT生成，使用基于语言学特征的模型来开发;\n\n\n在 modelscope 中文社区平台，三个版本的检测器也都可用:\n- [问答版](https:\u002F\u002Fwww.modelscope.cn\u002Fstudios\u002Fsimpleai\u002Fchatgpt-detector-qa)\n- [独立文本版](https:\u002F\u002Fwww.modelscope.cn\u002Fstudios\u002Fsimpleai\u002Fchatgpt-detector-single)\n- [语言学版](https:\u002F\u002Fwww.modelscope.cn\u002Fstudios\u002Fsimpleai\u002Fchatgpt-detector-ling)\n\n\n模型权重均可在 🤗 Hugging Face Models 上找到：\n\n| 模型检查点              | 说明      |\n|-----------------------|------------|\n|[chatgpt-detector-roberta](https:\u002F\u002Fhuggingface.co\u002FHello-SimpleAI\u002Fchatgpt-detector-roberta)|用于检测单条文本|\n|[chatgpt-qa-detector-roberta](https:\u002F\u002Fhuggingface.co\u002FHello-SimpleAI\u002Fchatgpt-qa-detector-roberta)|用于检测问答对|\n|[chatgpt-detector-roberta-chinese](https:\u002F\u002Fhuggingface.co\u002FHello-SimpleAI\u002Fchatgpt-detector-roberta-chinese)|检测单条文本，中文版|\n|[chatgpt-qa-detector-roberta-chinese](https:\u002F\u002Fhuggingface.co\u002FHello-SimpleAI\u002Fchatgpt-qa-detector-roberta-chinese)|检测一对QA文本，中文版|\n\n英文模型基于 [roberta-base](https:\u002F\u002Fhuggingface.co\u002Froberta-base)。\n中文模型基于 [hfl\u002Fchinese-roberta-wwm-ext](https:\u002F\u002Fhuggingface.co\u002Fhfl\u002Fchinese-roberta-wwm-ext)。\n\n---\n\n### 重要日期:\n\n| 活动                | 日期      |\n|-----------------------|------------|\n| 项目启动        | 2022-12-09 ✅ |\n| 对比数据收集        | 2022-12-11 至今 🏎️|\n| ChatGPT 检测器 Demo 发布 | 2023-01-11 ✅|\n| 模型开源 | 2023-01-18 ✅|\n| 语料集开源 | 2023-01-18 ✅|\n| 研究论文发表 | 2023-01-19 ✅|\n|...|...|\n\n\n\n---\n\n### 引用\n\n请参阅此论文 [arxiv: 2301.07597](https:\u002F\u002Farxiv.org\u002Fabs\u002F2301.07597)\n\n```\n@article{guo-etal-2023-hc3,\n    title = \"How Close is ChatGPT to Human Experts? Comparison Corpus, Evaluation, and Detection\",\n    author = \"Guo, Biyang  and\n      Zhang, Xin  and\n      Wang, Ziyuan  and\n      Jiang, Minqi  and\n      Nie, Jinran  and\n      Ding, Yuxuan  and\n      Yue, Jianwei  and\n      Wu, Yupeng\",\n    journal={arXiv preprint arxiv:2301.07597}\n    year = \"2023\",\n}\n```\n\n\n\n---\n\n### 我们的故事... \u002F 背景故事\n\n2022年12月9日，也就是[ChatGPT](https:\u002F\u002Fopenai.com\u002Fblog\u002Fchatgpt\u002F)推出后的第10天，我们启动了这个项目，旨在实现两个目标：\n1. 构建一些**开源模型**，用于高效检测由 ChatGPT 生成的内容；\n2. 收集一份有价值的**人类与 ChatGPT 对比**的中英双语问答语料库，以推动相关研究的发展。\n\n欢迎关注我们的项目！目前我们已发布了 ChatGPT 检测器的预览版本，并计划在大约**一周内开源模型和数据集**。我们期待来自社区的反馈，帮助我们不断优化模型，共同为**开放**的学术研究贡献力量：)\u003Cbr>\n欢迎关注我们项目，我们目前已经发布ChatGPT检测器预览版，并将于约**一周内发布开源模型、数据集**。期待得到广大群众的反馈，来帮助我们改进模型，为**开放**的学术研究一起做贡献！\n\n### 关于我们 \u002F 关于我们\n\n我们是一群（在 ChatGPT 的阴影下）渺小的研究人员，但希望为社区做一些有意义的事。这个项目的团队由来自6所大学\u002F公司的博士生和工程师组成。\n\n|   |   |   |   |\n|:-:|:-:|:-:|:-:|\n| [Biyang Guo](https:\u002F\u002Fgithub.com\u002Fbeyondguo) | [Minqi Jiang](https:\u002F\u002Fgithub.com\u002FMinqi824) | [Ziyuan Wang](https:\u002F\u002Fgithub.com\u002FSUFEHeisenberg) | [Xin Zhang](https:\u002F\u002Fgithub.com\u002Fizhx) |\n|\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FHello-SimpleAI_chatgpt-comparison-detection_readme_753d5ad36689.png\" alt=\"\" width=\"40\"\u002F>|\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FHello-SimpleAI_chatgpt-comparison-detection_readme_7cc0077db6d9.png\" alt=\"\" width=\"40\"\u002F>|\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FHello-SimpleAI_chatgpt-comparison-detection_readme_ac0420f718ae.png\" alt=\"\" width=\"40\"\u002F>|\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FHello-SimpleAI_chatgpt-comparison-detection_readme_f264c7a0b923.png\" alt=\"\" width=\"40\"\u002F>|\n| [Jinran Nie](https:\u002F\u002Fgithub.com\u002FNJRBarry) | [Yuxuan Ding](https:\u002F\u002Fgithub.com\u002Fyxding95) | [Jianwei Yue](https:\u002F\u002Fgithub.com\u002FTurquoiseA) | [Yupeng Wu](https:\u002F\u002Fgithub.com\u002FrealRoc) |\n|\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FHello-SimpleAI_chatgpt-comparison-detection_readme_f268c0dbfa6e.png\" alt=\"\" width=\"40\"\u002F>|\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FHello-SimpleAI_chatgpt-comparison-detection_readme_96e4bd36e9b9.png\" alt=\"\" width=\"40\"\u002F>|  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FHello-SimpleAI_chatgpt-comparison-detection_readme_2172528a51bc.png\" alt=\"\" width=\"40\"\u002F> | \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FHello-SimpleAI_chatgpt-comparison-detection_readme_0c819e5a639b.png\" alt=\"\" width=\"40\"\u002F>  |","# ChatGPT-Comparison-Detection 快速上手指南\n\n本指南帮助开发者快速使用 **ChatGPT-Comparison-Detection** 项目中的检测模型和数据集，用于识别文本是否由 ChatGPT 生成。\n\n## 环境准备\n\n### 系统要求\n- 操作系统：Linux \u002F macOS \u002F Windows\n- Python 版本：3.8 或更高\n- GPU（可选）：推荐用于加速模型推理\n\n### 前置依赖\n请确保已安装以下基础库：\n```bash\npip install torch transformers datasets\n```\n\n> 💡 国内用户建议使用清华或阿里镜像源加速安装：\n```bash\npip install torch transformers datasets -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n```\n\n## 安装步骤\n\n本项目无需克隆仓库即可使用模型，可直接通过 Hugging Face `transformers` 加载。如需使用完整数据集或源码，可克隆仓库：\n\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002FHello-SimpleAI\u002Fchatgpt-comparison-detection.git\ncd chatgpt-comparison-detection\n```\n\n## 基本使用\n\n### 1. 加载英文单文本检测模型\n\n```python\nfrom transformers import AutoTokenizer, AutoModelForSequenceClassification\nimport torch\n\nmodel_name = \"Hello-SimpleAI\u002Fchatgpt-detector-roberta\"\ntokenizer = AutoTokenizer.from_pretrained(model_name)\nmodel = AutoModelForSequenceClassification.from_pretrained(model_name)\n\ntext = \"ChatGPT is a large language model developed by OpenAI.\"\n\ninputs = tokenizer(text, return_tensors=\"pt\", truncation=True, max_length=512)\nwith torch.no_grad():\n    outputs = model(**inputs)\n    prediction = torch.softmax(outputs.logits, dim=-1).argmax().item()\n\nprint(\"ChatGPT 生成\" if prediction == 1 else \"人类撰写\")\n```\n\n### 2. 加载中文单文本检测模型\n\n```python\nmodel_name = \"Hello-SimpleAI\u002Fchatgpt-detector-roberta-chinese\"\ntokenizer = AutoTokenizer.from_pretrained(model_name)\nmodel = AutoModelForSequenceClassification.from_pretrained(model_name)\n\ntext = \"ChatGPT 是由 OpenAI 开发的大型语言模型。\"\n\ninputs = tokenizer(text, return_tensors=\"pt\", truncation=True, max_length=512)\nwith torch.no_grad():\n    outputs = model(**inputs)\n    prediction = torch.softmax(outputs.logits, dim=-1).argmax().item()\n\nprint(\"ChatGPT 生成\" if prediction == 1 else \"人类撰写\")\n```\n\n### 3. 使用问答对检测模型（英文）\n\n```python\nmodel_name = \"Hello-SimpleAI\u002Fchatgpt-qa-detector-roberta\"\ntokenizer = AutoTokenizer.from_pretrained(model_name)\nmodel = AutoModelForSequenceClassification.from_pretrained(model_name)\n\nquestion = \"What is ChatGPT?\"\nanswer = \"ChatGPT is a large language model developed by OpenAI.\"\n\ninputs = tokenizer(question, answer, return_tensors=\"pt\", truncation=True, max_length=512)\nwith torch.no_grad():\n    outputs = model(**inputs)\n    prediction = torch.softmax(outputs.logits, dim=-1).argmax().item()\n\nprint(\"ChatGPT 生成\" if prediction == 1 else \"人类撰写\")\n```\n\n> 🌐 所有模型均托管在 Hugging Face，国内用户也可通过 ModelScope 加载对应中文模型（需安装 `modelscope` 库）。\n\n---\n\n✅ 现在你可以开始检测文本是否由 ChatGPT 生成了！","某在线教育平台的内容审核团队正面临海量用户提交问答内容的甄别压力，急需区分哪些是真人专家的回答，哪些是由 ChatGPT 生成的内容。\n\n### 没有 chatgpt-comparison-detection 时\n- 审核人员只能依靠人工阅读判断，面对成千上万条问答效率极低，且容易因疲劳产生误判。\n- 缺乏统一的评估标准，不同审核员对\"AI 味”的主观理解不一致，导致内容分级混乱。\n- 无法量化 AI 生成内容的比例，难以向管理层汇报平台内容生态的真实健康状况。\n- 遇到高仿真的 AI 回答时，传统关键词过滤手段完全失效，劣质内容极易混入精品库。\n- 缺乏针对中文场景的专用检测模型，直接套用国外工具导致对本土化表达的识别率低下。\n\n### 使用 chatgpt-comparison-detection 后\n- 利用其提供的双语检测器（QA 版与独立文本版）自动批量筛查，将审核效率提升数倍，人工仅需复核置信度低的案例。\n- 基于 HC3 对比语料集训练的模型提供了客观的判定分数，统一了全团队的审核标尺，消除了主观偏差。\n- 通过数据分析面板清晰展示平台中 AI 生成内容的占比趋势，为制定内容运营策略提供了坚实的数据支撑。\n- 依托论文级别的检测算法，有效识别出逻辑严密但实为 AI 生成的高仿真回答，守住内容质量底线。\n- 直接使用针对中文社区优化的 HC3-Chinese 数据集版本，显著提升了对百度百科、医疗咨询等本土场景的识别精度。\n\nchatgpt-comparison-detection 通过提供权威的对比语料与高精度检测模型，帮助平台在 AI 内容泛滥的时代重建了可信的内容筛选机制。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FHello-SimpleAI_chatgpt-comparison-detection_c74c0ac7.png","Hello-SimpleAI","SimpleAI","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002FHello-SimpleAI_ac8f4306.png","A group of independent and creative researchers.",null,"https:\u002F\u002Fgithub.com\u002FHello-SimpleAI",[80,84],{"name":81,"color":82,"percentage":83},"Python","#3572A5",81,{"name":85,"color":86,"percentage":87},"Jupyter Notebook","#DA5B0B",19,1346,124,"2026-04-04T18:24:27",1,"","未说明",{"notes":95,"python":93,"dependencies":96},"该项目主要提供预训练模型权重（基于 roberta-base 和 hfl\u002Fchinese-roberta-wwm-ext）及数据集。README 中未列出具体的本地运行环境配置、Python 版本或依赖库版本要求。用户可直接通过 Hugging Face Spaces 或 ModelScope 在线使用演示，或下载模型权重后自行搭建推理环境。",[97,98],"transformers (基于 RoBERTa)","torch",[13,16,14,15,35],[101,102,103,104,105,106,107,108,109,110,111,112,113,114],"ai","chatbot","chatgpt","dataset","nlp","openai","text-classification","python","gpt2","gpt3","gpt-3","ml","machine-learning","deep-learning","2026-03-27T02:49:30.150509","2026-04-08T03:56:14.337723",[],[]]