[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-zacharywhitley--awesome-ocr":3,"tool-zacharywhitley--awesome-ocr":61},[4,18,28,36,45,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":24,"last_commit_at":25,"category_tags":26,"status":17},9989,"n8n","n8n-io\u002Fn8n","n8n 是一款面向技术团队的公平代码（fair-code）工作流自动化平台，旨在让用户在享受低代码快速构建便利的同时，保留编写自定义代码的灵活性。它主要解决了传统自动化工具要么过于封闭难以扩展、要么完全依赖手写代码效率低下的痛点，帮助用户轻松连接 400 多种应用与服务，实现复杂业务流程的自动化。\n\nn8n 特别适合开发者、工程师以及具备一定技术背景的业务人员使用。其核心亮点在于“按需编码”：既可以通过直观的可视化界面拖拽节点搭建流程，也能随时插入 JavaScript 或 Python 代码、调用 npm 包来处理复杂逻辑。此外，n8n 原生集成了基于 LangChain 的 AI 能力，支持用户利用自有数据和模型构建智能体工作流。在部署方面，n8n 提供极高的自由度，支持完全自托管以保障数据隐私和控制权，也提供云端服务选项。凭借活跃的社区生态和数百个现成模板，n8n 让构建强大且可控的自动化系统变得简单高效。",184740,2,"2026-04-19T23:22:26",[16,14,13,15,27],"插件",{"id":29,"name":30,"github_repo":31,"description_zh":32,"stars":33,"difficulty_score":10,"last_commit_at":34,"category_tags":35,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 
绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":24,"last_commit_at":42,"category_tags":43,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",161147,"2026-04-19T23:31:47",[14,13,44],"语言模型",{"id":46,"name":47,"github_repo":48,"description_zh":49,"stars":50,"difficulty_score":24,"last_commit_at":51,"category_tags":52,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 
都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",109154,"2026-04-18T11:18:24",[14,15,13],{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":24,"last_commit_at":59,"category_tags":60,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[27,13,15,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":66,"readme_en":67,"readme_zh":68,"quickstart_zh":69,"use_case_zh":70,"hero_image_url":71,"owner_login":72,"owner_name":73,"owner_avatar_url":74,"owner_bio":65,"owner_company":73,"owner_location":75,"owner_email":76,"owner_twitter":65,"owner_website":77,"owner_url":78,"languages":65,"stars":79,"forks":80,"last_commit_at":81,"license":65,"difficulty_score":82,"env_os":83,"env_gpu":84,"env_ram":84,"env_deps":85,"category_tags":88,"github_topics":89,"view_count":24,"oss_zip_url":65,"oss_zip_packed_at":65,"status":17,"created_at":97,"updated_at":98,"faqs":99,"releases":100},9895,"zacharywhitley\u002Fawesome-ocr","awesome-ocr",null,"awesome-ocr 并非单一软件，而是一个精心整理的开源光学字符识别（OCR）资源合集。它主要解决文档数字化过程中常见的图像质量难题，如扫描件倾斜、页面弯曲变形、复杂版面分割以及手写文字识别等痛点。通过汇聚从传统图像处理到前沿深度学习的一系列优秀项目，awesome-ocr 为开发者提供了包括自适应径向投影校正、生成对抗网络（GANs）去扭曲、以及基于立方体模型的页面展平等多样化技术方案。\n\n无论是需要构建高精度 OCR 流水线的工程师，还是致力于文档分析算法研究的研究人员，都能在此找到适合的工具库。例如，其中的 LayoutParser 和 LAREX 擅长处理复杂的古籍或印刷品版面分析，而 jdeskew 和 DewarpNet 则能高效修复拍摄角度不佳的文档图像。虽然普通用户通常不直接操作代码库，但许多基于这些底层技术开发的应用程序正默默提升着日常扫描体验。awesome-ocr 
以其全面的技术视野和开放的社区生态，成为连接理论研究与工程落地的重要桥梁，帮助从业者快速定位并集成最适合当前任务的 OCR 组件。","# Awesome OCR\n\n## Deskewing and Dewarping\n- [jdeskew](https:\u002F\u002Fgithub.com\u002Fphamquiluan\u002Fjdeskew) ([paper:2022](https:\u002F\u002Fwww.researchgate.net\u002Fpublication\u002F364320913_ADAPTIVE_RADIAL_PROJECTION_ON_FOURIER_MAGNITUDE_SPECTRUM_FOR_DOCUMENT_IMAGE_SKEW_ESTIMATION)) - Adaptive Radial Projection on Fourier Magnitude Spectrum for Document Image Skew Estimation\n- [DewarpNet](https:\u002F\u002Fgithub.com\u002Fcvlab-stonybrook\u002FDewarpNet) ([paper:2019](https:\u002F\u002Fwww3.cs.stonybrook.edu\u002F~cvl\u002Fprojects\u002Fdewarpnet\u002Fstorage\u002Fpaper.pdf))\n- [MORAN_v2](https:\u002F\u002Fgithub.com\u002FCanjie-Luo\u002FMORAN_v2) ([paper:2019](https:\u002F\u002Farxiv.org\u002Fabs\u002F1901.03003)) - A Multi-Object Rectified Attention Network for Scene Text Recognition\n- [thomasjhaung\u002Fdeep-learning-for-document-dewarping](https:\u002F\u002Fgithub.com\u002Fthomasjhuang\u002Fdeep-learning-for-document-dewarping) An application of high resolution GANs to dewarp images of perturbed documents\n- [unproject_text](https:\u002F\u002Fgithub.com\u002Fmzucker\u002Funproject_text) - Perspective recovery of text using transformed ellipses\n- [unpaper](https:\u002F\u002Fgithub.com\u002FFlameeyes\u002Funpaper) - a post-processing tool for scanned sheets of paper, especially for book pages that have been scanned from previously created photocopies.\n- [deskew](https:\u002F\u002Fgithub.com\u002Fsbrunner\u002Fdeskew) - Library used to deskew a scanned document\n- [deskewing](https:\u002F\u002Fgithub.com\u002Fsauravbiswasiupr\u002Fdeskewing) - Contains code to deskew images using MLPs, LSTMs and LLS tranformations\n- [skew_correction](https:\u002F\u002Fgithub.com\u002Fprajwalmylar\u002Fskew_correction) - De-skewing images with slanted content by finding the deviation using Canny Edge Detection.\n- [page_dewarp](https:\u002F\u002Fgithub.com\u002Fmzucker\u002Fpage_dewarp) (2016) - 
Page dewarping and thresholding using a \"cubic sheet\" model \n- [text_deskewing](https:\u002F\u002Fgithub.com\u002Fdehaisea\u002Ftext_deskewing) - Rotate text images if they are not straight for better text detection and recognition.\n- [galfar\u002Fdeskew](https:\u002F\u002Fgithub.com\u002Fgalfar\u002Fdeskew) - Deskew is a command line tool for deskewing scanned text documents. It uses Hough transform to detect \"text lines\" in the image. As an output, you get an image rotated so that the lines are horizontal.\n- [xellows1305\u002FDocument-Image-Dewarping](https:\u002F\u002Fgithub.com\u002Fxellows1305\u002FDocument-Image-Dewarping) - No code :(\n- https:\u002F\u002Fgithub.com\u002FRaymondMcGuire\u002FBOOK-CONTENT-SEGMENTATION-AND-DEWARPING\n- [Docuwarp](https:\u002F\u002Fgithub.com\u002Fthomasjhuang\u002Fdeep-learning-for-document-dewarping)\n- [Alyn](https:\u002F\u002Fgithub.com\u002Fkakul\u002FAlyn)\n\n\n## Segmentation\n\n### Line Segmentation\n- [ARU-Net](https:\u002F\u002Fgithub.com\u002FTobiasGruening\u002FARU-Net) - Deep Learning Chinese Word Segment\n- [sbb_textline_detector](https:\u002F\u002Fgithub.com\u002Fqurator-spk\u002Fsbb_textline_detector)\n\n### Character Segmentation \n- [watersink\u002FCharacter-Segmentation](https:\u002F\u002Fgithub.com\u002Fwatersink\u002FCharacter-Segmentation)\n- [sharatsawhney\u002Fcharacter_segmentation](https:\u002F\u002Fgithub.com\u002Fsharatsawhney\u002Fcharacter_segmentation)\n\n### Word Segmentation\n- [githubharald\u002FWordSegmentation](https:\u002F\u002Fgithub.com\u002Fgithubharald\u002FWordSegmentation) Detect handwritten words (classic image processing based method).\n- [kcws](https:\u002F\u002Fgithub.com\u002Fkoth\u002Fkcws)\n\n### Document Segmentation\n- [LayoutParser](https:\u002F\u002Flayout-parser.github.io)\n- [eynollah](https:\u002F\u002Fgithub.com\u002Fqurator-spk\u002Feynollah)\n- [chulwoopack\u002Fdocstrum](https:\u002F\u002Fgithub.com\u002Fchulwoopack\u002Fdocstrum)\n- 
[LAREX](https:\u002F\u002Fgithub.com\u002FOCR4all\u002FLAREX) - LAREX is a semi-automatic open-source tool for layout analysis on early printed books.\n- [leonlulu\u002FDeepLayout](https:\u002F\u002Fgithub.com\u002Fleonlulu\u002FDeepLayout) - Deep learning based page layout analysis\n- [dhSegment](https:\u002F\u002Fgithub.com\u002Fdhlab-epfl\u002FdhSegment)\n- [Pay20Y\u002FLayout_Analysis](https:\u002F\u002Fgithub.com\u002FPay20Y\u002FLayout_Analysis)\n- [rbaguila\u002Fdocument-layout-analysis](https:\u002F\u002Fgithub.com\u002Frbaguila\u002Fdocument-layout-analysis)\n- [P2PaLA](https:\u002F\u002Fgithub.com\u002Flquirosd\u002FP2PaLA) - Page to PAGE Layout Analysis Tool\n- [ocroseg](https:\u002F\u002Fgithub.com\u002FNVlabs\u002Focroseg\u002F) - This is a deep learning model for page layout analysis \u002F segmentation.\n- [DIVA-DIA\u002FDIVA_Layout_Analysis_Evaluator](https:\u002F\u002Fgithub.com\u002FDIVA-DIA\u002FDIVA_Layout_Analysis_Evaluator) - Layout Analysis Evaluator for the ICDAR 2017 competition on Layout Analysis for Challenging Medieval Manuscripts\n- [ocrsegment](https:\u002F\u002Fgithub.com\u002Fwatersink\u002Focrsegment) - a deep learning model for page layout analysis \u002F segmentation.\n- [ARU-Net](https:\u002F\u002Fgithub.com\u002FTobiasGruening\u002FARU-Net)\n- [xy-cut-tree](https:\u002F\u002Fgithub.com\u002Fkavishgambhir\u002Fxy-cut-tree)\n- [ocrd_segment](https:\u002F\u002Fgithub.com\u002FOCR-D\u002Focrd_segment)\n- [LayoutML](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Funilm\u002Ftree\u002Fmaster\u002Flayoutlm)\n- [LayoutLMv2](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Funilm\u002Ftree\u002Fmaster\u002Flayoutlmv2)\n- [eynollah](https:\u002F\u002Fgithub.com\u002Fqurator-spk\u002Feynollah)\n\n### Form Segmentation\n- https:\u002F\u002Fgithub.com\u002Fdoxakis\u002Fform-segmentation\n\n## Handwritten\n- https:\u002F\u002Fgithub.com\u002Farthurflor23\u002Fhandwritten-text-recognition\n- 
https:\u002F\u002Fgithub.com\u002Fawslabs\u002Fhandwritten-text-recognition-for-apache-mxnet\n- https:\u002F\u002Fgithub.com\u002F0x454447415244\u002FHandwritingRecognitionSystem\n- https:\u002F\u002Fgithub.com\u002FSparshaSaha\u002FHandwritten-Number-Recognition-With-Image-Segmentation\n- https:\u002F\u002Fgithub.com\u002FThomasDelteil\u002FHandwrittenTextRecognition_MXNet\n- [SimpleHTR](https:\u002F\u002Fgithub.com\u002Fgithubharald\u002FSimpleHTR) - Handwritten Text Recognition (HTR) system implemented with TensorFlow.\n- [handwriting-ocr](https:\u002F\u002Fgithub.com\u002FBreta01\u002Fhandwriting-ocr) - OCR software for recognition of handwritten text\n- [AWSLabs: handwritten text regognition for Apache MXNet](https:\u002F\u002Fgithub.com\u002Fawslabs\u002Fhandwritten-text-recognition-for-apache-mxnet)\n- [vloison\u002FHandwritten_Text_Recognition](https:\u002F\u002Fgithub.com\u002Fvloison\u002FHandwritten_Text_Recognition)\n- https:\u002F\u002Fgithub.com\u002Fsushant097\u002FHandwritten-Line-Text-Recognition-using-Deep-Learning-with-Tensorflow\n- https:\u002F\u002Fgithub.com\u002Fqurator-spk\u002Fsbb_textline_detection\n- [Handprint](https:\u002F\u002Fgithub.com\u002Fcaltechlibrary\u002Fhandprint) - apply HTR services from Amazon, Google, and\u002For Microsoft to scanned documents\n\n## Table detection\n\n- [unitable](https:\u002F\u002Fgithub.com\u002Fpoloclub\u002Funitable) - [paper:2024](https:\u002F\u002Farxiv.org\u002Fabs\u002F2403.04822) UniTable: Towards a Unified Table Foundation Model\n- [TableTransformer](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Ftable-transformer)\n- [TableNet](https:\u002F\u002Fgithub.com\u002Fjainammm\u002FTableNet) - Unofficial implementation of ICDAR 2019 paper : TableNet: Deep Learning model for end-to-end Table detection and Tabular data extraction from Scanned Document Images.\n- [image-table-ocr](https:\u002F\u002Fgithub.com\u002Feihli\u002Fimage-table-ocr)\n- 
[TreeStructure](https:\u002F\u002Fgithub.com\u002FHazyResearch\u002FTreeStructure) - Table Extraction Tool\n- [TableTrainNet](https:\u002F\u002Fgithub.com\u002Fmawanda-jun\u002FTableTrainNet) - Table recognition inside douments using neural networks.\n- [table_layout_detection_research](https:\u002F\u002Fgithub.com\u002Fcbgaindia\u002Fparsers\u002Fblob\u002Fmaster\u002Fresearch\u002Flayout_detection_research.md) \n- [TableBank](https:\u002F\u002Fgithub.com\u002Fdoc-analysis\u002FTableBank)\n- [Camelot](https:\u002F\u002Fgithub.com\u002Fatlanhq\u002Fcamelot)\n- [ocr-table](https:\u002F\u002Fgithub.com\u002Fcseas\u002Focr-table) - Extract tables from scanned image PDFs using Optical Character Recognition.\n- [ExtractTable-py](https:\u002F\u002Fgithub.com\u002FExtractTable\u002FExtractTable-py)\n- [image-table-ocr](https:\u002F\u002Fgithub.com\u002Feihli\u002Fimage-table-ocr)\n\n## Language detection\n\n- [lingua](https:\u002F\u002Fgithub.com\u002Fpemistahl\u002Flingua) - The most accurate natural language detection library for Java and other JVM languages, suitable for long and short text alike\n- [langdetect](https:\u002F\u002Fpypi.org\u002Fproject\u002Flangdetect\u002F)\n- [whatthelang](https:\u002F\u002Fgithub.com\u002Findix\u002Fwhatthelang) - Lightning Fast Language Prediction rocket\n- [wiki-lang-detect](https:\u002F\u002Fgithub.com\u002Fvseloved\u002Fwiki-lang-detect)\n\n## Text detection and localization\n\n- [DB](https:\u002F\u002Fgithub.com\u002FMhLiao\u002FDB)\n- [DeepReg](https:\u002F\u002Fgithub.com\u002FDeepRegNet\u002FDeepReg)\n- [CornerText](https:\u002F\u002Fgithub.com\u002Flvpengyuan\u002Fcorner) - [paper:2018](https:\u002F\u002Farxiv.org\u002Fabs\u002F1802.08948)) - Multi-Oriented Scene Text Detection via Corner Localization and Region Segmentation\n- [RRPN](https:\u002F\u002Fgithub.com\u002Fmjq11302010044\u002FRRPN) - ([paper:2018](https:\u002F\u002Farxiv.org\u002Fabs\u002F1703.01086)) - Arbitrary-Oriented Scene Text Detection via Rotation 
Proposals\n- [MASTER-TF](https:\u002F\u002Fgithub.com\u002Fjiangxiluning\u002FMASTER-TF) - ([paper:2021](https:\u002F\u002Farxiv.org\u002Fabs\u002F1910.02562)) - TensorFlow reimplementation of \"MASTER: Multi-Aspect Non-local Network for Scene Text Recognition\" (Pattern Recognition 2021).\n- [MaskTextSpotterV3](https:\u002F\u002Fgithub.com\u002FMhLiao\u002FMaskTextSpotterV3) - ([paper:2020](https:\u002F\u002Farxiv.org\u002Fabs\u002F2007.09482)) -  Mask TextSpotter v3 is an end-to-end trainable scene text spotter that adopts a Segmentation Proposal Network (SPN) instead of an RPN.\n- [TextFuseNet](https:\u002F\u002Fgithub.com\u002Fying09\u002FTextFuseNet) - ([paper:2020](https:\u002F\u002Fwww.ijcai.org\u002FProceedings\u002F2020\u002F72)) A PyTorch implementation of \"TextFuseNet: Scene Text Detection with Richer Fused Features\".\n- [SATRN](https:\u002F\u002Fgithub.com\u002Fclovaai\u002FSATRN)- ([paper:2020](https:\u002F\u002Fopenaccess.thecvf.com\u002Fcontent_CVPRW_2020\u002Fpapers\u002Fw34\u002FLee_On_Recognizing_Texts_of_Arbitrary_Shapes_With_2D_Self-Attention_CVPRW_2020_paper.pdf)) - Official Tensorflow Implementation of Self-Attention Text Recognition Network (SATRN) (CVPR Workshop WTDDLE 2020).\n- [cvpr20-scatter-text-recognizer](https:\u002F\u002Fgithub.com\u002Fphantrdat\u002Fcvpr20-scatter-text-recognizer) - ([paper:2020](https:\u002F\u002Fopenaccess.thecvf.com\u002Fcontent_CVPR_2020\u002Fpapers\u002FLitman_SCATTER_Selective_Context_Attentional_Scene_Text_Recognizer_CVPR_2020_paper.pdf)) - Unofficial implementation of CVPR 2020 paper \"SCATTER: Selective Context Attentional Scene Text Recognizer\"\n- [seed](https:\u002F\u002Fgithub.com\u002FPay20Y\u002FSEED) - ([paper:2020[https:\u002F\u002Farxiv.org\u002Fpdf\u002F2005.10977.pdf]) - This is the implementation of the paper \"SEED: Semantics Enhanced Encoder-Decoder Framework for Scene Text Recognition\" \n- [vedastr](https:\u002F\u002Fgithub.com\u002FMedia-Smart\u002Fvedastr) - A scene text recognition 
toolbox based on PyTorch\n- [AutoSTR](https:\u002F\u002Fgithub.com\u002FAutoML-4Paradigm\u002FAutoSTR) - ([paper:2020](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2003.06567.pdf)) Efficient Backbone Search for Scene Text Recognition\n- [Decoupled-attention-network](https:\u002F\u002Fgithub.com\u002FWang-Tianwei\u002FDecoupled-attention-network) - ([paper:2019](https:\u002F\u002Farxiv.org\u002Fabs\u002F1912.10205)) Pytorch implementation for \"Decoupled attention network for text recognition\".\n- [Bi-STET](https:\u002F\u002Fgithub.com\u002FMauritsBleeker\u002FBi-STET) - ([paper:2020](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1912.03656.pdf)) Implementation of Bidirectional Scene Text Recognition with a Single Decoder\n- [kiss](https:\u002F\u002Fgithub.com\u002FBartzi\u002Fkiss) - ([paper:2019](https:\u002F\u002Farxiv.org\u002Fabs\u002F1911.08400)\n- [Deformable Text Recognition](https:\u002F\u002Fgithub.com\u002FAlpaca07\u002Fdtr) - ([paper:2019](https:\u002F\u002Fieeexplore.ieee.org\u002Fabstract\u002Fdocument\u002F9064428))\n- [MaskTextSpotter](https:\u002F\u002Fgithub.com\u002FMhLiao\u002FMaskTextSpotter) - ([paper:2019](https:\u002F\u002Fieeexplore.ieee.org\u002Fdocument\u002F8812908))\n- [CUTIE](https:\u002F\u002Fgithub.com\u002Fvsymbol\u002FCUTIE) - ([paper:2019](https:\u002F\u002Farxiv.org\u002Fabs\u002F1903.12363v4)\n- [AttentionOCR](https:\u002F\u002Fgithub.com\u002Fzhang0jhon\u002FAttentionOCR) - ([paper:2019](https:\u002F\u002Farxiv.org\u002Fabs\u002F1912.04561))\n- [crpn](https:\u002F\u002Fgithub.com\u002Fxhzdeng\u002Fcrpn) - ([paper:2019](https:\u002F\u002Farxiv.org\u002Fabs\u002F1804.02690))\n- [Scene-Text-Detection-with-SPECNET](https:\u002F\u002Fgithub.com\u002FAirBernard\u002FScene-Text-Detection-with-SPCNET) - Repository for Scene Text Detection with Supervised Pyramid Context Network with tensorflow.\n- [Character-Region-Awareness-for-Text-Detection](https:\u002F\u002Fgithub.com\u002FguruL\u002FCharacter-Region-Awareness-for-Text-Detection-)\n- 
[Real-time-Scene-Text-Detection-and-Recognition-System](https:\u002F\u002Fgithub.com\u002Ffnzhan\u002FReal-time-Scene-Text-Detection-and-Recognition-System) - End-to-end pipeline for real-time scene text detection and recognition.\n- [ocr_attention](https:\u002F\u002Fgithub.com\u002Fmarvis\u002Focr_attention) - Robust Scene Text Recognition with Automatic Rectification.\n- [masktextspotter.caffee2](https:\u002F\u002Fgithub.com\u002Flvpengyuan\u002Fmasktextspotter.caffe2) - The code of \"Mask TextSpotter: An End-to-End Trainable Neural Network for Spotting Text with Arbitrary Shapes\".\n- [InceptText-Tensorflow](https:\u002F\u002Fgithub.com\u002Fxieyufei1993\u002FInceptText-Tensorflow) - An Implementation of the alogrithm in paper IncepText: A New Inception-Text Module with Deformable PSROI Pooling for Multi-Oriented Scene Text Detection.\n- [textspotter](https:\u002F\u002Fgithub.com\u002Ftonghe90\u002Ftextspotter) - An End-to-End TextSpotter with Explicit Alignment and Attention\n- [RRD](https:\u002F\u002Fgithub.com\u002FMhLiao\u002FRRD) - RRD: Rotation-Sensitive Regression for Oriented Scene Text Detection.\n- [crpn](https:\u002F\u002Fgithub.com\u002Fxhzdeng\u002Fcrpn) - Corner-based Region Proposal Network.\n- [SSTDNet](https:\u002F\u002Fgithub.com\u002FHotaekHan\u002FSSTDNet) - Implement 'Single Shot Text Detector with Regional Attention, ICCV 2017 Spotlight'.\n- [R2CNN](https:\u002F\u002Fgithub.com\u002Fbeacandler\u002FR2CNN) - caffe re-implementation of R2CNN: Rotational Region CNN for Orientation Robust Scene Text Detection.\n- [RRPN](https:\u002F\u002Fgithub.com\u002Fmjq11302010044\u002FRRPN) - Source code of RRPN ---- Arbitrary-Oriented Scene Text Detection via Rotation Proposals\n- [Tensorflow_SceneText_Oriented_Box_Predictor](https:\u002F\u002Fgithub.com\u002Fdafanghe\u002FTensorflow_SceneText_Oriented_Box_Predictor) - This project modify tensorflow object detection api code to predict oriented bounding boxes. 
It can be used for scene text detection.\n- [DeepSceneTextReader](https:\u002F\u002Fgithub.com\u002Fdafanghe\u002FDeepSceneTextReader) - This is a c++ project deploying a deep scene text reading pipeline with tensorflow. It reads text from natural scene images. It uses frozen tensorflow graphs. The detector detect scene text locations. The recognizer reads word from each detected bounding box.\n- [DeRPN](https:\u002F\u002Fgithub.com\u002FHCIILAB\u002FDeRPN) - A novel region proposal network for more general object detection ( including scene text detection ).\n- [Bartzi\u002Fsee](https:\u002F\u002Fgithub.com\u002FBartzi\u002Fsee) - SEE: Towards Semi-Supervised End-to-End Scene Text Recognition\n- [Bartzi\u002Fstn-ocr](https:\u002F\u002Fgithub.com\u002FBartzi\u002Fstn-ocr) - Code for the paper STN-OCR: A single Neural Network for Text Detection and Text Recognition\n- [beacandler\u002FR2CNN](https:\u002F\u002Fgithub.com\u002Fbeacandler\u002FR2CNN) - caffe re-implementation of R2CNN: Rotational Region CNN for Orientation Robust Scene Text Detection\n- [HsiehYiChia\u002FScene-text-recognition](https:\u002F\u002Fgithub.com\u002FHsiehYiChia\u002FScene-text-recognition) - Scene text detection and recognition based on Extremal Region(ER)\n- [R2CNN_Faster-RCNN_Tensorflow](https:\u002F\u002Fgithub.com\u002FDetectionTeamUCAS\u002FR2CNN_Faster-RCNN_Tensorflow) - Rotational region detection based on Faster-RCNN.\n- [corner](https:\u002F\u002Fgithub.com\u002Flvpengyuan\u002Fcorner) - Multi-Oriented Scene Text Detection via Corner Localization and Region Segmentation\n- [Corner_Segmentation_TextDetection](https:\u002F\u002Fgithub.com\u002FJK-Rao\u002FCorner_Segmentation_TextDetection) - Multi-Oriented Scene Text Detection via Corner Localization and Region Segmentation.\n- [TextSnake.pytorch](https:\u002F\u002Fgithub.com\u002Fprincewang1994\u002FTextSnake.pytorch) - A PyTorch implementation of ECCV2018 Paper: TextSnake: A Flexible Representation for Detecting Text of Arbitrary 
Shapes\n- [AON](https:\u002F\u002Fgithub.com\u002Fhuizhang0110\u002FAON) - Implementation for CVPR 2018 text recognition Paper by Tensorflow: \"AON: Towards Arbitrarily-Oriented Text Recognition\"\n- [pixel_link](https:\u002F\u002Fgithub.com\u002FZJULearning\u002Fpixel_link) - Implementation of our paper 'PixelLink: Detecting Scene Text via Instance Segmentation' in AAAI2018\n- [seglink](https:\u002F\u002Fgithub.com\u002Fdengdan\u002Fseglink) - An Implementation of the seglink alogrithm in paper Detecting Oriented Text in Natural Images by Linking Segments (=> pixe_link)\n- [SSTD](https:\u002F\u002Fgithub.com\u002FBestSonny\u002FSSTD) - Single Shot Text Detector with Regional Attention\n- [MORAN_v2](https:\u002F\u002Fgithub.com\u002FCanjie-Luo\u002FMORAN_v2) - MORAN: A Multi-Object Rectified Attention Network for Scene Text Recognition\n- [Curve-Text-Detector](https:\u002F\u002Fgithub.com\u002FYuliang-Liu\u002FCurve-Text-Detector) - This repository provides train＆test code, dataset, det.&rec. 
annotation, evaluation script, annotation tool, and ranking table.\n- [HCIILAB\u002FDeRPN](https:\u002F\u002Fgithub.com\u002FHCIILAB\u002FDeRPN) - A novel region proposal network for more general object detection ( including scene text detection ).\n- [TextField](https:\u002F\u002Fgithub.com\u002FYukangWang\u002FTextField) - TextField: Learning A Deep Direction Field for Irregular Scene Text Detection (TIP 2019)\n- [tensorflow-TextMountain](https:\u002F\u002Fgithub.com\u002Fliny23\u002Ftensorflow-TextMountain) - TextMountain: Accurate Scene Text Detection via Instance Segmentation\n- [Bartzi\u002Fsee](https:\u002F\u002Fgithub.com\u002FBartzi\u002Fsee) - Code for the AAAI 2018 publication \"SEE: Towards Semi-Supervised End-to-End Scene Text Recognition\"\n- [bgshih\u002Faster](https:\u002F\u002Fgithub.com\u002Fbgshih\u002Faster) - Recognizing cropped text in natural images.\n- [ReceiptParser](https:\u002F\u002Fgithub.com\u002FReceiptManager\u002Freceipt-parser) - A fuzzy receipt parser written in Python.\n- [vedastr](https:\u002F\u002Fgithub.com\u002FMedia-Smart\u002Fvedastr)\n\n### ABCNet [paper:2020](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2002.10200.pdf)\n- [AdelaiDet](https:\u002F\u002Fgithub.com\u002Faim-uofa\u002FAdelaiDet)\n- https:\u002F\u002Fgithub.com\u002FYuliang-Liu\u002Fbezier_curve_text_spotting\n- https:\u002F\u002Fgithub.com\u002Fquangvy2703\u002FABCNet-ESRGAN-SRTEXT\n- https:\u002F\u002Fgithub.com\u002FPxtri2156\u002FAdelaiDet_v2\n- https:\u002F\u002Fgithub.com\u002FzhubinQAQ\u002FIns\n\n### CRAFT [paper:2019](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1904.01941.pdf)\n- [CRAFT-pytorch (official)](https:\u002F\u002Fgithub.com\u002Fclovaai\u002FCRAFT-pytorch) - Pytorch implementation of CRAFT text detector.\n- [autonise\u002FCRAFT-Remade](https:\u002F\u002Fgithub.com\u002Fautonise\u002FCRAFT-Remade)\n- [s3nh\u002Fpytorch-text-recognition](https:\u002F\u002Fgithub.com\u002Fs3nh\u002Fpytorch-text-recognition) \n- 
[backtime92\u002FCRAFT-Reimplementation](https:\u002F\u002Fgithub.com\u002Fbacktime92\u002FCRAFT-Reimplementation)\n- [fcakyon\u002Fcraft-text-detector](https:\u002F\u002Fgithub.com\u002Ffcakyon\u002Fcraft-text-detector) - PyTorch implementation of CRAFT\n- [YongWookHa\u002Fcraft-text-detector](https:\u002F\u002Fgithub.com\u002FYongWookHa\u002Fcraft-text-detector)\n- [faustomorales\u002Fkeras-ocr](https:\u002F\u002Fgithub.com\u002Ffaustomorales\u002Fkeras-ocr) - A packaged and flexible version of the CRAFT text detector and Keras CRNN recognition model.\n- [fcakyon\u002Fcraft-text-detector](https:\u002F\u002Fgithub.com\u002Ffcakyon\u002Fcraft-text-detector)\n\n### FOTS [paper:2018](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1801.01671.pdf)\n- [FOTS](https:\u002F\u002Fgithub.com\u002Fxieyufei1993\u002FFOTS) - An Implementation of the FOTS: Fast Oriented Text Spotting with a Unified Network.\n- [FOTS_OCR](https:\u002F\u002Fgithub.com\u002FMasao-Taketani\u002FFOTS_OCR) \n\n### TextBoxes++ [paper:2018](https:\u002F\u002Farxiv.org\u002Fabs\u002F1801.02765)\n- [TextBoxes_plusplus (offical)](https:\u002F\u002Fgithub.com\u002FMhLiao\u002FTextBoxes_plusplus) TextBoxes++: A Single-Shot Oriented Scene Text Detector\n- [Shun14\u002FTextBoxes_plusplus_Tensorflo](https:\u002F\u002Fgithub.com\u002FShun14\u002FTextBoxes_plusplus_Tensorflow) - Textboxes_plusplus implementation with Tensorflow (python)\n\n### PSENet [paper:2018](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1806.02559.pdf)\n- [tensorflow_PSENet](https:\u002F\u002Fgithub.com\u002Fliuheng92\u002Ftensorflow_PSENet) - This is a tensorflow re-implementation of PSENet: Shape Robust Text Detection with Progressive Scale Expansion Network\n- [PAN-PSEnet](https:\u002F\u002Fgithub.com\u002Frahzaazhar\u002FPAN-PSEnet)\n- [PSENet](https:\u002F\u002Fgithub.com\u002Fwhai362\u002FPSENet) - Shape Robust Text Detection with Progressive Scale Expansion Network.\n\n### EAST 
[paper:2017](https:\u002F\u002Farxiv.org\u002Fabs\u002F1704.03155)\n- [EAST](https:\u002F\u002Fgithub.com\u002Fargman\u002FEAST)(official) - (tf1\u002Fpy2) A tensorflow implementation of EAST text detector\n- [AdvancedEAST](https:\u002F\u002Fgithub.com\u002Fhuoyijie\u002FAdvancedEAST) - (tf1\u002Fpy2) AdvancedEAST is an algorithm used for Scene image text detect, which is primarily based on EAST, and the significant improvement was also made, which make long text predictions more accurate.\n- [kurapan\u002FEAST](https:\u002F\u002Fgithub.com\u002Fkurapan\u002FEAST) Implementation of EAST scene text detector in Keras\n- [songdejia\u002FEAST](https:\u002F\u002Fgithub.com\u002Fsongdejia\u002FEAST) - This is a pytorch re-implementation of EAST: An Efficient and Accurate Scene Text Detector.\n- [HaozhengLi\u002FEAST_ICPR](https:\u002F\u002Fgithub.com\u002FHaozhengLi\u002FEAST_ICPR) - Forked from argman\u002FEAST for the ICPR MTWI 2018 CHALLENGE\n- [deepthinking-qichao\u002FEAST_ICPR2018](https:\u002F\u002Fgithub.com\u002Fdeepthinking-qichao\u002FEAST_ICPR2018)\n- [SakuraRiven\u002FEAST](https:\u002F\u002Fgithub.com\u002FSakuraRiven\u002FEAST)\n- [EAST-Detector-for-text-detection-using-OpenCV](https:\u002F\u002Fgithub.com\u002FZER-0-NE\u002FEAST-Detector-for-text-detection-using-OpenCV) - Text Detection from images using OpenCV\n- [easy-EAST](https:\u002F\u002Fgithub.com\u002Fche220\u002Feasy-EAST)\n\n### TextBoxes [paper:2016](https:\u002F\u002Farxiv.org\u002Fabs\u002F1611.06779)\n- [TextBoxes (official)](https:\u002F\u002Fgithub.com\u002FMhLiao\u002FTextBoxes)\n- [TextBoxes-TensorFlow](https:\u002F\u002Fgithub.com\u002Fgxd1994\u002FTextBoxes-TensorFlow) - TextBoxes re-implement using tensorflow\n- [zj463261929\u002FTextBoxes](https:\u002F\u002Fgithub.com\u002Fzj463261929\u002FTextBoxes) - TextBoxes: A Fast Text Detector with a Single Deep Neural Network\n- [shinjayne\u002Ftextboxes](https:\u002F\u002Fgithub.com\u002Fshinjayne\u002Ftextboxes) - Textboxes implementation 
with Tensorflow (python)\n- [shinTB](https:\u002F\u002Fgithub.com\u002Fshinjayne\u002FshinTB) - Textboxes : Image Text Detection Model : python package (tensorflow)\n\n### CTPN [paper:2016](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1609.03605.pdf)\n- [text-detection-ctpn](https:\u002F\u002Fgithub.com\u002Feragonruan\u002Ftext-detection-ctpn)\n- [yizt\u002Fkeras-ctpn](https:\u002F\u002Fgithub.com\u002Fyizt\u002Fkeras-ctpn)\n- [tianzhi0549\u002FCTPN](https:\u002F\u002Fgithub.com\u002Ftianzhi0549\u002FCTPN) - Detecting Text in Natural Image with Connectionist Text Proposal Network\n\n## Video Text Spotting\n- [VideoTextSCM](https:\u002F\u002Fgithub.com\u002Flsabrinax\u002FVideoTextSCM)\n- [TransDETR](https:\u002F\u002Fgithub.com\u002Fweijiawu\u002FTransDETR)\n- [YORO](https:\u002F\u002Fgithub.com\u002Fhikopensource\u002FDAVAR-Lab-OCR\u002Ftree\u002Fmain\u002Fdemo\u002Fvideotext\u002Fyoro) ([paper:2021](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1903.03299.pdf))\n\n## Font detection\n\n- [typefont](https:\u002F\u002Fgithub.com\u002FVasile-Peste\u002FTypefont) - The first open-source library that detects the font of a text in a image. 
\n\n## Optical Character Recognition Engines and Frameworks\n\n- [texify](https:\u002F\u002Fgithub.com\u002FVikParuchuri\u002Ftexify) - OCR model for math that outputs LaTeX and markdown.\n- [DAVAR-lab-OCR](https:\u002F\u002Fgithub.com\u002Fhikopensource\u002Fdavar-lab-ocr)\n- [CRNN.tf2](https:\u002F\u002Fgithub.com\u002FFLming\u002FCRNN.tf2)\n- [ocr.pytorch](https:\u002F\u002Fgithub.com\u002Fcourao\u002Focr.pytorch)\n- [PytorchOCR](https:\u002F\u002Fgithub.com\u002FWenmuZhou\u002FPytorchOCR)\n- [MMOCR](https:\u002F\u002Fgithub.com\u002Fopen-mmlab\u002Fmmocr)\n- [doctr](https:\u002F\u002Fgithub.com\u002Fmindee\u002Fdoctr)\n- [Master OCR](https:\u002F\u002Fgithub.com\u002Fjiangxiluning\u002FMASTER-TF)\n- [xiaofengShi\u002FCHINESE-OCR](https:\u002F\u002Fgithub.com\u002FxiaofengShi\u002FCHINESE-OCR)\n- [PaddleOCR](https:\u002F\u002Fgithub.com\u002FPaddlePaddle\u002FPaddleOCR)\n- [Urdu-Ocr](https:\u002F\u002Fgithub.com\u002FHassamChundrigar\u002FUrdu-Ocr)\n- [ocr.pytorch](https:\u002F\u002Fgithub.com\u002Fcourao\u002Focr.pytorch)\n- [ocular](https:\u002F\u002Fgithub.com\u002Fndnlp\u002Focular) - Ocular is a state-of-the-art historical OCR system. 
\n- [OCR++](https:\u002F\u002Fgithub.com\u002Fmayank4490\u002FOCR-plus-plus)\n- [pytextractor](https:\u002F\u002Fgithub.com\u002Fdanwald\u002Fpytextractor) - python ocr using tesseract\u002F with EAST opencv detector \n- [OCR-D](https:\u002F\u002Focr-d.github.io\u002F)\n- [ocrd_tesserocr](https:\u002F\u002Fgithub.com\u002FOCR-D\u002Focrd_tesserocr)\n- [Deeplearning-OCR](https:\u002F\u002Fgithub.com\u002Fvinayakkailas\u002FDeeplearning-OCR)\n- [PICCL](https:\u002F\u002Fgithub.com\u002FLanguageMachines\u002FPICCL)\n- [cnn_lstm_ctc_ocr](https:\u002F\u002Fgithub.com\u002Fweinman\u002Fcnn_lstm_ctc_ocr) - Tensorflow-based CNN+LSTM trained with CTC-loss for OCR.\n- [PassportScanner](https:\u002F\u002Fgithub.com\u002Fevermeer\u002FPassportScanner) - Scan the MRZ code of a passport and extract the firstname, lastname, passport number, nationality, date of birth, expiration date and personal number.\n- [pannous\u002Ftensorflow-ocr](https:\u002F\u002Fgithub.com\u002Fpannous\u002Ftensorflow-ocr) - OCR using tensorflow with attention.\n- [BowieHsu\u002Ftensorflow_ocr](https:\u002F\u002Fgithub.com\u002FBowieHsu\u002Ftensorflow_ocr) - OCR detection implement with tensorflow v1.4.\n- [GRCNN-for-OCR](https:\u002F\u002Fgithub.com\u002FJianfeng1991\u002FGRCNN-for-OCR) - This is the implementation of the paper \"Gated Recurrent Convolution Neural Network for OCR\"\n- [go-ocr](https:\u002F\u002Fgithub.com\u002Fmaxim2266\u002Fgo-ocr) - A tool for extracting text from scanned documents (via OCR), with user-defined post-processing.\n- [insightocr](https:\u002F\u002Fgithub.com\u002Fdeepinsight\u002Finsightocr) - MXNet OCR implementation. 
Including text recognition and detection.\n- [ocr_densenet](https:\u002F\u002Fgithub.com\u002Fyinchangchang\u002Focr_densenet) - The first Xi'an Jiaotong University Artificial Intelligence Practice Contest (2018AI Practice Contest - Picture Text Recognition) first; only use the densenet to identify the Chinese characters\n- [CNN_LSTM_CTC_Tensorflow](https:\u002F\u002Fgithub.com\u002Fwatsonyanghx\u002FCNN_LSTM_CTC_Tensorflow) - CNN+LSTM+CTC based OCR implemented using tensorflow.\n- [tmbdev\u002Fclstm](https:\u002F\u002Fgithub.com\u002Ftmbdev\u002Fclstm) - A small C++ implementation of LSTM networks, focused on OCR.\n- [VistaOCR](https:\u002F\u002Fgithub.com\u002Fisi-vista\u002FVistaOCR)\n- [tesseract.js](https:\u002F\u002Fgithub.com\u002Fnaptha\u002Ftesseract.js)\n- [Tesseract](https:\u002F\u002Fgithub.com\u002Ftesseract-ocr\u002Ftesseract)\n- [kaldi](https:\u002F\u002Fgithub.com\u002Fkaldi-asr\u002Fkaldi)\n- [ocropus3](https:\u002F\u002Fgithub.com\u002FNVlabs\u002Focropus3) - Repository collecting all the submodules for the new PyTorch-based OCR System.\n- [calamari](https:\u002F\u002Fgithub.com\u002FCalamari-OCR\u002Fcalamari)\n- [ocropy](https:\u002F\u002Fgithub.com\u002Ftmbdev\u002Focropy) - Python-based tools for document analysis and OCR\n- [chinese_ocr](https:\u002F\u002Fgithub.com\u002FYCG09\u002Fchinese_ocr)\n- [deep_ocr](https:\u002F\u002Fgithub.com\u002FJinpengLI\u002Fdeep_ocr) - make a better chinese character recognition OCR than tesseract.\n- [ocular](https:\u002F\u002Fgithub.com\u002Ftberg12\u002Focular)\n- [textDetectionWithScriptID](https:\u002F\u002Fgithub.com\u002Fisi-vista\u002FtextDetectionWithScriptID)\n- [transcribus](https:\u002F\u002Ftranskribus.eu\u002FTranskribus\u002F)\n- [FastText](https:\u002F\u002Ffasttext.cc\u002F) - Library for efficient text classification and representation learning\n- [GOCR](http:\u002F\u002Fwww-e.uni-magdeburg.de\u002Fjschulen\u002Focr\u002F)\n- 
[Ocrad](https:\u002F\u002Fwww.gnu.org\u002Fsoftware\u002Focrad\u002F)\n- [franc](https:\u002F\u002Fgithub.com\u002Fwooorm\u002Ffranc) - Natural language detection\n- [ocrfeeder](https:\u002F\u002Fgithub.com\u002FGNOME\u002Focrfeeder)\n- [emedvedev\u002Fattention-ocr](https:\u002F\u002Fgithub.com\u002Femedvedev\u002Fattention-ocr) - A Tensorflow model for text recognition (CNN + seq2seq with visual attention) available as a Python package and compatible with Google Cloud ML Engine.\n- [da03\u002Fattention-ocr](https:\u002F\u002Fgithub.com\u002Fda03\u002FAttention-OCR) - Visual Attention based OCR\n- [dhlab-epfl\u002FdhSegment](https:\u002F\u002Fgithub.com\u002Fdhlab-epfl\u002FdhSegment) - Generic framework for historical document processing\n- https:\u002F\u002Fgithub.com\u002Fmawanda-jun\u002FTableTrainNet\n- https:\u002F\u002Fgithub.com\u002Fkermitt2\u002Fdelft\n- https:\u002F\u002Fgithub.com\u002Fchulwoopack\u002Fdocstrum\n- [grobid](https:\u002F\u002Fgithub.com\u002Fkermitt2\u002Fgrobid) - A machine learning software for extracting information from scholarly documents\n- [lapdftext](http:\u002F\u002Fbmkeg.github.io\u002Flapdftext\u002F) - LA-PDFText is a system for extracting accurate text from PDF-based research articles\n- https:\u002F\u002Fgithub.com\u002Fberatkurar\u002Ftextline-segmentation-using-fcn\n- https:\u002F\u002Fgithub.com\u002FOCR4all\n- https:\u002F\u002Fgithub.com\u002FOCR4all\u002FLAREX\n- https:\u002F\u002Fgithub.com\u002FOCR4all\u002FOCR4all\n- https:\u002F\u002Fgithub.com\u002Fandbue\u002Fnashi\n- http:\u002F\u002Fkraken.re\u002F\n- [kraken](https:\u002F\u002Fgithub.com\u002Fmittagessen\u002Fkraken)\n- [gosseract](https:\u002F\u002Fgithub.com\u002Fotiai10\u002Fgosseract) - Go package for OCR (Optical Character Recognition), by using Tesseract C++ library.\n- [EasyOCR](https:\u002F\u002Fgithub.com\u002FJaidedAI\u002FEasyOCR) - Ready-to-use OCR with 40+ languages supported including Chinese, Japanese, Korean and Thai.\n- 
[invoice-scanner-react-native](https:\u002F\u002Fgithub.com\u002Fburhanuday\u002Finvoice-scanner-react-native)\n- [Arabic-OCR](https:\u002F\u002Fgithub.com\u002FHusseinYoussef\u002FArabic-OCR)\n\n## Awesome lists\n- https:\u002F\u002Fgithub.com\u002Fwhitelok\u002Fimage-text-localization-recognition\n- [Awesome-Scene-Text-Recognition](https:\u002F\u002Fgithub.com\u002Fchongyangtao\u002FAwesome-Scene-Text-Recognition) - \nA curated list of resources dedicated to scene text localization and recognition\n- [awesome-deep-text-detection-recognition](https:\u002F\u002Fgithub.com\u002Fhwalsuklee\u002Fawesome-deep-text-detection-recognition)\n- https:\u002F\u002Fgithub.com\u002Fkurapan\u002Fawesome-scene-text\n- [kba\u002Fawesome-ocr](https:\u002F\u002Fgithub.com\u002Fkba\u002Fawesome-ocr)\n- [perfectspr\u002Fawesome-ocr](https:\u002F\u002Fgithub.com\u002Fperfectspr\u002Fawesome-ocr)\n- https:\u002F\u002Fgithub.com\u002FZumingHuang\u002Fawesome-ocr-resources\n- https:\u002F\u002Fgithub.com\u002Fchongyangtao\u002FAwesome-Scene-Text-Recognition\n- https:\u002F\u002Fgithub.com\u002Fwhitelok\u002Fimage-text-localization-recognition\n- https:\u002F\u002Fgithub.com\u002Fhwalsuklee\u002Fawesome-deep-text-detection-recognition\n- https:\u002F\u002Fgithub.com\u002Fwanghaisheng\u002Fawesome-ocr\n- https:\u002F\u002Fgithub.com\u002FJyouhou\u002FSceneTextPapers\n- https:\u002F\u002Fgithub.com\u002Fjyhengcoder\u002FmyOCR\n- https:\u002F\u002Fgithub.com\u002Fhwalsuklee\u002Fawesome-deep-text-detection-recognition\n- https:\u002F\u002Fgithub.com\u002Ftangzhenyu\u002FScene-Text-Understanding\n- https:\u002F\u002Fgithub.com\u002Fwhitelok\u002Fimage-text-localization-recognition\n- https:\u002F\u002Fgithub.com\u002Fkba\u002Fawesome-ocr\n- https:\u002F\u002Fgithub.com\u002Fsoumendra\u002Fawesome-ocr\n- [chongyangtao\u002FAwesome-Scene-Text-Recognition](https:\u002F\u002Fgithub.com\u002Fchongyangtao\u002FAwesome-Scene-Text-Recognition) - Papers and datasets\n\n## Proprietary OCR Engines\n- 
[ABBYY](https:\u002F\u002Fwww.abbyy.com\u002Fen-us\u002F)\n- [Omnipage](https:\u002F\u002Fwww.nuance.com\u002Fprint-capture-and-pdf-solutions.html)\n- [Clova.ai](https:\u002F\u002Fdemo.ocr.clova.ai\u002F)\n- [Konfuzio](https:\u002F\u002Fkonfuzio.com\u002Fen\u002F)\n\n## Cloud based OCR Engines (SaaS)\n- [thehive.ai](https:\u002F\u002Fthehive.ai\u002Fhive-ocr-solutions)\n- [impira](https:\u002F\u002Fwww.impira.com\u002Ftry\u002Fsmarter-ocr)\n- [AWS Textract](https:\u002F\u002Faws.amazon.com\u002Ftextract\u002F)\n- [Nanonets](https:\u002F\u002Fnanonets.com\u002Focr-api\u002F)\n- [docparser](https:\u002F\u002Fdocparser.com\u002F)\n- [ocrolus](https:\u002F\u002Fwww.ocrolus.com\u002F)\n- [Butler Labs](https:\u002F\u002Fwww.butlerlabs.ai\u002F)\n\n## File formats and tools\n- [nw-page-editor](https:\u002F\u002Fgithub.com\u002Fmauvilsa\u002Fnw-page-editor) - Simple app for visual editing of Page XML files\n- [hocr](http:\u002F\u002Fkba.cloud\u002Fhocr-spec\u002F1.2\u002F)\n- [alto](https:\u002F\u002Fgithub.com\u002Faltoxml)\n- [PageXML](https:\u002F\u002Fgithub.com\u002FPRImA-Research-Lab\u002FPAGE-XML)\n- [ocr-fileformat](https:\u002F\u002Fgithub.com\u002FUB-Mannheim\u002Focr-fileformat) - Validate and transform various OCR file formats\n- [hocr-tools](https:\u002F\u002Fgithub.com\u002Ftmbdev\u002Fhocr-tools) - Tools for manipulating and evaluating the hOCR format for representing multi-lingual OCR results by embedding them into HTML.\n\n## Datasets\n- http:\u002F\u002Fwww.iapr-tc11.org\u002Fmediawiki\u002Findex.php\u002FDatasets_List\n- https:\u002F\u002Ficdar2019.org\u002Fcompetitions-2\u002F\n- https:\u002F\u002Frrc.cvc.uab.es\u002F#\n- https:\u002F\u002Flionbridge.ai\u002Fdatasets\u002F15-best-ocr-handwriting-datasets\u002F\n- https:\u002F\u002Fgithub.com\u002Fxylcbd\u002Focr-open-dataset\n- ICDAR datasets\n- https:\u002F\u002Fgithub.com\u002FOpenArabic\u002FOCR_GS_Data\n- https:\u002F\u002Fgithub.com\u002Fcs-chan\u002FTotal-Text-Dataset\n- 
[scenetext](http:\u002F\u002Fwww.robots.ox.ac.uk\u002F~vgg\u002Fdata\u002Fscenetext\u002F) - This is a synthetically generated dataset, in which word instances are placed in natural scene images, while taking into account the scene layout.\n- [Total-Text-Dataset](https:\u002F\u002Fgithub.com\u002Fcs-chan\u002FTotal-Text-Dataset)\n- [ocr-open-dataset](https:\u002F\u002Fgithub.com\u002Fxylcbd\u002Focr-open-dataset)\n\n## Data augmentation and Synthetic data generation\n- [DocCreator](http:\u002F\u002Fdoc-creator.labri.fr\u002F) - DIAR software for synthetic document image and groundtruth generation, with various degradation models for data augmentation.\n- [Scene-Text-Image-Transformer](https:\u002F\u002Fgithub.com\u002FCanjie-Luo\u002FScene-Text-Image-Transformer) - Scene Text Image Transformer\n- [Belval\u002FTextRecognitionDataGenerator](https:\u002F\u002Fgithub.com\u002FBelval\u002FTextRecognitionDataGenerator) - A synthetic data generator for text recognition\n- [Sanster\u002Ftext_renderer](https:\u002F\u002Fgithub.com\u002FSanster\u002Ftext_renderer)\n- [awesome-SynthText](https:\u002F\u002Fgithub.com\u002FTianzhongSong\u002Fawesome-SynthText)\n- [Text-Image-Augmentation](https:\u002F\u002Fgithub.com\u002FCanjie-Luo\u002FText-Image-Augmentation)\n- [UnrealText](https:\u002F\u002Fgithub.com\u002FJyouhou\u002FUnrealText)\n- [SynthText_Chinese_version](https:\u002F\u002Fgithub.com\u002FJarveeLee\u002FSynthText_Chinese_version)\n\n## Pre OCR Processing\n- [ajgalleo\u002Fdocument-image-binarization](https:\u002F\u002Fgithub.com\u002Fajgallego\u002Fdocument-image-binarization)\n- [PRLib](https:\u002F\u002Fgithub.com\u002Fleha-bot\u002FPRLib) - Pre-Recognize Library - library with algorithms for improving OCR quality.\n- [sbb_binarization](https:\u002F\u002Fgithub.com\u002Fqurator-spk\u002Fsbb_binarization) - \n\n## Post OCR Correction\n- [KBNLresearch\u002Fochre](https:\u002F\u002Fgithub.com\u002FKBNLresearch\u002Fochre) - Toolbox for OCR post-correction\n- 
[cisocrgroup\u002FPoCoTo](https:\u002F\u002Fgithub.com\u002Fcisocrgroup\u002FPoCoTo) - The CIS OCR PostCorrectionTool\n- [afterscan](http:\u002F\u002Fwww.afterscan.com\u002F) \n\n## Benchmarks\n- [TedEval](https:\u002F\u002Fgithub.com\u002Fclovaai\u002FTedEval)\n- [clovaai\u002Fdeep-text-recognition-benchmark](https:\u002F\u002Fgithub.com\u002Fclovaai\u002Fdeep-text-recognition-benchmark) - Text recognition (optical character recognition) with deep learning methods.\n- [dinglehopper](https:\u002F\u002Fgithub.com\u002Fqurator-spk\u002Fdinglehopper) - dinglehopper is an OCR evaluation tool and reads ALTO, PAGE and text files.\n- [CLEval](https:\u002F\u002Fgithub.com\u002Fclovaai\u002FCLEval)\n\n## misc\n- [ocrodeg](https:\u002F\u002Fgithub.com\u002FNVlabs\u002Focrodeg) - a small Python library implementing document image degradation for data augmentation for handwriting recognition and OCR applications.\n- [scantailor](https:\u002F\u002Fgithub.com\u002Fscantailor\u002Fscantailor) - Scan Tailor is an interactive post-processing tool for scanned pages.\n- [jlsutherland\u002Fdoc2text](https:\u002F\u002Fgithub.com\u002Fjlsutherland\u002Fdoc2text) - help researchers fix these errors and extract the highest quality text from their pdfs as possible.\n- [mauvilsa\u002Fnw-page-editor](https:\u002F\u002Fgithub.com\u002Fmauvilsa\u002Fnw-page-editor) - Simple app for visual editing of Page XML files.\n- [Transkribus](https:\u002F\u002Ftranskribus.eu\u002FTranskribus\u002F) - Transkribus is a comprehensive platform for the digitisation, AI-powered recognition, transcription and searching of historical documents.\n- http:\u002F\u002Fprojectnaptha.com\u002F\n- https:\u002F\u002Fgithub.com\u002F4lex4\u002Fscantailor-advanced\n- [open-semantic-search](https:\u002F\u002Fgithub.com\u002Fopensemanticsearch\u002Fopen-semantic-search) - Open Semantic Search Engine and Open Source Text Mining & Text Analytics platform (Integrates ETL for document processing, OCR for images & PDF, named 
entity recognition for persons, organizations & locations, metadata management by thesaurus & ontologies, search user interface & search apps for fulltext search, faceted search & knowledge graph)\n- [ocrserver](https:\u002F\u002Fgithub.com\u002Fotiai10\u002Focrserver) - A simple OCR API server, seriously easy to be deployed by Docker, on Heroku as well\n- [cosc428-structor](https:\u002F\u002Fgithub.com\u002Fchadoliver\u002Fcosc428-structor) - ~1000 book pages + OpenCV + python = page regions identified as paragraphs, lines, images, captions, etc.\n- [nidaba](https:\u002F\u002Fgithub.com\u002Fopenphilology\u002Fnidaba\u002F) - An expandable and scalable OCR pipeline\n- https:\u002F\u002Fgithub.com\u002FMaybeShewill-CV\u002FCRNN_Tensorflow\n- [OCRmyPDF](https:\u002F\u002Fgithub.com\u002Fjbarlow83\u002FOCRmyPDF)\n","# 令人惊叹的OCR\n\n## 校正倾斜与去卷曲\n\n- [jdeskew](https:\u002F\u002Fgithub.com\u002Fphamquiluan\u002Fjdeskew) ([论文:2022](https:\u002F\u002Fwww.researchgate.net\u002Fpublication\u002F364320913_ADAPTIVE_RADIAL_PROJECTION_ON_FOURIER_MAGNITUDE_SPECTRUM_FOR_DOCUMENT_IMAGE_SKEW_ESTIMATION)) - 基于傅里叶幅度谱的自适应径向投影用于文档图像倾斜估计\n- [DewarpNet](https:\u002F\u002Fgithub.com\u002Fcvlab-stonybrook\u002FDewarpNet) ([论文:2019](https:\u002F\u002Fwww3.cs.stonybrook.edu\u002F~cvl\u002Fprojects\u002Fdewarpnet\u002Fstorage\u002Fpaper.pdf))\n- [MORAN_v2](https:\u002F\u002Fgithub.com\u002FCanjie-Luo\u002FMORAN_v2) ([论文:2019](https:\u002F\u002Farxiv.org\u002Fabs\u002F1901.03003)) - 用于场景文本识别的多目标校正注意力网络\n- [thomasjhuang\u002Fdeep-learning-for-document-dewarping](https:\u002F\u002Fgithub.com\u002Fthomasjhuang\u002Fdeep-learning-for-document-dewarping) 高分辨率GAN在扰动文档图像去卷曲中的应用\n- [unproject_text](https:\u002F\u002Fgithub.com\u002Fmzucker\u002Funproject_text) - 使用变换后的椭圆进行文本透视恢复\n- [unpaper](https:\u002F\u002Fgithub.com\u002FFlameeyes\u002Funpaper) - 一种用于扫描纸张的后处理工具，尤其适用于从先前复印件扫描而来的书页。\n- [deskew](https:\u002F\u002Fgithub.com\u002Fsbrunner\u002Fdeskew) - 用于校正扫描文档倾斜的库\n- 
[deskewing](https:\u002F\u002Fgithub.com\u002Fsauravbiswasiupr\u002Fdeskewing) - 包含使用MLP、LSTM和LLS变换来校正图像倾斜的代码\n- [skew_correction](https:\u002F\u002Fgithub.com\u002Fprajwalmylar\u002Fskew_correction) - 通过Canny边缘检测找到偏差，从而对倾斜内容的图像进行去倾斜处理。\n- [page_dewarp](https:\u002F\u002Fgithub.com\u002Fmzucker\u002Fpage_dewarp) (2016) - 使用“立方体片”模型进行页面去卷曲和阈值化\n- [text_deskewing](https:\u002F\u002Fgithub.com\u002Fdehaisea\u002Ftext_deskewing) - 如果文本图像不直，则将其旋转以提高文本检测和识别效果。\n- [galfar\u002Fdeskew](https:\u002F\u002Fgithub.com\u002Fgalfar\u002Fdeskew) - Deskew是一个用于校正扫描文本文档倾斜的命令行工具。它使用霍夫变换检测图像中的“文本行”。输出结果是旋转后的图像，使文本行变为水平。\n- [xellows1305\u002FDocument-Image-Dewarping](https:\u002F\u002Fgithub.com\u002Fxellows1305\u002FDocument-Image-Dewarping) - 没有代码 :(\n- https:\u002F\u002Fgithub.com\u002FRaymondMcGuire\u002FBOOK-CONTENT-SEGMENTATION-AND-DEWARPING\n- [Docuwarp](https:\u002F\u002Fgithub.com\u002Fthomasjhuang\u002Fdeep-learning-for-document-dewarping)\n- [Alyn](https:\u002F\u002Fgithub.com\u002Fkakul\u002FAlyn)\n\n\n## 分割\n\n### 行分割\n- [ARU-Net](https:\u002F\u002Fgithub.com\u002FTobiasGruening\u002FARU-Net) - 深度学习中文分词\n- [sbb_textline_detector](https:\u002F\u002Fgithub.com\u002Fqurator-spk\u002Fsbb_textline_detector)\n\n### 字符分割\n- [watersink\u002FCharacter-Segmentation](https:\u002F\u002Fgithub.com\u002Fwatersink\u002FCharacter-Segmentation)\n- [sharatsawhney\u002Fcharacter_segmentation](https:\u002F\u002Fgithub.com\u002Fsharatsawhney\u002Fcharacter_segmentation)\n\n### 单词分割\n- [githubharald\u002FWordSegmentation](https:\u002F\u002Fgithub.com\u002Fgithubharald\u002FWordSegmentation) 检测手写单词（基于经典图像处理的方法）。\n- [kcws](https:\u002F\u002Fgithub.com\u002Fkoth\u002Fkcws)\n\n### 文档分割\n- [LayoutParser](https:\u002F\u002Flayout-parser.github.io)\n- [eynollah](https:\u002F\u002Fgithub.com\u002Fqurator-spk\u002Feynollah)\n- [chulwoopack\u002Fdocstrum](https:\u002F\u002Fgithub.com\u002Fchulwoopack\u002Fdocstrum)\n- [LAREX](https:\u002F\u002Fgithub.com\u002FOCR4all\u002FLAREX) - 
LAREX是一款用于早期印刷书籍版面分析的半自动开源工具。\n- [leonlulu\u002FDeepLayout](https:\u002F\u002Fgithub.com\u002Fleonlulu\u002FDeepLayout) - 基于深度学习的页面版面分析\n- [dhSegment](https:\u002F\u002Fgithub.com\u002Fdhlab-epfl\u002FdhSegment)\n- [Pay20Y\u002FLayout_Analysis](https:\u002F\u002Fgithub.com\u002FPay20Y\u002FLayout_Analysis)\n- [rbaguila\u002Fdocument-layout-analysis](https:\u002F\u002Fgithub.com\u002Frbaguila\u002Fdocument-layout-analysis)\n- [P2PaLA](https:\u002F\u002Fgithub.com\u002Flquirosd\u002FP2PaLA) - 页面到页面的版面分析工具\n- [ocroseg](https:\u002F\u002Fgithub.com\u002FNVlabs\u002Focroseg\u002F) - 这是一个用于页面版面分析\u002F分割的深度学习模型。\n- [DIVA-DIA\u002FDIVA_Layout_Analysis_Evaluator](https:\u002F\u002Fgithub.com\u002FDIVA-DIA\u002FDIVA_Layout_Analysis_Evaluator) - 用于ICDAR 2017挑战性中世纪手稿版面分析竞赛的版面分析评估工具\n- [ocrsegment](https:\u002F\u002Fgithub.com\u002Fwatersink\u002Focrsegment) - 一个用于页面版面分析\u002F分割的深度学习模型。\n- [ARU-Net](https:\u002F\u002Fgithub.com\u002FTobiasGruening\u002FARU-Net)\n- [xy-cut-tree](https:\u002F\u002Fgithub.com\u002Fkavishgambhir\u002Fxy-cut-tree)\n- [ocrd_segment](https:\u002F\u002Fgithub.com\u002FOCR-D\u002Focrd_segment)\n- [LayoutML](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Funilm\u002Ftree\u002Fmaster\u002Flayoutlm)\n- [LayoutLMv2](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Funilm\u002Ftree\u002Fmaster\u002Flayoutlmv2)\n- [eynollah](https:\u002F\u002Fgithub.com\u002Fqurator-spk\u002Feynollah)\n\n### 表格分割\n- https:\u002F\u002Fgithub.com\u002Fdoxakis\u002Fform-segmentation\n\n## 手写文字\n- https:\u002F\u002Fgithub.com\u002Farthurflor23\u002Fhandwritten-text-recognition\n- https:\u002F\u002Fgithub.com\u002Fawslabs\u002Fhandwritten-text-recognition-for-apache-mxnet\n- https:\u002F\u002Fgithub.com\u002F0x454447415244\u002FHandwritingRecognitionSystem\n- https:\u002F\u002Fgithub.com\u002FSparshaSaha\u002FHandwritten-Number-Recognition-With-Image-Segmentation\n- https:\u002F\u002Fgithub.com\u002FThomasDelteil\u002FHandwrittenTextRecognition_MXNet\n- 
[SimpleHTR](https:\u002F\u002Fgithub.com\u002Fgithubharald\u002FSimpleHTR) - 使用TensorFlow实现的手写文本识别系统。\n- [handwriting-ocr](https:\u002F\u002Fgithub.com\u002FBreta01\u002Fhandwriting-ocr) - 用于识别手写文本的OCR软件\n- [AWSLabs：针对Apache MXNet的手写文本识别](https:\u002F\u002Fgithub.com\u002Fawslabs\u002Fhandwritten-text-recognition-for-apache-mxnet)\n- [vloison\u002FHandwritten_Text_Recognition](https:\u002F\u002Fgithub.com\u002Fvloison\u002FHandwritten_Text_Recognition)\n- https:\u002F\u002Fgithub.com\u002Fsushant097\u002FHandwritten-Line-Text-Recognition-using-Deep-Learning-with-Tensorflow\n- https:\u002F\u002Fgithub.com\u002Fqurator-spk\u002Fsbb_textline_detection\n- [Handprint](https:\u002F\u002Fgithub.com\u002Fcaltechlibrary\u002Fhandprint) - 将亚马逊、谷歌和\u002F或微软的HTR服务应用于扫描文档\n\n## 表格检测\n\n- [unitable](https:\u002F\u002Fgithub.com\u002Fpoloclub\u002Funitable) - [论文：2024](https:\u002F\u002Farxiv.org\u002Fabs\u002F2403.04822) UniTable：迈向统一的表格基础模型\n- [TableTransformer](https:\u002F\u002Fgithub.com\u002Fmicrosoft\u002Ftable-transformer)\n- [TableNet](https:\u002F\u002Fgithub.com\u002Fjainammm\u002FTableNet) - ICDAR 2019 论文《TableNet：用于从扫描文档图像中端到端检测表格并提取表格数据的深度学习模型》的非官方实现。\n- [image-table-ocr](https:\u002F\u002Fgithub.com\u002Feihli\u002Fimage-table-ocr)\n- [TreeStructure](https:\u002F\u002Fgithub.com\u002FHazyResearch\u002FTreeStructure) - 表格提取工具\n- [TableTrainNet](https:\u002F\u002Fgithub.com\u002Fmawanda-jun\u002FTableTrainNet) - 使用神经网络进行文档中的表格识别。\n- [table_layout_detection_research](https:\u002F\u002Fgithub.com\u002Fcbgaindia\u002Fparsers\u002Fblob\u002Fmaster\u002Fresearch\u002Flayout_detection_research.md) \n- [TableBank](https:\u002F\u002Fgithub.com\u002Fdoc-analysis\u002FTableBank)\n- [Camelot](https:\u002F\u002Fgithub.com\u002Fatlanhq\u002Fcamelot)\n- [ocr-table](https:\u002F\u002Fgithub.com\u002Fcseas\u002Focr-table) - 使用光学字符识别从扫描图像 PDF 中提取表格。\n- [ExtractTable-py](https:\u002F\u002Fgithub.com\u002FExtractTable\u002FExtractTable-py)\n- 
[image-table-ocr](https:\u002F\u002Fgithub.com\u002Feihli\u002Fimage-table-ocr)\n\n## 语言检测\n\n- [lingua](https:\u002F\u002Fgithub.com\u002Fpemistahl\u002Flingua) - 最准确的适用于 Java 及其他 JVM 语言的自然语言检测库，适合长文本和短文本。\n- [langdetect](https:\u002F\u002Fpypi.org\u002Fproject\u002Flangdetect\u002F)\n- [whatthelang](https:\u002F\u002Fgithub.com\u002Findix\u002Fwhatthelang) - 极速语言预测火箭\n- [wiki-lang-detect](https:\u002F\u002Fgithub.com\u002Fvseloved\u002Fwiki-lang-detect)\n\n## 文本检测与定位\n\n- [DB](https:\u002F\u002Fgithub.com\u002FMhLiao\u002FDB)\n- [DeepReg](https:\u002F\u002Fgithub.com\u002FDeepRegNet\u002FDeepReg)\n- [CornerText](https:\u002F\u002Fgithub.com\u002Flvpengyuan\u002Fcorner) - [论文：2018年](https:\u002F\u002Farxiv.org\u002Fabs\u002F1802.08948) - 基于角点定位与区域分割的多方向场景文本检测\n- [RRPN](https:\u002F\u002Fgithub.com\u002Fmjq11302010044\u002FRRPN) - ([论文：2018年](https:\u002F\u002Farxiv.org\u002Fabs\u002F1703.01086)) - 基于旋转提议的任意方向场景文本检测\n- [MASTER-TF](https:\u002F\u002Fgithub.com\u002Fjiangxiluning\u002FMASTER-TF) - ([论文：2021年](https:\u002F\u002Farxiv.org\u002Fabs\u002F1910.02562)) - “MASTER：用于场景文本识别的多视角非局部网络”的TensorFlow重实现（模式识别，2021年）。\n- [MaskTextSpotterV3](https:\u002F\u002Fgithub.com\u002FMhLiao\u002FMaskTextSpotterV3) - ([论文：2020年](https:\u002F\u002Farxiv.org\u002Fabs\u002F2007.09482)) - Mask TextSpotter v3是一个端到端可训练的场景文本检测器，采用分割提议网络（SPN）代替RPN。\n- [TextFuseNet](https:\u002F\u002Fgithub.com\u002Fying09\u002FTextFuseNet) - ([论文：2020年](https:\u002F\u002Fwww.ijcai.org\u002FProceedings\u002F2020\u002F72)) “TextFuseNet：具有更丰富融合特征的场景文本检测”的PyTorch实现。\n- [SATRN](https:\u002F\u002Fgithub.com\u002Fclovaai\u002FSATRN)- ([论文：2020年](https:\u002F\u002Fopenaccess.thecvf.com\u002Fcontent_CVPRW_2020\u002Fpapers\u002Fw34\u002FLee_On_Recognizing_Texts_of_Arbitrary_Shapes_With_2D_Self-Attention_CVPRW_2020_paper.pdf)) - 自注意力文本识别网络（SATRN）的官方TensorFlow实现（CVPR Workshop WTDDLE 2020）。\n- [cvpr20-scatter-text-recognizer](https:\u002F\u002Fgithub.com\u002Fphantrdat\u002Fcvpr20-scatter-text-recognizer) - 
([论文：2020年](https:\u002F\u002Fopenaccess.thecvf.com\u002Fcontent_CVPR_2020\u002Fpapers\u002FLitman_SCATTER_Selective_Context_Attentional_Scene_Text_Recognizer_CVPR_2020_paper.pdf)) - CVPR 2020论文“SCATTER：选择性上下文注意力场景文本识别器”的非官方实现。\n- [seed](https:\u002F\u002Fgithub.com\u002FPay20Y\u002FSEED) - ([论文：2020年](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2005.10977.pdf)) - 这是论文“SEED：用于场景文本识别的语义增强编码器-解码器框架”的实现。\n- [vedastr](https:\u002F\u002Fgithub.com\u002FMedia-Smart\u002Fvedastr) - 基于PyTorch的场景文本识别工具箱\n- [AutoSTR](https:\u002F\u002Fgithub.com\u002FAutoML-4Paradigm\u002FAutoSTR) - ([论文：2020年](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2003.06567.pdf)) 场景文本识别中的高效骨干网络搜索\n- [Decoupled-attention-network](https:\u002F\u002Fgithub.com\u002FWang-Tianwei\u002FDecoupled-attention-network) - ([论文：2019年](https:\u002F\u002Farxiv.org\u002Fabs\u002F1912.10205)) “用于文本识别的解耦注意力网络”的PyTorch实现。\n- [Bi-STET](https:\u002F\u002Fgithub.com\u002FMauritsBleeker\u002FBi-STET) - ([论文：2020年](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1912.03656.pdf)) 单一解码器的双向场景文本识别实现\n- [kiss](https:\u002F\u002Fgithub.com\u002FBartzi\u002Fkiss) - ([论文：2019年](https:\u002F\u002Farxiv.org\u002Fabs\u002F1911.08400))\n- [Deformable Text Recognition](https:\u002F\u002Fgithub.com\u002FAlpaca07\u002Fdtr) - ([论文：2019年](https:\u002F\u002Fieeexplore.ieee.org\u002Fabstract\u002Fdocument\u002F9064428))\n- [MaskTextSpotter](https:\u002F\u002Fgithub.com\u002FMhLiao\u002FMaskTextSpotter) - ([论文：2019年](https:\u002F\u002Fieeexplore.ieee.org\u002Fdocument\u002F8812908))\n- [CUTIE](https:\u002F\u002Fgithub.com\u002Fvsymbol\u002FCUTIE) - ([论文：2019年](https:\u002F\u002Farxiv.org\u002Fabs\u002F1903.12363v4))\n- [AttentionOCR](https:\u002F\u002Fgithub.com\u002Fzhang0jhon\u002FAttentionOCR) - ([论文：2019年](https:\u002F\u002Farxiv.org\u002Fabs\u002F1912.04561))\n- [crpn](https:\u002F\u002Fgithub.com\u002Fxhzdeng\u002Fcrpn) - ([论文：2019年](https:\u002F\u002Farxiv.org\u002Fabs\u002F1804.02690))\n- 
[Scene-Text-Detection-with-SPECNET](https:\u002F\u002Fgithub.com\u002FAirBernard\u002FScene-Text-Detection-with-SPCNET) - 使用TensorFlow的监督金字塔上下文网络进行场景文本检测的仓库。\n- [Character-Region-Awareness-for-Text-Detection](https:\u002F\u002Fgithub.com\u002FguruL\u002FCharacter-Region-Awareness-for-Text-Detection-)\n- [Real-time-Scene-Text-Detection-and-Recognition-System](https:\u002F\u002Fgithub.com\u002Ffnzhan\u002FReal-time-Scene-Text-Detection-and-Recognition-System) - 实时场景文本检测与识别的端到端流程。\n- [ocr_attention](https:\u002F\u002Fgithub.com\u002Fmarvis\u002Focr_attention) - 具有自动校正功能的鲁棒场景文本识别。\n- [masktextspotter.caffee2](https:\u002F\u002Fgithub.com\u002Flvpengyuan\u002Fmasktextspotter.caffe2) - “Mask TextSpotter：一种可端到端训练的用于检测任意形状文本的神经网络”的代码。\n- [InceptText-Tensorflow](https:\u002F\u002Fgithub.com\u002Fxieyufei1993\u002FInceptText-Tensorflow) - IncepText论文中算法的实现：一种带有可变形PSROI池化的新型Inception-Text模块，用于多方向场景文本检测。\n- [textspotter](https:\u002F\u002Fgithub.com\u002Ftonghe90\u002Ftextspotter) - 具有显式对齐与注意力机制的端到端文本检测器\n- [RRD](https:\u002F\u002Fgithub.com\u002FMhLiao\u002FRRD) - RRD：面向旋转的回归模型，用于定向场景文本检测。\n- [crpn](https:\u002F\u002Fgithub.com\u002Fxhzdeng\u002Fcrpn) - 基于角点的区域提议网络。\n- [SSTDNet](https:\u002F\u002Fgithub.com\u002FHotaekHan\u002FSSTDNet) - 实现“单次射击带区域注意力的文本检测器，ICCV 2017亮点”。\n- [R2CNN](https:\u002F\u002Fgithub.com\u002Fbeacandler\u002FR2CNN) - R2CNN的Caffe重实现：旋转区域CNN，用于方向鲁棒的场景文本检测。\n- [RRPN](https:\u002F\u002Fgithub.com\u002Fmjq11302010044\u002FRRPN) - RRPN源代码 ---- 基于旋转提议的任意方向场景文本检测\n- [Tensorflow_SceneText_Oriented_Box_Predictor](https:\u002F\u002Fgithub.com\u002Fdafanghe\u002FTensorflow_SceneText_Oriented_Box_Predictor) - 该项目修改了TensorFlow目标检测API代码，以预测定向边界框。可用于场景文本检测。\n- [DeepSceneTextReader](https:\u002F\u002Fgithub.com\u002Fdafanghe\u002FDeepSceneTextReader) - 这是一个使用TensorFlow部署深度场景文本阅读流水线的C++项目。它从自然场景图像中读取文本。使用冻结的TensorFlow图。检测器检测场景文本位置，识别器从每个检测到的边界框中读取单词。\n- [DeRPN](https:\u002F\u002Fgithub.com\u002FHCIILAB\u002FDeRPN) - 一种用于更通用目标检测（包括场景文本检测）的新颖区域提议网络。\n- 
[Bartzi\u002Fsee](https:\u002F\u002Fgithub.com\u002FBartzi\u002Fsee) - SEE：迈向半监督端到端场景文本识别\n- [Bartzi\u002Fstn-ocr](https:\u002F\u002Fgithub.com\u002FBartzi\u002Fstn-ocr) - 论文STN-OCR的代码：一个用于文本检测和文本识别的单一神经网络\n- [beacandler\u002FR2CNN](https:\u002F\u002Fgithub.com\u002Fbeacandler\u002FR2CNN) - R2CNN的Caffe重实现：旋转区域CNN，用于方向鲁棒的场景文本检测\n- [HsiehYiChia\u002FScene-text-recognition](https:\u002F\u002Fgithub.com\u002FHsiehYiChia\u002FScene-text-recognition) - 基于极端区域（ER）的场景文本检测与识别\n- [R2CNN_Faster-RCNN_Tensorflow](https:\u002F\u002Fgithub.com\u002FDetectionTeamUCAS\u002FR2CNN_Faster-RCNN_Tensorflow) - 基于Faster-RCNN的旋转区域检测。\n- [corner](https:\u002F\u002Fgithub.com\u002Flvpengyuan\u002Fcorner) - 基于角点定位与区域分割的多方向场景文本检测\n- [Corner_Segmentation_TextDetection](https:\u002F\u002Fgithub.com\u002FJK-Rao\u002FCorner_Segmentation_TextDetection) - 基于角点定位与区域分割的多方向场景文本检测。\n- [TextSnake.pytorch](https:\u002F\u002Fgithub.com\u002Fprincewang1994\u002FTextSnake.pytorch) - ECCV2018论文“TextSnake：用于检测任意形状文本的灵活表示”的PyTorch实现\n- [AON](https:\u002F\u002Fgithub.com\u002Fhuizhang0110\u002FAON) - CVPR 2018文本识别论文的TensorFlow实现：“AON：迈向任意方向文本识别”\n- [pixel_link](https:\u002F\u002Fgithub.com\u002FZJULearning\u002Fpixel_link) - 我们在AAAI2018发表的论文“PixelLink：通过实例分割检测场景文本”的实现\n- [seglink](https:\u002F\u002Fgithub.com\u002Fdengdan\u002Fseglink) - 论文中“通过连接片段检测自然图像中的定向文本”算法的实现（=> pixel_link）\n- [SSTD](https:\u002F\u002Fgithub.com\u002FBestSonny\u002FSSTD) - 单次射击带区域注意力的文本检测器\n- [MORAN_v2](https:\u002F\u002Fgithub.com\u002FCanjie-Luo\u002FMORAN_v2) - MORAN：用于场景文本识别的多目标校正注意力网络\n- [Curve-Text-Detector](https:\u002F\u002Fgithub.com\u002FYuliang-Liu\u002FCurve-Text-Detector) - 该仓库提供训练和测试代码、数据集、检测与识别标注、评估脚本、标注工具以及排名表。\n- [HCIILAB\u002FDeRPN](https:\u002F\u002Fgithub.com\u002FHCIILAB\u002FDeRPN) - 一种用于更通用目标检测（包括场景文本检测）的新颖区域提议网络。\n- [TextField](https:\u002F\u002Fgithub.com\u002FYukangWang\u002FTextField) - TextField：学习用于不规则场景文本检测的深度方向场（TIP 2019）\n- 
[tensorflow-TextMountain](https:\u002F\u002Fgithub.com\u002Fliny23\u002Ftensorflow-TextMountain) - TextMountain：通过实例分割实现精确的场景文本检测\n- [Bartzi\u002Fsee](https:\u002F\u002Fgithub.com\u002FBartzi\u002Fsee) - AAAI 2018出版物“SEE：迈向半监督端到端场景文本识别”的代码\n- [bgshih\u002Faster](https:\u002F\u002Fgithub.com\u002Fbgshih\u002Faster) - 识别自然图像中裁剪后的文本。\n- [ReceiptParser](https:\u002F\u002Fgithub.com\u002FReceiptManager\u002Freceipt-parser) - 用Python编写的模糊收据解析器。\n- [vedastr](https:\u002F\u002Fgithub.com\u002FMedia-Smart\u002Fvedastr)\n\n### ABCNet [论文：2020年](https:\u002F\u002Farxiv.org\u002Fpdf\u002F2002.10200.pdf)\n- [AdelaiDet](https:\u002F\u002Fgithub.com\u002Faim-uofa\u002FAdelaiDet)\n- https:\u002F\u002Fgithub.com\u002FYuliang-Liu\u002Fbezier_curve_text_spotting\n- https:\u002F\u002Fgithub.com\u002Fquangvy2703\u002FABCNet-ESRGAN-SRTEXT\n- https:\u002F\u002Fgithub.com\u002FPxtri2156\u002FAdelaiDet_v2\n- https:\u002F\u002Fgithub.com\u002FzhubinQAQ\u002FIns\n\n### CRAFT [论文：2019年](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1904.01941.pdf)\n- [CRAFT-pytorch（官方）](https:\u002F\u002Fgithub.com\u002Fclovaai\u002FCRAFT-pytorch) - CRAFT文本检测器的PyTorch实现。\n- [autonise\u002FCRAFT-Remade](https:\u002F\u002Fgithub.com\u002Fautonise\u002FCRAFT-Remade)\n- [s3nh\u002Fpytorch-text-recognition](https:\u002F\u002Fgithub.com\u002Fs3nh\u002Fpytorch-text-recognition) \n- [backtime92\u002FCRAFT-Reimplementation](https:\u002F\u002Fgithub.com\u002Fbacktime92\u002FCRAFT-Reimplementation)\n- [fcakyon\u002Fcraft-text-detector](https:\u002F\u002Fgithub.com\u002Ffcakyon\u002Fcraft-text-detector) - CRAFT的PyTorch实现\n- [YongWookHa\u002Fcraft-text-detector](https:\u002F\u002Fgithub.com\u002FYongWookHa\u002Fcraft-text-detector)\n- [faustomorales\u002Fkeras-ocr](https:\u002F\u002Fgithub.com\u002Ffaustomorales\u002Fkeras-ocr) - 一个封装且灵活的CRAFT文本检测器和Keras CRNN识别模型版本。\n- [fcakyon\u002Fcraft-text-detector](https:\u002F\u002Fgithub.com\u002Ffcakyon\u002Fcraft-text-detector)\n\n### FOTS 
[论文：2018年](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1801.01671.pdf)\n- [FOTS](https:\u002F\u002Fgithub.com\u002Fxieyufei1993\u002FFOTS) - FOTS：统一网络下的快速方向文本定位的实现。\n- [FOTS_OCR](https:\u002F\u002Fgithub.com\u002FMasao-Taketani\u002FFOTS_OCR) \n\n### TextBoxes++ [论文：2018年](https:\u002F\u002Farxiv.org\u002Fabs\u002F1801.02765)\n- [TextBoxes_plusplus（官方）](https:\u002F\u002Fgithub.com\u002FMhLiao\u002FTextBoxes_plusplus) TextBoxes++：单次检测的方向场景文本检测器\n- [Shun14\u002FTextBoxes_plusplus_Tensorflow](https:\u002F\u002Fgithub.com\u002FShun14\u002FTextBoxes_plusplus_Tensorflow) - 使用TensorFlow（Python）实现的Textboxes_plusplus\n\n### PSENet [论文：2018年](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1806.02559.pdf)\n- [tensorflow_PSENet](https:\u002F\u002Fgithub.com\u002Fliuheng92\u002Ftensorflow_PSENet) - 这是PSENet：基于渐进尺度扩展网络的形状鲁棒文本检测的TensorFlow重实现。\n- [PAN-PSEnet](https:\u002F\u002Fgithub.com\u002Frahzaazhar\u002FPAN-PSEnet)\n- [PSENet](https:\u002F\u002Fgithub.com\u002Fwhai362\u002FPSENet) - 基于渐进尺度扩展网络的形状鲁棒文本检测。\n\n### EAST [论文：2017年](https:\u002F\u002Farxiv.org\u002Fabs\u002F1704.03155)\n- [EAST](https:\u002F\u002Fgithub.com\u002Fargman\u002FEAST)（官方） - （tf1\u002Fpy2）EAST文本检测器的TensorFlow实现\n- [AdvancedEAST](https:\u002F\u002Fgithub.com\u002Fhuoyijie\u002FAdvancedEAST) - （tf1\u002Fpy2）AdvancedEAST是一种用于场景图像文本检测的算法，主要基于EAST，并进行了显著改进，使长文本预测更加准确。\n- [kurapan\u002FEAST](https:\u002F\u002Fgithub.com\u002Fkurapan\u002FEAST) 在Keras中实现的EAST场景文本检测器\n- [songdejia\u002FEAST](https:\u002F\u002Fgithub.com\u002Fsongdejia\u002FEAST) - 这是EAST：一种高效准确的场景文本检测器的PyTorch重实现。\n- [HaozhengLi\u002FEAST_ICPR](https:\u002F\u002Fgithub.com\u002FHaozhengLi\u002FEAST_ICPR) - 从argman\u002FEAST分叉而来，用于ICPR MTWI 2018挑战赛\n- [deepthinking-qichao\u002FEAST_ICPR2018](https:\u002F\u002Fgithub.com\u002Fdeepthinking-qichao\u002FEAST_ICPR2018)\n- [SakuraRiven\u002FEAST](https:\u002F\u002Fgithub.com\u002FSakuraRiven\u002FEAST)\n- 
[EAST-Detector-for-text-detection-using-OpenCV](https:\u002F\u002Fgithub.com\u002FZER-0-NE\u002FEAST-Detector-for-text-detection-using-OpenCV) - 使用OpenCV进行图像文本检测\n- [easy-EAST](https:\u002F\u002Fgithub.com\u002Fche220\u002Feasy-EAST)\n\n### TextBoxes [论文：2016年](https:\u002F\u002Farxiv.org\u002Fabs\u002F1611.06779)\n- [TextBoxes（官方）](https:\u002F\u002Fgithub.com\u002FMhLiao\u002FTextBoxes)\n- [TextBoxes-TensorFlow](https:\u002F\u002Fgithub.com\u002Fgxd1994\u002FTextBoxes-TensorFlow) - 使用TensorFlow重新实现的TextBoxes\n- [zj463261929\u002FTextBoxes](https:\u002F\u002Fgithub.com\u002Fzj463261929\u002FTextBoxes) - TextBoxes：使用单个深度神经网络的快速文本检测器\n- [shinjayne\u002Ftextboxes](https:\u002F\u002Fgithub.com\u002Fshinjayne\u002Ftextboxes) - 使用TensorFlow（Python）实现的Textboxes\n- [shinTB](https:\u002F\u002Fgithub.com\u002Fshinjayne\u002FshinTB) - Textboxes：图像文本检测模型：Python包（TensorFlow）\n\n### CTPN [论文：2016年](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1609.03605.pdf)\n- [text-detection-ctpn](https:\u002F\u002Fgithub.com\u002Feragonruan\u002Ftext-detection-ctpn)\n- [yizt\u002Fkeras-ctpn](https:\u002F\u002Fgithub.com\u002Fyizt\u002Fkeras-ctpn)\n- [tianzhi0549\u002FCTPN](https:\u002F\u002Fgithub.com\u002Ftianzhi0549\u002FCTPN) - 使用连接主义文本提案网络检测自然图像中的文本\n\n## 视频文本定位\n- [VideoTextSCM](https:\u002F\u002Fgithub.com\u002Flsabrinax\u002FVideoTextSCM)\n- [TransDETR](https:\u002F\u002Fgithub.com\u002Fweijiawu\u002FTransDETR)\n- [YORO](https:\u002F\u002Fgithub.com\u002Fhikopensource\u002FDAVAR-Lab-OCR\u002Ftree\u002Fmain\u002Fdemo\u002Fvideotext\u002Fyoro) ([论文：2021年](https:\u002F\u002Farxiv.org\u002Fpdf\u002F1903.03299.pdf))\n\n## 字体检测\n\n- [typefont](https:\u002F\u002Fgithub.com\u002FVasile-Peste\u002FTypefont) - 首个开源库，能够检测图像中文本的字体。\n\n## 光学字符识别引擎与框架\n\n- [texify](https:\u002F\u002Fgithub.com\u002FVikParuchuri\u002Ftexify) - 用于数学公式的OCR模型，输出LaTeX和Markdown格式。\n- [DAVAR-lab-OCR](https:\u002F\u002Fgithub.com\u002Fhikopensource\u002Fdavar-lab-ocr)\n- 
[CRNN.tf2](https:\u002F\u002Fgithub.com\u002FFLming\u002FCRNN.tf2)\n- [ocr.pytorch](https:\u002F\u002Fgithub.com\u002Fcourao\u002Focr.pytorch)\n- [PytorchOCR](https:\u002F\u002Fgithub.com\u002FWenmuZhou\u002FPytorchOCR)\n- [MMOCR](https:\u002F\u002Fgithub.com\u002Fopen-mmlab\u002Fmmocr)\n- [doctr](https:\u002F\u002Fgithub.com\u002Fmindee\u002Fdoctr)\n- [Master OCR](https:\u002F\u002Fgithub.com\u002Fjiangxiluning\u002FMASTER-TF)\n- [xiaofengShi\u002FCHINESE-OCR](https:\u002F\u002Fgithub.com\u002FxiaofengShi\u002FCHINESE-OCR)\n- [PaddleOCR](https:\u002F\u002Fgithub.com\u002FPaddlePaddle\u002FPaddleOCR)\n- [Urdu-Ocr](https:\u002F\u002Fgithub.com\u002FHassamChundrigar\u002FUrdu-Ocr)\n- [ocr.pytorch](https:\u002F\u002Fgithub.com\u002Fcourao\u002Focr.pytorch)\n- [ocular](https:\u002F\u002Fgithub.com\u002Fndnlp\u002Focular) - Ocular是一个最先进的历史文档OCR系统。\n- [OCR++](https:\u002F\u002Fgithub.com\u002Fmayank4490\u002FOCR-plus-plus)\n- [pytextrator](https:\u002F\u002Fgithub.com\u002Fdanwald\u002Fpytextractor) - 使用Tesseract结合OpenCV的EAST检测器的Python OCR工具。\n- [OCR-D](https:\u002F\u002Focr-d.github.io\u002F)\n- [ocrd_tesserocr](https:\u002F\u002Fgithub.com\u002FOCR-D\u002Focrd_tesserocr)\n- [Deeplearning-OCR](https:\u002F\u002Fgithub.com\u002Fvinayakkailas\u002FDeeplearning-OCR)\n- [PICCL](https:\u002F\u002Fgithub.com\u002FLanguageMachines\u002FPICCL)\n- [cnn_lstm_ctc_ocr](https:\u002F\u002Fgithub.com\u002Fweinman\u002Fcnn_lstm_ctc_ocr) - 基于TensorFlow的CNN+LSTM模型，使用CTC损失进行训练的OCR。\n- [PassportScanner](https:\u002F\u002Fgithub.com\u002Fevermeer\u002FPassportScanner) - 扫描护照上的MRZ码，提取姓名、姓氏、护照号码、国籍、出生日期、有效期和个人编号。\n- [pannous\u002Ftensorflow-ocr](https:\u002F\u002Fgithub.com\u002Fpannous\u002Ftensorflow-ocr) - 使用带有注意力机制的TensorFlow实现的OCR。\n- [BowieHsu\u002Ftensorflow_ocr](https:\u002F\u002Fgithub.com\u002FBowieHsu\u002Ftensorflow_ocr) - 使用TensorFlow v1.4实现的OCR检测。\n- [GRCNN-for-OCR](https:\u002F\u002Fgithub.com\u002FJianfeng1991\u002FGRCNN-for-OCR) - 这是论文“用于OCR的门控循环卷积神经网络”的实现。\n- 
[go-ocr](https:\u002F\u002Fgithub.com\u002Fmaxim2266\u002Fgo-ocr) - 一个从扫描文档中提取文本的工具（通过OCR），并支持用户自定义后处理。\n- [insightocr](https:\u002F\u002Fgithub.com\u002Fdeepinsight\u002Finsightocr) - MXNet实现的OCR，包括文本识别和检测。\n- [ocr_densenet](https:\u002F\u002Fgithub.com\u002Fyinchangchang\u002Focr_densenet) - 第一届西安交通大学人工智能实践大赛（2018AI实践大赛 - 图片文字识别）一等奖；仅使用DenseNet识别汉字。\n- [CNN_LSTM_CTC_Tensorflow](https:\u002F\u002Fgithub.com\u002Fwatsonyanghx\u002FCNN_LSTM_CTC_Tensorflow) - 基于CNN+LSTM+CTC的OCR，使用TensorFlow实现。\n- [tmbdev\u002Fclstm](https:\u002F\u002Fgithub.com\u002Ftmbdev\u002Fclstm) - 一个小型的C++ LSTM网络实现，专注于OCR。\n- [VistaOCR](https:\u002F\u002Fgithub.com\u002Fisi-vista\u002FVistaOCR)\n- [tesseract.js](https:\u002F\u002Fgithub.com\u002Fnaptha\u002Ftesseract.js)\n- [Tesseract](https:\u002F\u002Fgithub.com\u002Ftesseract-ocr\u002Ftesseract)\n- [kaldi](https:\u002F\u002Fgithub.com\u002Fkaldi-asr\u002Fkaldi)\n- [ocropus3](https:\u002F\u002Fgithub.com\u002FNVlabs\u002Focropus3) - 收集所有子模块的新基于PyTorch的OCR系统的仓库。\n- [calamari](https:\u002F\u002Fgithub.com\u002FCalamari-OCR\u002Fcalamari)\n- [ocropy](https:\u002F\u002Fgithub.com\u002Ftmbdev\u002Focropy) - 基于Python的文档分析和OCR工具。\n- [chinese_ocr](https:\u002F\u002Fgithub.com\u002FYCG09\u002Fchinese_ocr)\n- [deep_ocr](https:\u002F\u002Fgithub.com\u002FJinpengLI\u002Fdeep_ocr) - 制作比Tesseract更好的中文字符识别OCR。\n- [ocular](https:\u002F\u002Fgithub.com\u002Ftberg12\u002Focular)\n- [textDetectionWithScriptID](https:\u002F\u002Fgithub.com\u002Fisi-vista\u002FtextDetectionWithScriptID)\n- [transcribus](https:\u002F\u002Ftranskribus.eu\u002FTranskribus\u002F)\n- [FastText](https:\u002F\u002Ffasttext.cc\u002F) - 用于高效文本分类和表示学习的库。\n- [GOCR](http:\u002F\u002Fwww-e.uni-magdeburg.de\u002Fjschulen\u002Focr\u002F)\n- [Ocrad](https:\u002F\u002Fwww.gnu.org\u002Fsoftware\u002Focrad\u002F)\n- [franc](https:\u002F\u002Fgithub.com\u002Fwooorm\u002Ffranc) - 自然语言检测。\n- [ocrfeeder](https:\u002F\u002Fgithub.com\u002FGNOME\u002Focrfeeder)\n- 
[emedvedev\u002Fattention-ocr](https:\u002F\u002Fgithub.com\u002Femedvedev\u002Fattention-ocr) - 一个基于TensorFlow的文本识别模型（CNN + seq2seq结合视觉注意力），以Python包形式提供，并兼容Google Cloud ML Engine。\n- [da03\u002Fattention-ocr](https:\u002F\u002Fgithub.com\u002Fda03\u002FAttention-OCR) - 基于视觉注意力的OCR。\n- [dhlab-epfl\u002FdhSegment](https:\u002F\u002Fgithub.com\u002Fdhlab-epfl\u002FdhSegment) - 用于历史文档处理的通用框架。\n- https:\u002F\u002Fgithub.com\u002Fmawanda-jun\u002FTableTrainNet\n- https:\u002F\u002Fgithub.com\u002Fkermitt2\u002Fdelft\n- https:\u002F\u002Fgithub.com\u002Fchulwoopack\u002Fdocstrum\n- [grobid](https:\u002F\u002Fgithub.com\u002Fkermitt2\u002Fgrobid) - 一种用于从学术文献中提取信息的机器学习软件。\n- [lapdftext](http:\u002F\u002Fbmkeg.github.io\u002Flapdftext\u002F) - LA-PDFText是一个从基于PDF的研究论文中提取精确文本的系统。\n- https:\u002F\u002Fgithub.com\u002Fberatkurar\u002Ftextline-segmentation-using-fcn\n- https:\u002F\u002Fgithub.com\u002FOCR4all\n- https:\u002F\u002Fgithub.com\u002FOCR4all\u002FLAREX\n- https:\u002F\u002Fgithub.com\u002FOCR4all\u002FOCR4all\n- https:\u002F\u002Fgithub.com\u002Fandbue\u002Fnashi\n- http:\u002F\u002Fkraken.re\u002F\n- [kraken](https:\u002F\u002Fgithub.com\u002Fmittagessen\u002Fkraken)\n- [gosseract](https:\u002F\u002Fgithub.com\u002Fotiai10\u002Fgosseract) - 使用Tesseract C++库的Go语言OCR包。\n- [EasyOCR](https:\u002F\u002Fgithub.com\u002FJaidedAI\u002FEasyOCR) - 即用型OCR，支持40多种语言，包括中文、日语、韩语和泰语。\n- [invoice-scanner-react-native](https:\u002F\u002Fgithub.com\u002Fburhanuday\u002Finvoice-scanner-react-native)\n- [Arabic-OCR](https:\u002F\u002Fgithub.com\u002FHusseinYoussef\u002FArabic-OCR)\n\n## 强大的列表\n- https:\u002F\u002Fgithub.com\u002Fwhitelok\u002Fimage-text-localization-recognition\n- [Awesome-Scene-Text-Recognition](https:\u002F\u002Fgithub.com\u002Fchongyangtao\u002FAwesome-Scene-Text-Recognition) - \n一个专门用于场景文本定位和识别的精选资源列表。\n- [awesome-deep-text-detection-recognition](https:\u002F\u002Fgithub.com\u002Fhwalsuklee\u002Fawesome-deep-text-detection-recognition)\n- 
https:\u002F\u002Fgithub.com\u002Fkurapan\u002Fawesome-scene-text\n- [kba\u002Fawesome-ocr](https:\u002F\u002Fgithub.com\u002Fkba\u002Fawesome-ocr)\n- [perfectspr\u002Fawesome-ocr](https:\u002F\u002Fgithub.com\u002Fperfectspr\u002Fawesome-ocr)\n- https:\u002F\u002Fgithub.com\u002FZumingHuang\u002Fawesome-ocr-resources\n- https:\u002F\u002Fgithub.com\u002Fchongyangtao\u002FAwesome-Scene-Text-Recognition\n- https:\u002F\u002Fgithub.com\u002Fwhitelok\u002Fimage-text-localization-recognition\n- https:\u002F\u002Fgithub.com\u002Fhwalsuklee\u002Fawesome-deep-text-detection-recognition\n- https:\u002F\u002Fgithub.com\u002Fwanghaisheng\u002Fawesome-ocr\n- https:\u002F\u002Fgithub.com\u002FJyouhou\u002FSceneTextPapers\n- https:\u002F\u002Fgithub.com\u002Fjyhengcoder\u002FmyOCR\n- https:\u002F\u002Fgithub.com\u002Fhwalsuklee\u002Fawesome-deep-text-detection-recognition\n- https:\u002F\u002Fgithub.com\u002Ftangzhenyu\u002FScene-Text-Understanding\n- https:\u002F\u002Fgithub.com\u002Fwhitelok\u002Fimage-text-localization-recognition\n- https:\u002F\u002Fgithub.com\u002Fkba\u002Fawesome-ocr\n- https:\u002F\u002Fgithub.com\u002Fsoumendra\u002Fawesome-ocr\n- [chongyangtao\u002FAwesome-Scene-Text-Recognition](https:\u002F\u002Fgithub.com\u002Fchongyangtao\u002FAwesome-Scene-Text-Recognition) - 论文和数据集\n\n## 专有 OCR 引擎\n- [ABBYY](https:\u002F\u002Fwww.abbyy.com\u002Fen-us\u002F)\n- [Omnipage](https:\u002F\u002Fwww.nuance.com\u002Fprint-capture-and-pdf-solutions.html)\n- [Clova.ai](https:\u002F\u002Fdemo.ocr.clova.ai\u002F)\n- [Konfuzio](https:\u002F\u002Fkonfuzio.com\u002Fen\u002F)\n\n## 基于云的 OCR 引擎（SaaS）\n- [thehive.ai](https:\u002F\u002Fthehive.ai\u002Fhive-ocr-solutions)\n- [impira](https:\u002F\u002Fwww.impira.com\u002Ftry\u002Fsmarter-ocr)\n- [AWS Textract](https:\u002F\u002Faws.amazon.com\u002Ftextract\u002F)\n- [Nanonets](https:\u002F\u002Fnanonets.com\u002Focr-api\u002F)\n- [docparser](https:\u002F\u002Fdocparser.com\u002F)\n- 
[ocrolus](https:\u002F\u002Fwww.ocrolus.com\u002F)\n- [Butler Labs](https:\u002F\u002Fwww.butlerlabs.ai\u002F)\n\n## 文件格式与工具\n- [nw-page-editor](https:\u002F\u002Fgithub.com\u002Fmauvilsa\u002Fnw-page-editor) - 用于可视化编辑 Page XML 文件的简单应用\n- [hocr](http:\u002F\u002Fkba.cloud\u002Fhocr-spec\u002F1.2\u002F)\n- [alto](https:\u002F\u002Fgithub.com\u002Faltoxml)\n- [PageXML](https:\u002F\u002Fgithub.com\u002FPRImA-Research-Lab\u002FPAGE-XML)\n- [ocr-fileformat](https:\u002F\u002Fgithub.com\u002FUB-Mannheim\u002Focr-fileformat) - 用于验证和转换各种 OCR 文件格式\n- [hocr-tools](https:\u002F\u002Fgithub.com\u002Ftmbdev\u002Fhocr-tools) - 用于操作和评估 hOCR 格式，该格式通过嵌入 HTML 来表示多语言 OCR 结果。\n\n## 数据集\n- http:\u002F\u002Fwww.iapr-tc11.org\u002Fmediawiki\u002Findex.php\u002FDatasets_List\n- https:\u002F\u002Ficdar2019.org\u002Fcompetitions-2\u002F\n- https:\u002F\u002Frrc.cvc.uab.es\u002F#\n- https:\u002F\u002Flionbridge.ai\u002Fdatasets\u002F15-best-ocr-handwriting-datasets\u002F\n- https:\u002F\u002Fgithub.com\u002Fxylcbd\u002Focr-open-dataset\n- ICDAR 数据集\n- https:\u002F\u002Fgithub.com\u002FOpenArabic\u002FOCR_GS_Data\n- https:\u002F\u002Fgithub.com\u002Fcs-chan\u002FTotal-Text-Dataset\n- [scenetext](http:\u002F\u002Fwww.robots.ox.ac.uk\u002F~vgg\u002Fdata\u002Fscenetext\u002F) - 这是一个合成数据集，其中单词实例被放置在自然场景图像中，并考虑了场景布局。\n- [Total-Text-Dataset](https:\u002F\u002Fgithub.com\u002Fcs-chan\u002FTotal-Text-Dataset)\n- [ocr-open-dataset](https:\u002F\u002Fgithub.com\u002Fxylcbd\u002Focr-open-dataset)\n\n## 数据增强与合成数据生成\n- [DocCreator](http:\u002F\u002Fdoc-creator.labri.fr\u002F) - DIAR 软件，用于生成合成文档图像及真实标签，提供多种退化模型以进行数据增强。\n- [Scene-Text-Image-Transformer](https:\u002F\u002Fgithub.com\u002FCanjie-Luo\u002FScene-Text-Image-Transformer) - 场景文本图像变换器\n- [Belval\u002FTextRecognitionDataGenerator](https:\u002F\u002Fgithub.com\u002FBelval\u002FTextRecognitionDataGenerator) - 文本识别用的合成数据生成器\n- [Sanster\u002Ftext_renderer](https:\u002F\u002Fgithub.com\u002FSanster\u002Ftext_renderer)\n- 
[awesome-SynthText](https:\u002F\u002Fgithub.com\u002FTianzhongSong\u002Fawesome-SynthText)\n- [Text-Image-Augmentation](https:\u002F\u002Fgithub.com\u002FCanjie-Luo\u002FText-Image-Augmentation)\n- [UnrealText](https:\u002F\u002Fgithub.com\u002FJyouhou\u002FUnrealText)\n- [SynthText_Chinese_version](https:\u002F\u002Fgithub.com\u002FJarveeLee\u002FSynthText_Chinese_version)\n\n## OCR 预处理\n- [ajgalleo\u002Fdocument-image-binarization](https:\u002F\u002Fgithub.com\u002Fajgallego\u002Fdocument-image-binarization)\n- [PRLib](https:\u002F\u002Fgithub.com\u002Fleha-bot\u002FPRLib) - 预识别库，包含用于提升 OCR 质量的算法。\n- [sbb_binarization](https:\u002F\u002Fgithub.com\u002Fqurator-spk\u002Fsbb_binarization) -\n\n## OCR 后期校正\n- [KBNLresearch\u002Fochre](https:\u002F\u002Fgithub.com\u002FKBNLresearch\u002Fochre) - OCR 后期校正工具箱\n- [cisocrgroup\u002FPoCoTo](https:\u002F\u002Fgithub.com\u002Fcisocrgroup\u002FPoCoTo) - CIS OCR 后期校正工具\n- [afterscan](http:\u002F\u002Fwww.afterscan.com\u002F)\n\n## 基准测试\n- [TedEval](https:\u002F\u002Fgithub.com\u002Fclovaai\u002FTedEval)\n- [clovaai\u002Fdeep-text-recognition-benchmark](https:\u002F\u002Fgithub.com\u002Fclovaai\u002Fdeep-text-recognition-benchmark) - 基于深度学习方法的文字识别（光学字符识别）。\n- [dinglehopper](https:\u002F\u002Fgithub.com\u002Fqurator-spk\u002Fdinglehopper) - dinglehopper 是一个 OCR 评估工具，可读取 ALTO、PAGE 和文本文件。\n- [CLEval](https:\u002F\u002Fgithub.com\u002Fclovaai\u002FCLEval)\n\n## 其他\n- [ocrodeg](https:\u002F\u002Fgithub.com\u002FNVlabs\u002Focrodeg) - 一个小型 Python 库，用于实现文档图像退化，以增强手写识别和 OCR 应用的数据。\n- [scantailor](https:\u002F\u002Fgithub.com\u002Fscantailor\u002Fscantailor) - Scan Tailor 是一款交互式的扫描页面后处理工具。\n- [jlsutherland\u002Fdoc2text](https:\u002F\u002Fgithub.com\u002Fjlsutherland\u002Fdoc2text) - 帮助研究人员修复这些错误，并尽可能从 PDF 中提取高质量文本。\n- [mauvilsa\u002Fnw-page-editor](https:\u002F\u002Fgithub.com\u002Fmauvilsa\u002Fnw-page-editor) - 用于可视化编辑 Page XML 文件的简单应用。\n- [Transkribus](https:\u002F\u002Ftranskribus.eu\u002FTranskribus\u002F) - Transkribus 
是一个全面的平台，用于历史文献的数字化、AI 驱动的识别、转录和搜索。\n- http:\u002F\u002Fprojectnaptha.com\u002F\n- https:\u002F\u002Fgithub.com\u002F4lex4\u002Fscantailor-advanced\n- [open-semantic-search](https:\u002F\u002Fgithub.com\u002Fopensemanticsearch\u002Fopen-semantic-search) - 开放语义搜索引擎及开源文本挖掘与文本分析平台（集成用于文档处理的 ETL、用于图像和 PDF 的 OCR、用于人物、组织和地点的命名实体识别、基于词库和本体论的元数据管理、全文检索、分面检索和知识图谱的搜索用户界面及搜索应用）。\n- [ocrserver](https:\u002F\u002Fgithub.com\u002Fotiai10\u002Focrserver) - 一个简单的 OCR API 服务器，非常容易通过 Docker 部署，也可以在 Heroku 上运行。\n- [cosc428-structor](https:\u002F\u002Fgithub.com\u002Fchadoliver\u002Fcosc428-structor) - 约 1000 页书籍 + OpenCV + Python = 页面区域被识别为段落、行、图片、标题等。\n- [nidaba](https:\u002F\u002Fgithub.com\u002Fopenphilology\u002Fnidaba\u002F) - 一个可扩展且可升级的 OCR 流程。\n- https:\u002F\u002Fgithub.com\u002FMaybeShewill-CV\u002FCRNN_Tensorflow\n- [OCRmyPDF](https:\u002F\u002Fgithub.com\u002Fjbarlow83\u002FOCRmyPDF)","# Awesome OCR 快速上手指南\n\n`awesome-ocr` 并非单一的可安装软件包，而是一个汇集了顶级 OCR（光学字符识别）相关开源项目、论文和工具的精选列表。本指南将指导开发者如何根据具体需求（如去扭曲、版面分析、表格检测等）选择并部署相应的工具。\n\n## 环境准备\n\n由于列表中的工具多基于深度学习框架（主要是 PyTorch 和 TensorFlow），请确保满足以下基础环境要求：\n\n*   **操作系统**: Linux (推荐 Ubuntu 20.04+), macOS, 或 Windows (建议配合 WSL2 使用)\n*   **Python**: 3.8 或更高版本\n*   **GPU**: 推荐使用 NVIDIA GPU 以加速推理和训练（需安装对应的 CUDA 和 cuDNN）\n*   **前置依赖**:\n    *   `git`: 用于克隆仓库\n    *   `pip` 或 `conda`: 包管理工具\n    *   `opencv-python`: 大多数图像处理工具的基础依赖\n\n**国内加速建议**：\n在安装 Python 依赖时，强烈建议使用清华源或阿里源以提升下载速度：\n```bash\npip install -r requirements.txt -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n```\n\n## 安装步骤\n\n由于 `awesome-ocr` 是工具集合，你需要根据任务类型选择具体的子项目进行安装。以下是针对几类核心任务的通用安装流程示例：\n\n### 1. 克隆目标项目\n选择你需要的工具仓库进行克隆。例如，若需进行**文档去扭曲 (Dewarping)**，可选择 `DewarpNet`：\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Fcvlab-stonybrook\u002FDewarpNet.git\ncd DewarpNet\n```\n\n若需进行**版面分析 (Layout Analysis)**，可选择 `LayoutParser`：\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002FLayout-Parser\u002Flayout-parser.git\ncd layout-parser\n```\n\n### 2. 
创建虚拟环境并安装依赖\n建议使用 `conda` 或 `venv` 隔离环境。\n\n**使用 Conda (推荐):**\n```bash\nconda create -n ocr_env python=3.8\nconda activate ocr_env\n# 安装 PyTorch (根据显卡情况选择，此处为通用 CPU\u002FGPU 示例)\npip install torch torchvision torchaudio --index-url https:\u002F\u002Fdownload.pytorch.org\u002Fwhl\u002Fcu118\n# 安装项目特定依赖\npip install -r requirements.txt -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n```\n\n**针对特定工具的额外安装示例：**\n*   **LayoutParser**:\n    ```bash\n    pip install layoutparser -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n    ```\n*   **SimpleHTR (手写识别)**:\n    ```bash\n    pip install tensorflow\n    pip install -r requirements.txt\n    ```\n\n## 基本使用\n\n不同工具的使用方式各异，以下提供三个典型场景的最简使用示例。\n\n### 场景一：文档版面分析 (使用 LayoutParser)\n快速检测文档中的文本块、标题和表格区域。\n\n```python\nimport layoutparser as lp\n\n# 加载预训练模型 (例如 Detectron2  backbone)\nmodel = lp.Detectron2LayoutModel('lp:\u002F\u002FPubLayNet\u002Ffaster_rcnn_R_50_FPN_3x\u002Fconfig')\n\n# 读取图像\nimage = lp.read_image('path\u002Fto\u002Fyour\u002Fdocument.png')\n\n# 执行检测\nlayout = model.detect(image)\n\n# 绘制结果\nlayout.show(image=image)\n```\n\n### 场景二：场景文本检测 (使用 DBNet)\n适用于自然场景下的弯曲或不规则文本检测。\n\n```bash\n# 假设已克隆 DB 仓库并安装依赖\n# 运行单张图片检测\npython tools\u002Finfer\u002Fpredict_system.py --image_dir=\".\u002Fdoc\u002Fimgs\u002Ftest.jpg\" --det_model_dir=\".\u002Finference\u002Fch_ppocr_mobile_v2.0_det_infer\u002F\" --rec_model_dir=\".\u002Finference\u002Fch_ppocr_mobile_v2.0_rec_infer\u002F\" --use_angle_cls=true --use_gpu=true\n```\n*(注：具体路径需根据实际下载的模型权重调整)*\n\n### 场景三：表格结构提取 (使用 Camelot)\n从 PDF 中提取表格数据并转换为 DataFrame。\n\n```python\nimport camelot\n\n# 读取 PDF 并提取指定页面的表格\ntables = camelot.read_pdf('foo.pdf', pages='1')\n\n# 导出为 CSV\ntables[0].to_csv('foo.csv')\n\n# 查看提取的数据\nprint(tables[0].df)\n```\n\n### 场景四：图像去扭曲 (使用 DewarpNet)\n校正弯曲的文档页面图像。\n\n```bash\n# 进入项目目录后运行测试脚本\npython test.py --img_path path\u002Fto\u002Fcurved_doc.jpg --output_dir .\u002Fresults\n```\n\n**提示**：对于列表中其他工具（如 `jdeskew`, `unpaper`, 
`TableTransformer` 等），请参考各自 GitHub 仓库 README 中的 \"Usage\" 章节，通常都遵循 `git clone` -> `pip install -r requirements.txt` -> `python script.py` 的标准流程。","某历史档案馆正在将一批百年前的手写书信数字化，这些信件因长期折叠和受潮，扫描后的图像存在严重的倾斜、弯曲褶皱以及字迹粘连问题。\n\n### 没有 awesome-ocr 时\n- 扫描图像中弯曲的纸面导致文字行呈波浪状，传统 OCR 引擎无法识别，直接报错或输出乱码。\n- 页面整体倾斜角度不一，人工逐张使用 Photoshop 手动校正耗时极长，且难以保证水平线绝对精准。\n- 密集的手写字体与背景污渍混在一起，缺乏有效的字符分割手段，导致单词被错误截断或合并。\n- 复杂的版面布局（如旁注、页眉）无法自动区分，提取出的文本顺序混乱，完全失去阅读逻辑。\n- 整个处理流程依赖大量人力重复操作，项目进度严重滞后，数字化成本居高不下。\n\n### 使用 awesome-ocr 后\n- 调用 DewarpNet 或 page_dewarp 模型，自动将弯曲褶皱的纸面“熨平”，文字行恢复笔直，OCR 识别率从不足 40% 提升至 95% 以上。\n- 利用 jdeskew 或 galfar\u002Fdeskew 批量检测并校正图像倾斜角，无需人工干预即可确保所有文档水平对齐。\n- 通过 watersink\u002FCharacter-Segmentation 等工具精准分割粘连字符，有效分离手写笔触与背景噪点，还原清晰字粒。\n- 借助 LayoutParser 或 dhSegment 智能分析版面结构，自动识别正文、批注及页码，按正确逻辑重组文本流。\n- 自动化流水线取代了繁琐的人工修图，处理效率提升数十倍，让珍贵历史文献得以快速高质量入库。\n\nawesome-ocr 通过集成顶尖的纠偏、去皱与分割算法，将原本需要数周人工修复的脏乱扫描件，瞬间转化为可检索、可编辑的高精度数字资产。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fzacharywhitley_awesome-ocr_1a6dbc38.png","zacharywhitley","Zachary Whitley","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fzacharywhitley_596a0d35.png","Clarksville, MD, US","zachary.whitley@gmail.com","http:\u002F\u002Fwww.zacharywhitley.com","https:\u002F\u002Fgithub.com\u002Fzacharywhitley",1003,120,"2026-04-09T17:54:18",4,"","未说明",{"notes":86,"python":84,"dependencies":87},"该 README 是一个 OCR 相关开源工具的集合列表（Awesome List），而非单一软件项目的说明文档。列表中包含了数十个不同的独立项目（如 DewarpNet, LayoutLM, DB, Camelot 等），每个项目都有各自独立的运行环境、依赖库和硬件要求。因此，无法从当前文本中提取出统一的操作系统、GPU、内存、Python 版本或依赖库信息。用户需要访问列表中具体某个工具的 GitHub 仓库以获取其详细的安装和运行指南。",[],[14,15],[90,91,92,93,94,95,96],"awesome","awesome-list","ocr","ocr-recognition","machine","machine-learning","deep-learning","2026-03-27T02:49:30.150509","2026-04-20T10:37:12.785238",[],[]]