[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-arbox--nlp-with-ruby":3,"tool-arbox--nlp-with-ruby":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 
代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",151314,2,"2026-04-11T23:32:58",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",108322,"2026-04-10T11:39:34",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 
协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[52,13,15,14],"插件",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":32,"last_commit_at":59,"category_tags":60,"status":17},4721,"markitdown","microsoft\u002Fmarkitdown","MarkItDown 是一款由微软 AutoGen 团队打造的轻量级 Python 工具，专为将各类文件高效转换为 Markdown 格式而设计。它支持 PDF、Word、Excel、PPT、图片（含 OCR）、音频（含语音转录）、HTML 乃至 YouTube 链接等多种格式的解析，能够精准提取文档中的标题、列表、表格和链接等关键结构信息。\n\n在人工智能应用日益普及的今天，大语言模型（LLM）虽擅长处理文本，却难以直接读取复杂的二进制办公文档。MarkItDown 恰好解决了这一痛点，它将非结构化或半结构化的文件转化为模型“原生理解”且 Token 效率极高的 Markdown 格式，成为连接本地文件与 AI 分析 pipeline 的理想桥梁。此外，它还提供了 MCP（模型上下文协议）服务器，可无缝集成到 Claude Desktop 等 LLM 应用中。\n\n这款工具特别适合开发者、数据科学家及 AI 研究人员使用，尤其是那些需要构建文档检索增强生成（RAG）系统、进行批量文本分析或希望让 AI 助手直接“阅读”本地文件的用户。虽然生成的内容也具备一定可读性，但其核心优势在于为机器",93400,"2026-04-06T19:52:38",[52,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":67,"readme_en":68,"readme_zh":69,"quickstart_zh":70,"use_case_zh":71,"hero_image_url":72,"owner_login":73,"owner_name":74,"owner_avatar_url":75,"owner_bio":76,"owner_company":77,"owner_location":78,"owner_email":79,"owner_twitter":80,"owner_website":80,"owner_url":81,"languages":82,"stars":87,"forks":88,"last_commit_at":89,"license":90,"difficulty_score":91,"env_os":92,"env_gpu":92,"env_ram":92,"env_deps":93,"category_tags":102,"github_topics":103,"view_count":32,"oss_zip_url":80,"oss_zip_packed_at":80,"status":17,"created_at":116,"updated_at":117,"faqs":118,"releases":119},6728,"arbox\u002Fnlp-with-ruby","nlp-with-ruby","Curated List: Practical Natural Language Processing done in Ruby","nlp-with-ruby 是一个专为 Ruby 开发者打造的自然语言处理（NLP）精选资源清单。它并非单一的软件库，而是一份汇聚了高质量教程、实用代码库、在线 API 及学术资料的“导航图”，旨在帮助开发者在 Ruby 生态中高效实现人类语言的计算与处理。\n\n面对自然语言处理领域概念繁杂、工具分散的痛点，nlp-from-ruby 系统性地梳理了从基础到高级的全流程任务。无论是分词、词干提取、句法分析等底层管道子任务，还是情感分析、机器翻译、命名实体识别等高阶应用，甚至是聊天机器人构建和光学字符识别（OCR），清单都提供了经过验证的解决方案。其独特亮点在于紧密围绕 Ruby 语言特性，不仅涵盖了传统的语言学规则处理，还整合了机器学习库与数据可视化工具，并特别关注多语言支持与互操作性，让 Ruby 也能轻松对接主流 AI 能力。\n\n这份资源非常适合熟悉 Ruby 
的后端工程师、希望将文本分析功能集成到现有系统中的全栈开发者，以及对计算语言学感兴趣的研究人员。对于想要避免重复造轮子、快速寻找可靠 NLP 组件的技术团队而言，nlp-with-ruby 是探索文本挖掘与智能语言应用的理想起","nlp-with-ruby 是一个专为 Ruby 开发者打造的自然语言处理（NLP）精选资源清单。它并非单一的软件库，而是一份汇聚了高质量教程、实用代码库、在线 API 及学术资料的“导航图”，旨在帮助开发者在 Ruby 生态中高效实现人类语言的计算与处理。\n\n面对自然语言处理领域概念繁杂、工具分散的痛点，nlp-from-ruby 系统性地梳理了从基础到高级的全流程任务。无论是分词、词干提取、句法分析等底层管道子任务，还是情感分析、机器翻译、命名实体识别等高阶应用，甚至是聊天机器人构建和光学字符识别（OCR），清单都提供了经过验证的解决方案。其独特亮点在于紧密围绕 Ruby 语言特性，不仅涵盖了传统的语言学规则处理，还整合了机器学习库与数据可视化工具，并特别关注多语言支持与互操作性，让 Ruby 也能轻松对接主流 AI 能力。\n\n这份资源非常适合熟悉 Ruby 的后端工程师、希望将文本分析功能集成到现有系统中的全栈开发者，以及对计算语言学感兴趣的研究人员。对于想要避免重复造轮子、快速寻找可靠 NLP 组件的技术团队而言，nlp-with-ruby 是探索文本挖掘与智能语言应用的理想起点。","\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Farbox_nlp-with-ruby_readme_4c8f03dae674.png\" align=\"center\">\n\n[![Awesome](https:\u002F\u002Fawesome.re\u002Fbadge-flat.svg)](https:\u002F\u002Fgithub.com\u002Fsindresorhus\u002Fawesome#readme) [![Support Me](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F%F0%9F%92%97-Support%20Me-blue.svg?style=flat-square)](https:\u002F\u002Fwww.patreon.com\u002Farbox)\n\n[[RubyML](https:\u002F\u002Fgithub.com\u002Farbox\u002Fmachine-learning-with-ruby) |\n [RubyDataScience](https:\u002F\u002Fgithub.com\u002Farbox\u002Fdata-science-with-ruby) |\n [RubyInterop](https:\u002F\u002Fgithub.com\u002Farbox\u002Fruby-interoperability)]\n\n\n# Awesome NLP with Ruby [\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Farbox_nlp-with-ruby_readme_3f3894ee2234.jpg\" align=\"left\" width=\"30px\" height=\"30px\" \u002F>][ruby]\n\n> Useful resources for text processing in Ruby\n\nThis curated list comprises [_awesome_](https:\u002F\u002Fgithub.com\u002Fsindresorhus\u002Fawesome\u002Fblob\u002Fmaster\u002Fawesome.md)\nresources, libraries, information sources about computational processing of texts\nin human languages with the [Ruby programming language](ruby).\nThat field is often referred to 
as\n[NLP](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FNatural_language_processing),\n[Computational Linguistics](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FComputational_linguistics),\n[HLT](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FLanguage_technology) (Human Language Technology)\nand can be brought in conjunction with\n[Artificial Intelligence](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FArtificial_intelligence),\n[Machine Learning](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FMachine_learning),\n[Information Retrieval](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FInformation_retrieval),\n[Text Mining](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FText_mining),\n[Knowledge Extraction](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FKnowledge_extraction)\nand other related disciplines.\n\nThis list comes from our day to day work on Language Models and NLP Tools.\nRead [why](motivation.md) this list is awesome. Our [FAQ](FAQ.md) describes the\nimportant decisions and useful answers you may be interested in.\n\n:sparkles: Every [contribution](#contributing) is welcome! 
Add links through pull\nrequests or create an issue to start a discussion.\n\nFollow us on [Twitter](https:\u002F\u002Ftwitter.com\u002FNonWebRuby)\nand please spread the word using the `#RubyNLP` hash tag!\n\n\u003C!-- nodoc -->\n## Contents\n\n\u003C!-- toc -->\n\n- [:sparkles: Tutorials](#sparkles-tutorials)\n- [NLP Pipeline Subtasks](#nlp-pipeline-subtasks)\n  * [Pipeline Generation](#pipeline-generation)\n  * [Multipurpose Engines](#multipurpose-engines)\n    + [On-line APIs](#on-line-apis)\n  * [Language Identification](#language-identification)\n  * [Segmentation](#segmentation)\n  * [Lexical Processing](#lexical-processing)\n    + [Stemming](#stemming)\n    + [Lemmatization](#lemmatization)\n    + [Lexical Statistics: Counting Types and Tokens](#lexical-statistics-counting-types-and-tokens)\n    + [Filtering Stop Words](#filtering-stop-words)\n  * [Phrasal Level Processing](#phrasal-level-processing)\n  * [Syntactic Processing](#syntactic-processing)\n    + [Constituency Parsing](#constituency-parsing)\n  * [Semantic Analysis](#semantic-analysis)\n  * [Pragmatical Analysis](#pragmatical-analysis)\n- [High Level Tasks](#high-level-tasks)\n  * [Spelling and Error Correction](#spelling-and-error-correction)\n  * [Text Alignment](#text-alignment)\n  * [Machine Translation](#machine-translation)\n  * [Sentiment Analysis](#sentiment-analysis)\n  * [Numbers, Dates, and Time Parsing](#numbers-dates-and-time-parsing)\n  * [Named Entity Recognition](#named-entity-recognition)\n  * [Text-to-Speech-to-Text](#text-to-speech-to-text)\n- [Dialog Agents, Assistants, and Chatbots](#dialog-agents-assistants-and-chatbots)\n- [Linguistic Resources](#linguistic-resources)\n- [Machine Learning Libraries](#machine-learning-libraries)\n- [Data Visualization](#data-visualization)\n- [Optical Character Recognition](#optical-character-recognition)\n- [Text Extraction](#text-extraction)\n- [Full Text Search, Information Retrieval, 
Indexing](#full-text-search-information-retrieval-indexing)\n- [Language Aware String Manipulation](#language-aware-string-manipulation)\n- [Articles, Posts, Talks, and Presentations](#articles-posts-talks-and-presentations)\n- [Projects and Code Examples](#projects-and-code-examples)\n- [Books](#books)\n- [Community](#community)\n- [Needs your Help!](#needs-your-help)\n- [Related Resources](#related-resources)\n- [License](#license)\n\n\u003C!-- tocstop -->\n\n\u003C!-- doc -->\n\n## :sparkles: Tutorials\n\nPlease help us to fill out this section! :smiley:\n\n## NLP Pipeline Subtasks\n\nAn NLP Pipeline starts with a plain text.\n\n### Pipeline Generation\n\n- [composable_operations](https:\u002F\u002Fgithub.com\u002Ft6d\u002Fcomposable_operations) -\n  Definition framework for operation pipelines.\n- [ruby-spark](https:\u002F\u002Fgithub.com\u002Fondra-m\u002Fruby-spark) -\n  Spark bindings with an easy to understand DSL.\n- [phobos](https:\u002F\u002Fgithub.com\u002Fphobos\u002Fphobos) -\n  Simplified Ruby Client for [Apache Kafka](https:\u002F\u002Fkafka.apache.org\u002F).\n- [parallel](https:\u002F\u002Fgithub.com\u002Fgrosser\u002Fparallel) -\n  Supervisor for parallel execution on multiple CPUs or in many threads.\n- [pwrake](https:\u002F\u002Fgithub.com\u002Fmasa16\u002Fpwrake) -\n  Rake extensions to run local and remote tasks in parallel.\n\n### Multipurpose Engines\n\n- [open-nlp](https:\u002F\u002Fgithub.com\u002Flouismullie\u002Fopen-nlp) -\n  Ruby Bindings for the [OpenNLP](https:\u002F\u002Fopennlp.apache.org\u002F) Toolkit.\n- [stanford-core-nlp](https:\u002F\u002Fgithub.com\u002Flouismullie\u002Fstanford-core-nlp) -\n  Ruby Bindings for the Stanford [CoreNLP](https:\u002F\u002Fgithub.com\u002Fstanfordnlp\u002FCoreNLP) tools.\n- [treat](https:\u002F\u002Fgithub.com\u002Flouismullie\u002Ftreat) -\n  Natural Language Processing framework for Ruby (like [NLTK](http:\u002F\u002Fwww.nltk.org\u002F) for Python).\n- 
[nlp_toolz](https:\u002F\u002Fgithub.com\u002FLeFnord\u002Fnlp_toolz) -\n  Wrapper over some [OpenNLP](https:\u002F\u002Fopennlp.apache.org\u002F) classes and\n  the original [Berkeley Parser](https:\u002F\u002Fgithub.com\u002Fslavpetrov\u002Fberkeleyparser).\n- [open_nlp](https:\u002F\u002Fgithub.com\u002Fhck\u002Fopen_nlp) -\n  JRuby Bindings for the [OpenNLP](https:\u002F\u002Fopennlp.apache.org\u002F) Toolkit.\n- [ruby-spacy](https:\u002F\u002Fgithub.com\u002Fyohasebe\u002Fruby-spacy) &mdash;\n  Wrapper module for spaCy NLP library via [PyCall](https:\u002F\u002Fgithub.com\u002Fmrkn\u002Fpycall.rb).\n\n#### On-line APIs\n\n- [alchemyapi_ruby](https:\u002F\u002Fgithub.com\u002Falchemyapi\u002Falchemyapi_ruby) -\n  Legacy Ruby SDK for AlchemyAPI\u002FBluemix.\n- [wit-ruby](https:\u002F\u002Fgithub.com\u002Fwit-ai\u002Fwit-ruby) -\n  Ruby client library for the [Wit.ai](https:\u002F\u002Fwit.ai\u002F) Language Understanding Platform.\n- [wlapi](https:\u002F\u002Fgithub.com\u002Farbox\u002Fwlapi) - Ruby client library for\n  [Wortschatz Leipzig](http:\u002F\u002Fwortschatz.uni-leipzig.de\u002Fde) web services.\n- [monkeylearn-ruby](https:\u002F\u002Fgithub.com\u002Fmonkeylearn\u002Fmonkeylearn-ruby) - Sentiment\n  Analysis, Topic Modelling, Language Detection, Named Entity Recognition via\n  a Ruby based Web API client.\n- [google-cloud-language](https:\u002F\u002Fgithub.com\u002Fgoogleapis\u002Fgoogle-cloud-ruby\u002Ftree\u002Fmaster\u002Fgoogle-cloud-language) -\n  Google's Natural Language service API for Ruby.\n\n### Language Identification\n\nLanguage Identification is one of the first crucial steps in every NLP Pipeline.\n\n- [scylla](https:\u002F\u002Fgithub.com\u002Fhashwin\u002Fscylla) -\n  Language Categorization and Identification.\n\n### Segmentation\n\nTools for Tokenization, Word and Sentence Boundary Detection and Disambiguation.\n\n- [tokenizer](https:\u002F\u002Fgithub.com\u002Farbox\u002Ftokenizer) -\n  Simple multilingual tokenizer.\n  
\u003Csup>[[tutorial](tutorials\u002Ftokenizer.md)]\u003C\u002Fsup>\n- [pragmatic_tokenizer](https:\u002F\u002Fgithub.com\u002Fdiasks2\u002Fpragmatic_tokenizer) -\n  Multilingual tokenizer to split a string into tokens.\n- [nlp-pure](https:\u002F\u002Fgithub.com\u002Fparhamr\u002Fnlp-pure) -\n  Natural language processing algorithms implemented in pure Ruby with minimal dependencies.\n- [textoken](https:\u002F\u002Fgithub.com\u002Fmanorie\u002Ftextoken) -\n  Simple and customizable text tokenization library.\n- [pragmatic_segmenter](https:\u002F\u002Fgithub.com\u002Fdiasks2\u002Fpragmatic_segmenter) -\n  Word Boundary Disambiguation with many cookies.\n- [punkt-segmenter](https:\u002F\u002Fgithub.com\u002Flfcipriani\u002Fpunkt-segmenter) -\n  Pure Ruby implementation of the Punkt Segmenter.\n- [tactful_tokenizer](https:\u002F\u002Fgithub.com\u002Fzencephalon\u002FTactful_Tokenizer) -\n  RegExp based tokenizer for different languages.\n- [scapel](https:\u002F\u002Fgithub.com\u002Flouismullie\u002Fscalpel) -\n  Sentence Boundary Disambiguation tool.\n\n### Lexical Processing\n\n#### Stemming\n\nStemming is the term used in information retrieval to describe the process for\nreducing wordforms to some base representation. Stemming should be distinguished\nfrom [Lemmatization](#lemmatization) since `stems` are not necessarily have\nlinguistic motivation.\n\n- [ruby-stemmer](https:\u002F\u002Fgithub.com\u002Faurelian\u002Fruby-stemmer) -\n  Ruby-Stemmer exposes the SnowBall API to Ruby.\n- [uea-stemmer](https:\u002F\u002Fgithub.com\u002Fealdent\u002Fuea-stemmer) -\n  Conservative stemmer for search and indexing.\n\n#### Lemmatization\n\nLemmatization is considered a process of finding a base form of a word. 
Lemmas\nare often collected in dictionaries.\n\n- [lemmatizer](https:\u002F\u002Fgithub.com\u002Fyohasebe\u002Flemmatizer) -\n  WordNet based Lemmatizer for English texts.\n\n#### Lexical Statistics: Counting Types and Tokens\n\n- [wc](https:\u002F\u002Fgithub.com\u002Fthesp0nge\u002Fwc) -\n  Facilities to count word occurrences in a text.\n- [word_count](https:\u002F\u002Fgithub.com\u002FAtelierConvivialite\u002Fword_count) -\n  Word counter for `String` and `Hash` objects.\n- [words_counted](https:\u002F\u002Fgithub.com\u002Fabitdodgy\u002Fwords_counted) -\n  Pure Ruby library counting word statistics with different custom options.\n\n#### Filtering Stop Words\n\n- [stopwords-filter](https:\u002F\u002Fgithub.com\u002Fbrenes\u002Fstopwords-filter) - Filter and\n  Stop Word Lexicon based on the SnowBall lemmatizer.\n\n### Phrasal Level Processing\n\n- [n_gram](https:\u002F\u002Fgithub.com\u002Freddavis\u002FN-Gram) -\n  N-Gram generator.\n- [ruby-ngram](https:\u002F\u002Fgithub.com\u002Ftkellen\u002Fruby-ngram) -\n  Break words and phrases into ngrams.\n- [raingrams](https:\u002F\u002Fgithub.com\u002Fpostmodern\u002Fraingrams) -\n  Flexible and general-purpose ngrams library written in pure Ruby.\n\n### Syntactic Processing\n\n#### Constituency Parsing\n\n- [stanfordparser](https:\u002F\u002Frubygems.org\u002Fgems\u002Fstanfordparser) -\n  Ruby based wrapper for the Stanford Parser.\n- [rley](https:\u002F\u002Fgithub.com\u002Ffamished-tiger\u002FRley) -\n  Pure Ruby implementation of the [Earley](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FEarley_parser)\n  Parsing Algorithm for Context-Free Constituency Grammars.\n- [rsyntaxtree](https:\u002F\u002Fgithub.com\u002Fyohasebe\u002Frsyntaxtree) -\n  Visualization for syntactic trees in Ruby based on [RMagick](https:\u002F\u002Fgithub.com\u002Frmagick\u002Frmagick).\n  \u003Csup>[dep: [ImageMagick](#imagemagick)]\u003C\u002Fsup>\n\n### Semantic Analysis\n\n- 
[amatch](https:\u002F\u002Fgithub.com\u002Fflori\u002Famatch) -\n  Set of five distance types between strings (including Levenshtein, Sellers, Jaro-Winkler, 'pair distance').\n- [damerau-levenshtein](https:\u002F\u002Fgithub.com\u002FGlobalNamesArchitecture\u002Fdamerau-levenshtein) -\n  Calculates edit distance using the Damerau-Levenshtein algorithm.\n- [hotwater](https:\u002F\u002Fgithub.com\u002Fcolinsurprenant\u002Fhotwater) -\n  Fast Ruby FFI string edit distance algorithms.\n- [levenshtein-ffi](https:\u002F\u002Fgithub.com\u002Fdbalatero\u002Flevenshtein-ffi) -\n  Fast string edit distance computation, using the Damerau-Levenshtein algorithm.\n- [tf_idf](https:\u002F\u002Fgithub.com\u002Freddavis\u002FTF-IDF) -\n  Term Frequency \u002F Inverse Document Frequency in pure Ruby.\n- [tf-idf-similarity](https:\u002F\u002Fgithub.com\u002Fjpmckinney\u002Ftf-idf-similarity) -\n  Calculate the similarity between texts using TF\u002FIDF.\n\n### Pragmatical Analysis\n- [SentimentLib](https:\u002F\u002Fgithub.com\u002Fnzaillian\u002Fsentiment_lib) -\n  Simple extensible sentiment analysis gem.\n\n## High Level Tasks\n\n### Spelling and Error Correction\n\n- [gingerice](https:\u002F\u002Fgithub.com\u002Fsubosito\u002Fgingerice) -\n  Spelling and Grammar corrections via the [Ginger](https:\u002F\u002Fwww.gingersoftware.com\u002F) API.\n- [hunspell-i18n](https:\u002F\u002Fgithub.com\u002Fromanbsd\u002Fhunspell) -\n  Ruby bindings to the standard [Hunspell](https:\u002F\u002Fhunspell.github.io\u002F) Spell Checker.\n- [ffi-hunspell](https:\u002F\u002Fgithub.com\u002Fpostmodern\u002Fffi-hunspell) -\n  FFI based Ruby bindings for [Hunspell](https:\u002F\u002Fhunspell.github.io\u002F).\n- [hunspell](https:\u002F\u002Fgithub.com\u002Fsegabor\u002FHunspell) -\n  Ruby bindings to [Hunspell](https:\u002F\u002Fhunspell.github.io\u002F) via Ruby C API.\n\n### Text Alignment\n\n- [alignment](https:\u002F\u002Fgithub.com\u002Fpovilasjurcys\u002Falignment) -\n  Alignment routines for 
bilingual texts (Gale-Church implementation).\n\n### Machine Translation\n\n- [google-api-client](https:\u002F\u002Fgithub.com\u002Fgoogleapis\u002Fgoogle-api-ruby-client) -\n  Google API Ruby Client.\n- [microsoft_translator](https:\u002F\u002Fgithub.com\u002Fikayzo\u002Fmicrosoft_translator) -\n  Ruby client for the microsoft translator API.\n- [termit](https:\u002F\u002Fgithub.com\u002Fpawurb\u002Ftermit) -\n  Google Translate with speech synthesis in your terminal.\n- [zipf](https:\u002F\u002Fgithub.com\u002Fpks\u002Fzipf) -\n  implementation of BLEU and other base algorithms.\n\n### Sentiment Analysis\n\n- [stimmung](https:\u002F\u002Fgithub.com\u002Fpachacamac\u002Fstimmung) -\n  Semantic Polarity based on the\n  [SentiWS](http:\u002F\u002Fwortschatz.uni-leipzig.de\u002Fen\u002Fdownload) lexicon.\n\n### Numbers, Dates, and Time Parsing\n\n- [chronic](https:\u002F\u002Fgithub.com\u002Fmojombo\u002Fchronic) -\n  Pure Ruby natural language date parser.\n- [chronic_between](https:\u002F\u002Fgithub.com\u002Fjrobertson\u002Fchronic_between) -\n  Simple Ruby natural language parser for date and time ranges.\n- [chronic_duration](https:\u002F\u002Fgithub.com\u002Fhenrypoydar\u002Fchronic_duration) -\n  Pure Ruby parser for elapsed time.\n- [kronic](https:\u002F\u002Fgithub.com\u002Fxaviershay\u002Fkronic) -\n  Methods for parsing and formatting human readable dates.\n- [nickel](https:\u002F\u002Fgithub.com\u002Fiainbeeston\u002Fnickel) -\n  Extracts date, time, and message information from naturally worded text.\n- [tickle](https:\u002F\u002Fgithub.com\u002Fyb66\u002Ftickle) -\n  Parser for recurring and repeating events.\n- [numerizer](https:\u002F\u002Fgithub.com\u002Fjduff\u002Fnumerizer) -\n  Ruby parser for English number expressions.\n\n### Named Entity Recognition\n\n- [ruby-ner](https:\u002F\u002Fgithub.com\u002Fmblongii\u002Fruby-ner) -\n  Named Entity Recognition with Stanford NER and Ruby.\n- 
[ruby-nlp](https:\u002F\u002Fgithub.com\u002Ftiendung\u002Fruby-nlp) -\n  Ruby Binding for Stanford Pos-Tagger and Name Entity Recognizer.\n\n### Text-to-Speech-to-Text\n\n- [espeak-ruby](https:\u002F\u002Fgithub.com\u002Fdejan\u002Fespeak-ruby) -\n  Small Ruby API for utilizing 'espeak' and 'lame' to create text-to-speech mp3 files.\n- [tts](https:\u002F\u002Fgithub.com\u002Fc2h2\u002Ftts) -\n  Text-to-Speech conversion using the Google translate service.\n- [att_speech](https:\u002F\u002Fgithub.com\u002Fadhearsion\u002Fatt_speech) -\n  Ruby wrapper over the AT&T Speech API for speech to text.\n- [pocketsphinx-ruby](https:\u002F\u002Fgithub.com\u002Fwatsonbox\u002Fpocketsphinx-ruby) -\n  Pocketsphinx bindings.\n\n## Dialog Agents, Assistants, and Chatbots\n\n- [chatterbot](https:\u002F\u002Fgithub.com\u002Fmuffinista\u002Fchatterbot) -\n  Straightforward ruby-based Twitter Bot Framework, using OAuth to authenticate.\n- [lita](https:\u002F\u002Fgithub.com\u002Flitaio\u002Flita) -\n  Highly extensible chat operation bot framework written with persistent storage on [Redis](https:\u002F\u002Fredis.io\u002F).\n\n## Linguistic Resources\n\n- [rwordnet](https:\u002F\u002Fgithub.com\u002Fdoches\u002Frwordnet) -\n  Pure Ruby self contained API library for the [Princeton WordNet®](https:\u002F\u002Fwordnet.princeton.edu\u002F).\n- [wordnet](https:\u002F\u002Fgithub.com\u002Fged\u002Fruby-wordnet\u002Fblob\u002Fmaster\u002FREADME.rdoc) -\n  Performance tuned bindings for the [Princeton WordNet®](https:\u002F\u002Fwordnet.princeton.edu\u002F).\n\n## Machine Learning Libraries\n\n[Machine Learning](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FMachine_learning) Algorithms\nin pure Ruby or written in other programming languages with appropriate bindings\nfor Ruby.\n\nFor more up-to-date list please look at the [Awesome ML with Ruby][ml-with-ruby] list.\n\n- [rb-libsvm](https:\u002F\u002Fgithub.com\u002Ffebeling\u002Frb-libsvm) -\n  Support Vector Machines with Ruby.\n- 
[weka](https:\u002F\u002Fgithub.com\u002Fpaulgoetze\u002Fweka-jruby) -\n  JRuby bindings for Weka, different ML algorithms implemented through Weka.\n- [decisiontree](https:\u002F\u002Fgithub.com\u002Figrigorik\u002Fdecisiontree) -\n  Decision Tree ID3 Algorithm in pure Ruby\n  \u003Csup>[[post](https:\u002F\u002Fwww.igvita.com\u002F2007\u002F04\u002F16\u002Fdecision-tree-learning-in-ruby\u002F)]\u003C\u002Fsup>.\n- [rtimbl](https:\u002F\u002Fgithub.com\u002Fmaspwr\u002Frtimbl) -\n  Memory based learners from the Timbl framework.\n- [classifier-reborn](https:\u002F\u002Fgithub.com\u002Fjekyll\u002Fclassifier-reborn) -\n  General classifier module to allow Bayesian and other types of classifications.\n- [lda-ruby](https:\u002F\u002Fgithub.com\u002Fealdent\u002Flda-ruby) -\n  Ruby implementation of the [LDA](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FLatent_Dirichlet_allocation)\n  (Latent Dirichlet Allocation) for automatic Topic Modelling and Document Clustering.\n- [liblinear-ruby-swig](https:\u002F\u002Fgithub.com\u002Ftomz\u002Fliblinear-ruby-swig) -\n  Ruby interface to LIBLINEAR (much more efficient than LIBSVM for text classification).\n- [linnaeus](https:\u002F\u002Fgithub.com\u002Fdjcp\u002Flinnaeus) -\n  Redis-backed Bayesian classifier.\n- [maxent_string_classifier](https:\u002F\u002Fgithub.com\u002Fmccraigmccraig\u002Fmaxent_string_classifier) -\n  JRuby maximum entropy classifier for string data, based on the OpenNLP Maxent framework.\n- [naive_bayes](https:\u002F\u002Fgithub.com\u002Freddavis\u002FNaive-Bayes) -\n  Simple Naive Bayes classifier.\n- [nbayes](https:\u002F\u002Fgithub.com\u002Foasic\u002Fnbayes) -\n  Full-featured, Ruby implementation of Naive Bayes.\n- [omnicat](https:\u002F\u002Fgithub.com\u002Fmustafaturan\u002Fomnicat) -\n  Generalized rack framework for text classifications.\n- [omnicat-bayes](https:\u002F\u002Fgithub.com\u002Fmustafaturan\u002Fomnicat-bayes) -\n  Naive Bayes text classification implementation as an OmniCat 
classifier strategy.\n- [ruby-fann](https:\u002F\u002Fgithub.com\u002Ftangledpath\u002Fruby-fann) -\n  Ruby bindings to the [Fast Artificial Neural Network Library (FANN)](http:\u002F\u002Fleenissen.dk\u002Ffann\u002Fwp\u002F).\n- [rblearn](https:\u002F\u002Fgithub.com\u002Fhimkt\u002Frblearn) - Feature Extraction and Crossvalidation library.\n\n## Data Visualization\n\nPlease refer to the [Data Visualization](https:\u002F\u002Fgithub.com\u002Farbox\u002Fdata-science-with-ruby#visualization)\nsection on the [Data Science with Ruby][ds-with-ruby] list.\n\n## Optical Character Recognition\n\n* [tesseract-ocr](https:\u002F\u002Fgithub.com\u002Fmeh\u002Fruby-tesseract-ocr) -\n  FFI based wrapper over the [Tesseract OCR Engine](https:\u002F\u002Fgithub.com\u002Ftesseract-ocr\u002Ftesseract).\n\n## Text Extraction\n\n- [yomu](https:\u002F\u002Fgithub.com\u002Fyomurb\u002Fyomu) -\n  library for extracting text and metadata from files and documents\n  using the [Apache Tika](https:\u002F\u002Ftika.apache.org\u002F) content analysis toolkit.\n\n## Full Text Search, Information Retrieval, Indexing\n\n- [rsolr](https:\u002F\u002Fgithub.com\u002Frsolr\u002Frsolr) -\n  Ruby and Rails client library for [Apache Solr](http:\u002F\u002Flucene.apache.org\u002Fsolr\u002F).\n- [sunspot](https:\u002F\u002Fgithub.com\u002Fsunspot\u002Fsunspot) -\n  Rails centric client for [Apache Solr](http:\u002F\u002Flucene.apache.org\u002Fsolr\u002F).\n- [thinking-sphinx](https:\u002F\u002Fgithub.com\u002Fpat\u002Fthinking-sphinx) -\n  [Active Record](https:\u002F\u002Fguides.rubyonrails.org\u002Factive_record_basics.html)\n  plugin for using [Sphinx](http:\u002F\u002Fsphinxsearch.com\u002F) in (not only) Rails based projects.\n- [elasticsearch](https:\u002F\u002Fgithub.com\u002Felastic\u002Felasticsearch-ruby\u002Ftree\u002Fmaster\u002Felasticsearch) -\n  Ruby client and API for [Elasticsearch](https:\u002F\u002Fwww.elastic.co\u002F).\n- 
[elasticsearch-rails](https:\u002F\u002Fgithub.com\u002Felastic\u002Felasticsearch-rails) -\n  Ruby and Rails integrations for [Elasticsearch](https:\u002F\u002Fwww.elastic.co\u002F).\n- [google-api-client](https:\u002F\u002Fgithub.com\u002Fgoogleapis\u002Fgoogle-api-ruby-client) -\n  Ruby API library for [Google](https:\u002F\u002Fdevelopers.google.com\u002Fapi-client-library\u002Fruby\u002F) services.\n\n## Language Aware String Manipulation\n\nLibraries for language aware string manipulation, i.e. search, pattern matching,\ncase conversion, transcoding, regular expressions which need information about\nthe underlying language.\n\n- [fuzzy_match](https:\u002F\u002Fgithub.com\u002Fseamusabshere\u002Ffuzzy_match) -\n  Fuzzy string comparison with Distance measures and Regular Expression.\n- [fuzzy-string-match](https:\u002F\u002Fgithub.com\u002Fkiyoka\u002Ffuzzy-string-match) -\n  Fuzzy string matching library for Ruby.\n- [active_support](https:\u002F\u002Fgithub.com\u002Frails\u002Frails\u002Ftree\u002Fmaster\u002Factivesupport\u002Flib\u002Factive_support) -\n  RoR `ActiveSupport` gem has various string extensions that can handle case.\n- [fuzzy_tools](https:\u002F\u002Fgithub.com\u002Fbrianhempel\u002Ffuzzy_tools) -\n  Toolset for fuzzy searches in Ruby tuned for accuracy.\n- [u](http:\u002F\u002Fdisu.se\u002Fsoftware\u002Fu-1.0\u002F) -\n  U extends Ruby’s Unicode support.\n- [unicode](https:\u002F\u002Fgithub.com\u002Fblackwinter\u002Funicode) -\n  Unicode normalization library.\n- [CommonRegexRuby](https:\u002F\u002Fgithub.com\u002Ftalyssonoc\u002FCommonRegexRuby) -\n  Find a lot of kinds of common information in a string.\n- [regexp-examples](https:\u002F\u002Fgithub.com\u002Ftom-lord\u002Fregexp-examples) -\n  Generate strings that match a given regular expression.\n- [verbal_expressions](https:\u002F\u002Fgithub.com\u002Fryan-endacott\u002Fverbal_expressions) -\n  Make difficult regular expressions easy.\n- 
[translit_kit](https:\u002F\u002Fgithub.com\u002FAnalyzePlatypus\u002FTranslitKit) -\n  Transliterate Hebrew & Yiddish text into Latin characters.\n- [re2](https:\u002F\u002Fgithub.com\u002Fmudge\u002Fre2) -\n  hight-speed Regular Expression library for Text Mining and Text Extraction.\n- [regex_sample](https:\u002F\u002Fgithub.com\u002Fmochizukikotaro\u002Fregex_sample) -\n  sample string generation from a given Regular Expression.\n- [iuliia](https:\u002F\u002Fgithub.com\u002Fadnikiforov\u002Fiuliia-rb) &mdash;\n  transliteration Cyrillic to Latin in many possible ways (defined by the [reference implementation](https:\u002F\u002Fgithub.com\u002Fnalgeon\u002Fiuliia)).\n\n## Articles, Posts, Talks, and Presentations\n\n- 2019\n  - _Extracting Text From Images Using Ruby_ by [aonemd](https:\u002F\u002Ftwitter.com\u002Faonemd)\n    \u003Csup>[[post](https:\u002F\u002Faonemd.github.io\u002Fblog\u002Fextracting-text-from-images-using-ruby) |\n    [code](https:\u002F\u002Fgist.github.com\u002Faonemd\u002F7bb3c4760d9e47a9ce8e270198cb40a0)]\u003C\u002Fsup>\n- 2018\n  - _Natural Language Processing and Tweet Sentiment Analysis_ by [Cassandra Corrales](https:\u002F\u002Ftwitter.com\u002Fcasita305)\n    \u003Csup>[[post](https:\u002F\u002Fmedium.com\u002F@cmcorrales3\u002Fnatural-language-processing-and-tweet-sentiment-analysis-fa1edbb5ddd5)]\u003C\u002Fsup>\n- 2017\n  - _The Google NLP API Meets Ruby_ by [Aja Hammerly](https:\u002F\u002Ftwitter.com\u002Fthe_thagomizer)\n    \u003Csup>[[post](http:\u002F\u002Fwww.thagomizer.com\u002Fblog\u002F2017\u002F04\u002F13\u002Fthe-google-nlp-api-meets-ruby.html)]\u003C\u002Fsup>\n  - _Syntax Isn't Everything: NLP For Rubyists_ by [Aja Hammerly](https:\u002F\u002Ftwitter.com\u002Fthe_thagomizer)\n    \u003Csup>[[slides](http:\u002F\u002Fwww.thagomizer.com\u002Ffiles\u002FNLP_RailsConf2017.pdf)]\u003C\u002Fsup>\n  - _Scientific Computing on JRuby_ by [Prasun Anand](https:\u002F\u002Ftwitter.com\u002Fprasun_anand)\n    
\u003Csup>[[slides](https:\u002F\u002Fwww.slideshare.net\u002FPrasunAnand2\u002Ffosdem2017-scientific-computing-on-jruby) |\n    [video](https:\u002F\u002Fftp.fau.de\u002Ffosdem\u002F2017\u002FK.4.201\u002Fruby_scientific_computing_on_jruby.mp4) |\n    [slides](https:\u002F\u002Fwww.slideshare.net\u002FPrasunAnand2\u002Fscientific-computing-on-jruby) |\n    [slides](https:\u002F\u002Fwww.slideshare.net\u002FPrasunAnand2\u002Fscientific-computation-on-jruby)]\u003C\u002Fsup>\n  - _Unicode Normalization in Ruby_ by [Starr Horne](https:\u002F\u002Ftwitter.com\u002Fstarrhorne)\n    \u003Csup>[[post](https:\u002F\u002Fblog.honeybadger.io\u002Fruby_unicode_normalization\u002F)]\u003C\u002Fsup>\n- 2016\n  - _Quickly Create a Telegram Bot in Ruby_ by [Ardian Haxha](https:\u002F\u002Ftwitter.com\u002FArdianHaxha)\n    \u003Csup>[[tutorial](https:\u002F\u002Fwww.sitepoint.com\u002Fquickly-create-a-telegram-bot-in-ruby\u002F)]\u003C\u002Fsup>\n  - _Deep Learning: An Introduction for Ruby Developers_ by [Geoffrey Litt](https:\u002F\u002Ftwitter.com\u002Fgeoffreylitt)\n    \u003Csup>[[slides](https:\u002F\u002Fspeakerdeck.com\u002Fgeoffreylitt\u002Fdeep-learning-an-introduction-for-ruby-developers)]\u003C\u002Fsup>\n  - _How I made a pure-Ruby word2vec program more than 3x faster_ by [Kei Sawada](https:\u002F\u002Ftwitter.com\u002Fremore)\n    \u003Csup>[[slides](https:\u002F\u002Fspeakerdeck.com\u002Fremore\u002Fhow-i-made-a-pure-ruby-word2vec-program-more-than-3x-faster)]\u003C\u002Fsup>\n  - _Dōmo arigatō, Mr. 
Roboto: Machine Learning with Ruby_ by [Eric Weinstein](https:\u002F\u002Ftwitter.com\u002Fericqweinstein)\n    \u003Csup>[[slides](https:\u002F\u002Fspeakerdeck.com\u002Fericqweinstein\u002Fdomo-arigato-mr-roboto-machine-learning-with-ruby) | [video](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=T1nFQ49TyeA)]\u003C\u002Fsup>\n- 2015\n  - _N-gram Analysis for Fun and Profit_ by [Jesus Castello](https:\u002F\u002Fgithub.com\u002Fmatugm)\n    \u003Csup>[[tutorial](https:\u002F\u002Fwww.rubyguides.com\u002F2015\u002F09\u002Fngram-analysis-ruby\u002F)]\u003C\u002Fsup>\n  - _Machine Learning made simple with Ruby_ by [Lorenzo Masini](https:\u002F\u002Fgithub.com\u002Frugginoso)\n    \u003Csup>[[tutorial](https:\u002F\u002Fwww.leanpanda.com\u002Fblog\u002F2015\u002F08\u002F24\u002Fmachine-learning-automatic-classification\u002F)]\u003C\u002Fsup>\n  - _Using Ruby Machine Learning to Find Paris Hilton Quotes_ by [Rick Carlino](https:\u002F\u002Fgithub.com\u002FRickCarlino)\n    \u003Csup>[[tutorial](http:\u002F\u002Fweb.archive.org\u002Fweb\u002F20160414072324\u002Fhttp:\u002F\u002Fdatamelon.io\u002Fblog\u002F2015\u002Fusing-ruby-machine-learning-id-paris-hilton-quotes.html)]\u003C\u002Fsup>\n  - _Exploring Natural Language Processing in Ruby_ by [Kevin Dias](https:\u002F\u002Fgithub.com\u002Fdiasks2)\n    \u003Csup>[[slides](https:\u002F\u002Fwww.slideshare.net\u002Fdiasks2\u002Fexploring-natural-language-processing-in-ruby)]\u003C\u002Fsup>\n  - _Machine Learning made simple with Ruby_ by [Lorenzo Masini](https:\u002F\u002Ftwitter.com\u002Frugginoso)\n    \u003Csup>[[post](https:\u002F\u002Fwww.leanpanda.com\u002Fblog\u002F2015\u002F08\u002F24\u002Fmachine-learning-automatic-classification\u002F)]\u003C\u002Fsup>\n  - _Practical Data Science in Ruby_ by Bobby Grayson\n    \u003Csup>[[slides](http:\u002F\u002Fslides.com\u002Fbobbygrayson\u002Fp#\u002F)]\u003C\u002Fsup>\n- 2014\n  - _Natural Language Parsing with Ruby_ by [Glauco 
Custódio](https:\u002F\u002Fgithub.com\u002Fglaucocustodio)\n    \u003Csup>[[tutorial](http:\u002F\u002Fglaucocustodio.github.io\u002F2014\u002F11\u002F10\u002Fnatural-language-parsing-with-ruby\u002F)]\u003C\u002Fsup>\n  - _Demystifying Data Science: Analyzing Conference Talks with Rails and Ngrams_ by\n    [Todd Schneider](https:\u002F\u002Fgithub.com\u002Ftoddwschneider)\n    \u003Csup>[[video](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=2ZDCxwB29Bg) | [code](https:\u002F\u002Fgithub.com\u002FGenius\u002Fabstractogram)]\u003C\u002Fsup>\n  - _Natural Language Processing with Ruby_ by [Konstantin Tennhard](https:\u002F\u002Fgithub.com\u002Ft6d)\n    \u003Csup>[[video](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=5u86qVh8r0M) | [video](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=oFmy_QBQ5DU) |\n    [video](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=sPkeeWnsMn0) |\n    [slides](http:\u002F\u002Feuruko2013.org\u002Fspeakers\u002Fpresentations\u002Fnatural_language_processing_with_ruby_and_opennlp-tennhard.pdf)]\u003C\u002Fsup>\n- 2013\n  - _How to parse 'go' - Natural Language Processing in Ruby_ by\n    [Tom Cartwright](https:\u002F\u002Ftwitter.com\u002Ftomcartwrightuk)\n    \u003Csup>[[slides](https:\u002F\u002Fwww.slideshare.net\u002FTomCartwright\u002Fnatual-language-processing-in-ruby) |\n    [video](https:\u002F\u002Fskillsmatter.com\u002Fskillscasts\u002F4883-how-to-parse-go)]\u003C\u002Fsup>\n  - _Natural Language Processing in Ruby_ by [Brandon Black](https:\u002F\u002Ftwitter.com\u002Fbrandonmblack)\n    \u003Csup>[[slides](https:\u002F\u002Fspeakerdeck.com\u002Fbrandonblack\u002Fnatural-language-processing-in-ruby) |\n    [video](http:\u002F\u002Fconfreaks.tv\u002Fvideos\u002Frailsconf2013-natural-language-processing-with-ruby)]\u003C\u002Fsup>\n  - _Natural Language Processing with Ruby: n-grams_ by [Nathan Kleyn](https:\u002F\u002Fgithub.com\u002Fnathankleyn)\n    
\u003Csup>[[tutorial](https:\u002F\u002Fwww.sitepoint.com\u002Fnatural-language-processing-ruby-n-grams\u002F) |\n    [code](https:\u002F\u002Fgithub.com\u002Fnathankleyn\u002Fruby-nlp)]\u003C\u002Fsup>\n  - _Seeking Lovecraft, Part 1: An introduction to NLP and the Treat Gem_ by\n    [Robert Qualls](https:\u002F\u002Fgithub.com\u002Frlqualls)\n    \u003Csup>[[tutorial](https:\u002F\u002Fwww.sitepoint.com\u002Fseeking-lovecraft-part-1-an-introduction-to-nlp-and-the-treat-gem\u002F)]\u003C\u002Fsup>\n- 2012\n  - _Machine Learning with Ruby, Part One_ by [Vasily Vasinov](https:\u002F\u002Ftwitter.com\u002Fvasinov)\n    \u003Csup>[[tutorial](http:\u002F\u002Fwww.vasinov.com\u002Fblog\u002Fmachine-learning-with-ruby-part-one\u002F)]\u003C\u002Fsup>\n- 2011\n  - _Ruby one-liners_ by [Benoit Hamelin](https:\u002F\u002Ftwitter.com\u002Fbenoithamelin)\n    \u003Csup>[[post](http:\u002F\u002Fbenoithamelin.tumblr.com\u002Fruby1line)]\u003C\u002Fsup>\n  - _Clustering in Ruby_ by [Colin Drake](https:\u002F\u002Ftwitter.com\u002Fcolinfdrake)\n    \u003Csup>[[post](https:\u002F\u002Fcolindrake.me\u002Fpost\u002Fk-means-clustering-in-ruby\u002F)\u002F)]\u003C\u002Fsup>\n- 2010\n  - _bayes_motel – Bayesian classification for Ruby_ by [Mike Perham](https:\u002F\u002Ftwitter.com\u002Fmperham)\n    \u003Csup>[[post](http:\u002F\u002Fwww.mikeperham.com\u002F2010\u002F04\u002F28\u002Fbayes_motel-bayesian-classification-for-ruby\u002F)]\u003C\u002Fsup>\n- 2009\n  - _Porting the UEA-Lite Stemmer to Ruby_ by [Jason Adams](https:\u002F\u002Ftwitter.com\u002Fealdent)\n    \u003Csup>[[post](https:\u002F\u002Fealdent.wordpress.com\u002F2009\u002F07\u002F16\u002Fporting-the-uea-lite-stemmer-to-ruby\u002F)]\u003C\u002Fsup>\n  - _NLP Resources for Ruby_ by [Jason Adams](https:\u002F\u002Ftwitter.com\u002Fealdent)\n    \u003Csup>[[post](https:\u002F\u002Fealdent.wordpress.com\u002F2009\u002F09\u002F13\u002Fnlp-resources-for-ruby\u002F)]\u003C\u002Fsup>\n- 2008\n  - _Support Vector Machines (SVM) 
in Ruby_ by [Ilya Grigorik](https:\u002F\u002Ftwitter.com\u002Figrigorik)\n    \u003Csup>[[post](https:\u002F\u002Fwww.igvita.com\u002F2008\u002F01\u002F07\u002Fsupport-vector-machines-svm-in-ruby\u002F)]\u003C\u002Fsup>\n  - _Practical text classification with Ruby_ by [Gleicon Moraes](https:\u002F\u002Ftwitter.com\u002Fgleicon)\n    \u003Csup>[[post](https:\u002F\u002Fzenmachine.wordpress.com\u002Fpractical-text-classification-with-ruby\u002F) |\n    [code](https:\u002F\u002Fgithub.com\u002Fgleicon\u002Fzenmachine)]\u003C\u002Fsup>\n- 2007\n  - _Decision Tree Learning in Ruby_ by [Ilya Grigorik](https:\u002F\u002Ftwitter.com\u002Figrigorik)\n    \u003Csup>[[post](https:\u002F\u002Fwww.igvita.com\u002F2007\u002F04\u002F16\u002Fdecision-tree-learning-in-ruby\u002F)]\u003C\u002Fsup>\n- 2006\n  - _Speak My Language: Natural Language Processing With Ruby_ by [Michael Granger](https:\u002F\u002Fdeveiate.org\u002Fresume.html)\n    \u003Csup>[[slides](https:\u002F\u002Fdeveiate.org\u002Fmisc\u002FSpeak-My-Language.pdf) |\n          [write-up](http:\u002F\u002Fblog.nicksieger.com\u002Farticles\u002F2006\u002F10\u002F22\u002Frubyconf-natural-language-generation-and-processing-in-ruby\u002F) |\n          [write-up](http:\u002F\u002Fjuixe.com\u002Fpapers\u002FRubyConf2006.pdf)]\u003C\u002Fsup>\n\n## Projects and Code Examples\n\n- [Going the Distance](https:\u002F\u002Fgithub.com\u002Fschneems\u002Fgoing_the_distance) -\n  Implementations of various distance algorithms with example calculations.\n- [Named entity recognition with Stanford NER and Ruby](https:\u002F\u002Fgithub.com\u002Fmblongii\u002Fruby-ner) -\n  NER Examples in Ruby and Java with some [explanations](https:\u002F\u002Fweb.archive.org\u002Fweb\u002F20120722225402\u002Fhttp:\u002F\u002Fmblongii.com\u002F2012\u002F04\u002F15\u002Fnamed-entity-recognition-with-stanford-ner-and-ruby\u002F).\n- [Words Counted](http:\u002F\u002Frubywordcount.com\u002F) -\n  examples of customizable word statistics powered by\n  
[words_counted](https:\u002F\u002Fgithub.com\u002Fabitdodgy\u002Fwords_counted).\n- [RSyntaxTree](https:\u002F\u002Fyohasebe.com\u002Frsyntaxtree\u002F) -\n  Web based demonstration of the syntactic tree visualization.\n\n## Books\n\n-  [Miller, Rob](https:\u002F\u002Ftwitter.com\u002Frobmil\u002F).\n   _Text Processing with Ruby: Extract Value from the Data That Surrounds You._\n   Pragmatic Programmers, 2015.\n   \u003Csup>[[link](https:\u002F\u002Fwww.amazon.com\u002FText-Processing-Ruby-Extract-Surrounds\u002Fdp\u002F1680500708)]\u003C\u002Fsup>\n-  [Watson, Mark](https:\u002F\u002Ftwitter.com\u002Fmark_l_watson).\n   _Scripting Intelligence: Web 3.0 Information Gathering and Processing._\n   APRESS, 2010.\n   \u003Csup>[[link](https:\u002F\u002Fwww.amazon.de\u002FScripting-Intelligence-Information-Gathering-Processing\u002Fdp\u002F1430223510)]\u003C\u002Fsup>\n-  [Watson, Mark](https:\u002F\u002Ftwitter.com\u002Fmark_l_watson).\n   _Practical Semantic Web and Linked Data Applications._ Lulu, 2010.\n   \u003Csup>[[link](http:\u002F\u002Fwww.lulu.com\u002Fshop\u002Fmark-watson\u002Fpractical-semantic-web-and-linked-data-applications-java-edition\u002Fpaperback\u002Fproduct-10915016.html)]\u003C\u002Fsup>\n\n## Community\n\n- [Reddit](https:\u002F\u002Fwww.reddit.com\u002Fr\u002FLanguageTechnology\u002Fsearch?q=ruby&restrict_sr=on)\n- [Stack Overflow](https:\u002F\u002Fstackoverflow.com\u002Fsearch?q=%5Bnlp%5D+and+%5Bruby%5D)\n- [Twitter](https:\u002F\u002Ftwitter.com\u002Fsearch?q=Ruby%20NLP%20%23ruby%20OR%20%23nlproc%20OR%20%23rubynlp%20OR%20%23nlp&src=typd&lang=en)\n\n## Needs your Help!\n\nAll projects in this section are really important for the community but need\nmore attention. 
Please if you have spare time and dedication spend some hours\non the code here.\n\n- [ferret](https:\u002F\u002Fgithub.com\u002Fdbalmain\u002Fferret) -\n  Information Retrieval in C and Ruby.\n- [summarize](https:\u002F\u002Fgithub.com\u002Fssoper\u002Fsummarize) -\n  Ruby native wrapper for [Open Text Summarizer](https:\u002F\u002Fgithub.com\u002Fneopunisher\u002FOpen-Text-Summarizer).\n\n## Related Resources\n\n- [Neural Machine Translation Implementations](https:\u002F\u002Fgithub.com\u002Fjonsafari\u002Fnmt-list)\n- [Awesome Ruby](https:\u002F\u002Fgithub.com\u002Fmarkets\u002Fawesome-ruby#natural-language-processing) -\n  Among other awesome items a short list of NLP related projects.\n- [Ruby NLP](https:\u002F\u002Fgithub.com\u002Fdiasks2\u002Fruby-nlp) -\n  State-of-Art collection of Ruby libraries for NLP.\n- [Speech and Natural Language Processing](https:\u002F\u002Fgithub.com\u002Fedobashira\u002Fspeech-language-processing) -\n  General List of NLP related resources (mostly not for Ruby programmers).\n- [Scientific Ruby](http:\u002F\u002Fsciruby.com\u002F) -\n  Linear Algebra, Visualization and Scientific Computing for Ruby.\n- [iRuby](https:\u002F\u002Fgithub.com\u002FSciRuby\u002Firuby) - IRuby kernel for Jupyter (formelly IPython).\n- [Awesome OCR](https:\u002F\u002Fgithub.com\u002Fkba\u002Fawesome-ocr) -\n  Multitude of OCR (Optical Character Recognition) resources.\n- [Awesome TensorFlow](https:\u002F\u002Fgithub.com\u002Fjtoy\u002Fawesome-tensorflow) -\n  Machine Learning with TensorFlow libraries.\n- \u003Ca name=\"imagemagic\">\u003C\u002Fa>\n  [ImageMagick](https:\u002F\u002Fimagemagick.org\u002Findex.php)\n\n## License\n\n[![Creative Commons Zero 1.0](http:\u002F\u002Fmirrors.creativecommons.org\u002Fpresskit\u002Fbuttons\u002F80x15\u002Fsvg\u002Fcc-zero.svg)](https:\u002F\u002Fcreativecommons.org\u002Fpublicdomain\u002Fzero\u002F1.0\u002F) `Awesome NLP with Ruby` by [Andrei Beliankou](https:\u002F\u002Fgithub.com\u002Farbox) 
and\n[Contributors](https:\u002F\u002Fgithub.com\u002Farbox\u002Fnlp-with-ruby\u002Fgraphs\u002Fcontributors).\n\nTo the extent possible under law, the person who associated CC0 with\n`Awesome NLP with Ruby` has waived all copyright and related or neighboring rights\nto `Awesome NLP with Ruby`.\n\nYou should have received a copy of the CC0 legalcode along with this\nwork. If not, see \u003Chttps:\u002F\u002Fcreativecommons.org\u002Fpublicdomain\u002Fzero\u002F1.0\u002F>.\n\n\u003C!--- Links --->\n[ruby]: https:\u002F\u002Fwww.ruby-lang.org\u002Fen\u002F\n[motivation]: https:\u002F\u002Fgithub.com\u002Farbox\u002Fnlp-with-ruby\u002Fblob\u002Fmaster\u002Fmotivation.md\n[faq]: https:\u002F\u002Fgithub.com\u002Farbox\u002Fnlp-with-ruby\u002Fblob\u002Fmaster\u002FFAQ.md\n[ds-with-ruby]: https:\u002F\u002Fgithub.com\u002Farbox\u002Fdata-science-with-ruby\n[ml-with-ruby]: https:\u002F\u002Fgithub.com\u002Farbox\u002Fmachine-learning-with-ruby\n[change-pr]: https:\u002F\u002Fgithub.com\u002FRichardLitt\u002Fknowledge\u002Fblob\u002Fmaster\u002Fgithub\u002Famending-a-commit-guide.md\n","\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Farbox_nlp-with-ruby_readme_4c8f03dae674.png\" align=\"center\">\n\n[![Awesome](https:\u002F\u002Fawesome.re\u002Fbadge-flat.svg)](https:\u002F\u002Fgithub.com\u002Fsindresorhus\u002Fawesome#readme) [![Support Me](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002F%F0%9F%92%97-Support%20Me-blue.svg?style=flat-square)](https:\u002F\u002Fwww.patreon.com\u002Farbox)\n\n[[RubyML](https:\u002F\u002Fgithub.com\u002Farbox\u002Fmachine-learning-with-ruby) |\n [RubyDataScience](https:\u002F\u002Fgithub.com\u002Farbox\u002Fdata-science-with-ruby) |\n [RubyInterop](https:\u002F\u002Fgithub.com\u002Farbox\u002Fruby-interoperability)]\n\n\n# 用 Ruby 实现的优秀自然语言处理项目 [\u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Farbox_nlp-with-ruby_readme_3f3894ee2234.jpg\" align=\"left\" width=\"30px\" height=\"30px\" 
\u002F>][ruby]\n\n> 用于在 Ruby 中进行文本处理的实用资源\n\n这份精心整理的清单包含了关于使用 [Ruby 编程语言](ruby)对人类语言文本进行计算处理的 _awesome_（精彩）资源、库和信息来源。该领域通常被称为 **自然语言处理**（NLP）、**计算语言学**、**人机语言技术**（HLT），并且常常与 **人工智能**、**机器学习**、**信息检索**、**文本挖掘**、**知识抽取** 等相关学科相结合。\n\n这份清单源自我们在语言模型和 NLP 工具方面的日常工作。阅读 [为什么](motivation.md) 这份清单如此出色。我们的 [常见问题解答](FAQ.md) 描述了重要的决策以及您可能感兴趣的一些有用答案。\n\n:sparkles: 欢迎每一份 [贡献](#contributing)! 您可以通过拉取请求添加链接，或者创建一个议题来发起讨论。\n\n请关注我们的 [Twitter](https:\u002F\u002Ftwitter.com\u002FNonWebRuby)，并使用 `#RubyNLP` 标签分享此列表！\n\n\u003C!-- nodoc -->\n## 目录\n\n\u003C!-- toc -->\n\n- [:sparkles: 教程](#sparkles-tutorials)\n- [NLP 流水线子任务](#nlp-pipeline-subtasks)\n  * [流水线生成](#pipeline-generation)\n  * [多功能引擎](#multipurpose-engines)\n    + [在线 API](#on-line-apis)\n  * [语言识别](#language-identification)\n  * [分词](#segmentation)\n  * [词汇处理](#lexical-processing)\n    + [词干提取](#stemming)\n    + [词形还原](#lemmatization)\n    + [词汇统计：类型与标记计数](#lexical-statistics-counting-types-and-tokens)\n    + [停用词过滤](#filtering-stop-words)\n  * [短语级处理](#phrasal-level-processing)\n  * [句法分析](#syntactic-processing)\n    + [依存句法分析](#constituency-parsing)\n  * [语义分析](#semantic-analysis)\n  * [语用分析](#pragmatical-analysis)\n- [高级任务](#high-level-tasks)\n  * [拼写与错误纠正](#spelling-and-error-correction)\n  * [文本对齐](#text-alignment)\n  * [机器翻译](#machine-translation)\n  * [情感分析](#sentiment-analysis)\n  * [数字、日期和时间解析](#numbers-dates-and-time-parsing)\n  * [命名实体识别](#named-entity-recognition)\n  * [文本转语音再转文本](#text-to-speech-to-text)\n- [对话代理、助手和聊天机器人](#dialog-agents-assistants-and-chatbots)\n- [语言资源](#linguistic-resources)\n- [机器学习库](#machine-learning-libraries)\n- [数据可视化](#data-visualization)\n- [光学字符识别](#optical-character-recognition)\n- [文本提取](#text-extraction)\n- [全文搜索、信息检索、索引](#full-text-search-information-retrieval-indexing)\n- [语言感知的字符串操作](#language-aware-string-manipulation)\n- [文章、帖子、演讲和演示文稿](#articles-posts-talks-and-presentations)\n- [项目和代码示例](#projects-and-code-examples)\n- [书籍](#books)\n- [社区](#community)\n- 
[需要您的帮助！](#needs-your-help)\n- [相关资源](#related-resources)\n- [许可证](#license)\n\n\u003C!-- tocstop -->\n\n\u003C!-- doc -->\n\n## :sparkles: 教程\n\n请帮助我们完善这一部分！ :smiley:\n\n## NLP 流水线子任务\n\nNLP 流水线从一段纯文本开始。\n\n### 流水线生成\n\n- [composable_operations](https:\u002F\u002Fgithub.com\u002Ft6d\u002Fcomposable_operations) -\n  用于定义操作流水线的框架。\n- [ruby-spark](https:\u002F\u002Fgithub.com\u002Fondra-m\u002Fruby-spark) -\n  Spark 绑定，提供易于理解的 DSL。\n- [phobos](https:\u002F\u002Fgithub.com\u002Fphobos\u002Fphobos) -\n  Apache Kafka 的简化 Ruby 客户端。\n- [parallel](https:\u002F\u002Fgithub.com\u002Fgrosser\u002Fparallel) -\n  多 CPU 或多线程并行执行的管理工具。\n- [pwrake](https:\u002F\u002Fgithub.com\u002Fmasa16\u002Fpwrake) -\n  Rake 扩展，用于并行运行本地和远程任务。\n\n### 多功能引擎\n\n- [open-nlp](https:\u002F\u002Fgithub.com\u002Flouismullie\u002Fopen-nlp) -\n  OpenNLP 工具包的 Ruby 绑定。\n- [stanford-core-nlp](https:\u002F\u002Fgithub.com\u002Flouismullie\u002Fstanford-core-nlp) -\n  斯坦福 CoreNLP 工具的 Ruby 绑定。\n- [treat](https:\u002F\u002Fgithub.com\u002Flouismullie\u002Ftreat) -\n  Ruby 的自然语言处理框架（类似于 Python 的 NLTK）。\n- [nlp_toolz](https:\u002F\u002Fgithub.com\u002FLeFnord\u002Fnlp_toolz) -\n  对 OpenNLP 部分类及原始 Berkeley Parser 的封装。\n- [open_nlp](https:\u002F\u002Fgithub.com\u002Fhck\u002Fopen_nlp) -\n  JRuby 版本的 OpenNLP 工具包绑定。\n- [ruby-spacy](https:\u002F\u002Fgithub.com\u002Fyohasebe\u002Fruby-spacy) &mdash;\n  通过 PyCall 封装 spaCy NLP 库的 Ruby 模块。\n\n#### 在线 API\n\n- [alchemyapi_ruby](https:\u002F\u002Fgithub.com\u002Falchemyapi\u002Falchemyapi_ruby) -\n  AlchemyAPI\u002FBluemix 的旧版 Ruby SDK。\n- [wit-ruby](https:\u002F\u002Fgithub.com\u002Fwit-ai\u002Fwit-ruby) -\n  Wit.ai 语言理解平台的 Ruby 客户端库。\n- [wlapi](https:\u002F\u002Fgithub.com\u002Farbox\u002Fwlapi) - Wortschatz Leipzig\n  网站服务的 Ruby 客户端库。\n- [monkeylearn-ruby](https:\u002F\u002Fgithub.com\u002Fmonkeylearn\u002Fmonkeylearn-ruby) - 通过基于 Ruby 的 Web API 客户端实现情感分析、主题建模、语言检测和命名实体识别。\n- 
[google-cloud-language](https:\u002F\u002Fgithub.com\u002Fgoogleapis\u002Fgoogle-cloud-ruby\u002Ftree\u002Fmaster\u002Fgoogle-cloud-language) -\n  Google 的自然语言处理服务 API for Ruby。\n\n### 语言识别\n\n语言识别是每个 NLP 流水线中的关键第一步之一。\n\n- [scylla](https:\u002F\u002Fgithub.com\u002Fhashwin\u002Fscylla) -\n  语言分类与识别。\n\n### 分词\n\n用于分词、词语和句子边界检测及歧义消解的工具。\n\n- [tokenizer](https:\u002F\u002Fgithub.com\u002Farbox\u002Ftokenizer) -\n  简单的多语言分词器。\n  \u003Csup>[[教程](tutorials\u002Ftokenizer.md)]\u003C\u002Fsup>\n- [pragmatic_tokenizer](https:\u002F\u002Fgithub.com\u002Fdiasks2\u002Fpragmatic_tokenizer) -\n  多语言分词器，用于将字符串拆分为标记。\n- [nlp-pure](https:\u002F\u002Fgithub.com\u002Fparhamr\u002Fnlp-pure) -\n  用纯 Ruby 实现的自然语言处理算法，依赖极少。\n- [textoken](https:\u002F\u002Fgithub.com\u002Fmanorie\u002Ftextoken) -\n  简单且可定制的文本分词库。\n- [pragmatic_segmenter](https:\u002F\u002Fgithub.com\u002Fdiasks2\u002Fpragmatic_segmenter) -\n  带有多种配置选项的词语边界歧义消解工具。\n- [punkt-segmenter](https:\u002F\u002Fgithub.com\u002Flfcipriani\u002Fpunkt-segmenter) -\n  Punkt 分段器的纯 Ruby 实现。\n- [tactful_tokenizer](https:\u002F\u002Fgithub.com\u002Fzencephalon\u002FTactful_Tokenizer) -\n  基于正则表达式的多语言分词器。\n- [scapel](https:\u002F\u002Fgithub.com\u002Flouismullie\u002Fscalpel) -\n  句子边界歧义消解工具。\n\n### 词汇处理\n\n#### 词干提取\n\n在信息检索中，“词干提取”是指将单词的不同形式归约为某种基本形式的过程。词干提取应与[词形还原](#lemmatization)区分开来，因为“词干”并不一定具有语言学上的合理性。\n\n- [ruby-stemmer](https:\u002F\u002Fgithub.com\u002Faurelian\u002Fruby-stemmer) -\n  Ruby-Stemmer 将 SnowBall API 暴露给 Ruby。\n- [uea-stemmer](https:\u002F\u002Fgithub.com\u002Fealdent\u002Fuea-stemmer) -\n  用于搜索和索引的保守型词干提取器。\n\n#### 词形还原\n\n词形还原被认为是寻找单词基本形式的过程。词形通常收录在词典中。\n\n- [lemmatizer](https:\u002F\u002Fgithub.com\u002Fyohasebe\u002Flemmatizer) -\n  基于 WordNet 的英语文本词形还原工具。\n\n#### 词汇统计：类型与标记计数\n\n- [wc](https:\u002F\u002Fgithub.com\u002Fthesp0nge\u002Fwc) -\n  用于统计文本中单词出现次数的工具。\n- [word_count](https:\u002F\u002Fgithub.com\u002FAtelierConvivialite\u002Fword_count) -\n  用于 `String` 和 `Hash` 对象的单词计数工具。\n- 
[words_counted](https:\u002F\u002Fgithub.com\u002Fabitdodgy\u002Fwords_counted) -\n  纯 Ruby 库，可根据不同自定义选项统计单词相关数据。\n\n#### 停用词过滤\n\n- [stopwords-filter](https:\u002F\u002Fgithub.com\u002Fbrenes\u002Fstopwords-filter) - \n  基于 SnowBall 词形还原器的停用词过滤器及停用词词典。\n\n### 短语级处理\n\n- [n_gram](https:\u002F\u002Fgithub.com\u002Freddavis\u002FN-Gram) -\n  N 元组生成器。\n- [ruby-ngram](https:\u002F\u002Fgithub.com\u002Ftkellen\u002Fruby-ngram) -\n  将单词和短语分解为 n 元组。\n- [raingrams](https:\u002F\u002Fgithub.com\u002Fpostmodern\u002Fraingrams) -\n  用纯 Ruby 编写的灵活通用的 n 元组库。\n\n### 句法处理\n\n#### 句法树分析\n\n- [stanfordparser](https:\u002F\u002Frubygems.org\u002Fgems\u002Fstanfordparser) -\n  基于 Stanford Parser 的 Ruby 封装库。\n- [rley](https:\u002F\u002Fgithub.com\u002Ffamished-tiger\u002FRley) -\n  纯 Ruby 实现的基于 [Earley](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FEarley_parser) 算法的上下文无关句法树分析器。\n- [rsyntaxtree](https:\u002F\u002Fgithub.com\u002Fyohasebe\u002Frsyntaxtree) -\n  基于 [RMagick](https:\u002F\u002Fgithub.com\u002Frmagick\u002Frmagick) 的 Ruby 句法树可视化工具。\n  \u003Csup>[依赖: [ImageMagick](#imagemagick)]\u003C\u002Fsup>\n\n### 语义分析\n\n- [amatch](https:\u002F\u002Fgithub.com\u002Fflori\u002Famatch) -\n  提供五种字符串之间的距离度量方法（包括 Levenshtein、Sellers、Jaro-Winkler 以及“成对距离”）。\n- [damerau-levenshtein](https:\u002F\u002Fgithub.com\u002FGlobalNamesArchitecture\u002Fdamerau-levenshtein) -\n  使用 Damerau-Levenshtein 算法计算编辑距离。\n- [hotwater](https:\u002F\u002Fgithub.com\u002Fcolinsurprenant\u002Fhotwater) -\n  快速的 Ruby FFI 字符串编辑距离算法。\n- [levenshtein-ffi](https:\u002F\u002Fgithub.com\u002Fdbalatero\u002Flevenshtein-ffi) -\n  使用 Damerau-Levenshtein 算法进行快速的字符串编辑距离计算。\n- [tf_idf](https:\u002F\u002Fgithub.com\u002Freddavis\u002FTF-IDF) -\n  纯 Ruby 实现的词频\u002F逆文档频率计算。\n- [tf-idf-similarity](https:\u002F\u002Fgithub.com\u002Fjpmckinney\u002Ftf-idf-similarity) -\n  使用 TF\u002FIDF 计算文本之间的相似度。\n\n### 语用分析\n- [SentimentLib](https:\u002F\u002Fgithub.com\u002Fnzaillian\u002Fsentiment_lib) -\n  简单且可扩展的情感分析 gem。\n\n## 
高层次任务\n\n### 拼写与错误纠正\n\n- [gingerice](https:\u002F\u002Fgithub.com\u002Fsubosito\u002Fgingerice) -\n  通过 [Ginger](https:\u002F\u002Fwww.gingersoftware.com\u002F) API 进行拼写和语法纠正。\n- [hunspell-i18n](https:\u002F\u002Fgithub.com\u002Fromanbsd\u002Fhunspell) -\n  Ruby 绑定到标准 [Hunspell](https:\u002F\u002Fhunspell.github.io\u002F) 拼写检查器。\n- [ffi-hunspell](https:\u002F\u002Fgithub.com\u002Fpostmodern\u002Fffi-hunspell) -\n  基于 FFI 的 Ruby 绑定，用于 [Hunspell](https:\u002F\u002Fhunspell.github.io\u002F)。\n- [hunspell](https:\u002F\u002Fgithub.com\u002Fsegabor\u002FHunspell) -\n  通过 Ruby C API 绑定到 [Hunspell](https:\u002F\u002Fhunspell.github.io\u002F)。\n\n### 文本对齐\n\n- [alignment](https:\u002F\u002Fgithub.com\u002Fpovilasjurcys\u002Falignment) -\n  用于双语文本的对齐程序（基于 Gale-Church 实现）。\n\n### 机器翻译\n\n- [google-api-client](https:\u002F\u002Fgithub.com\u002Fgoogleapis\u002Fgoogle-api-ruby-client) -\n  Google API 的 Ruby 客户端。\n- [microsoft_translator](https:\u002F\u002Fgithub.com\u002Fikayzo\u002Fmicrosoft_translator) -\n  微软翻译 API 的 Ruby 客户端。\n- [termit](https:\u002F\u002Fgithub.com\u002Fpawurb\u002Ftermit) -\n  在终端中使用语音合成功能的 Google 翻译。\n- [zipf](https:\u002F\u002Fgithub.com\u002Fpks\u002Fzipf) -\n  BLEU 及其他基础算法的实现。\n\n### 情感分析\n\n- [stimmung](https:\u002F\u002Fgithub.com\u002Fpachacamac\u002Fstimmung) -\n  基于 [SentiWS](http:\u002F\u002Fwortschatz.uni-leipzig.de\u002Fen\u002Fdownload) 词典的语义极性分析工具。\n\n### 数字、日期和时间解析\n\n- [chronic](https:\u002F\u002Fgithub.com\u002Fmojombo\u002Fchronic) -\n  纯 Ruby 自然语言日期解析器。\n- [chronic_between](https:\u002F\u002Fgithub.com\u002Fjrobertson\u002Fchronic_between) -\n  简单的 Ruby 自然语言日期和时间范围解析器。\n- [chronic_duration](https:\u002F\u002Fgithub.com\u002Fhenrypoydar\u002Fchronic_duration) -\n  纯 Ruby 的经过时间解析器。\n- [kronic](https:\u002F\u002Fgithub.com\u002Fxaviershay\u002Fkronic) -\n  用于解析和格式化人类可读日期的方法。\n- [nickel](https:\u002F\u002Fgithub.com\u002Fiainbeeston\u002Fnickel) -\n  从自然语言文本中提取日期、时间和消息信息。\n- [tickle](https:\u002F\u002Fgithub.com\u002Fyb66\u002Ftickle) -\n  
用于解析周期性和重复性事件的工具。\n- [numerizer](https:\u002F\u002Fgithub.com\u002Fjduff\u002Fnumerizer) -\n  英语数字表达式的 Ruby 解析器。\n\n### 命名实体识别\n\n- [ruby-ner](https:\u002F\u002Fgithub.com\u002Fmblongii\u002Fruby-ner) -\n  结合 Stanford NER 和 Ruby 的命名实体识别工具。\n- [ruby-nlp](https:\u002F\u002Fgithub.com\u002Ftiendung\u002Fruby-nlp) -\n  Stanford POS 标注器和命名实体识别器的 Ruby 绑定。\n\n### 文本转语音转文本\n\n- [espeak-ruby](https:\u002F\u002Fgithub.com\u002Fdejan\u002Fespeak-ruby) -\n  一个小型 Ruby API，用于利用 `espeak` 和 `lame` 创建文本转语音的 MP3 文件。\n- [tts](https:\u002F\u002Fgithub.com\u002Fc2h2\u002Ftts) -\n  使用 Google 翻译服务进行文本到语音的转换。\n- [att_speech](https:\u002F\u002Fgithub.com\u002Fadhearsion\u002Fatt_speech) -\n  基于 AT&T 语音 API 的 Ruby 封装，用于语音转文本。\n- [pocketsphinx-ruby](https:\u002F\u002Fgithub.com\u002Fwatsonbox\u002Fpocketsphinx-ruby) -\n  Pocketsphinx 的绑定。\n\n## 对话代理、助手与聊天机器人\n\n- [chatterbot](https:\u002F\u002Fgithub.com\u002Fmuffinista\u002Fchatterbot) -\n  一个基于 OAuth 认证的简单 Ruby Twitter 机器人框架。\n- [lita](https:\u002F\u002Fgithub.com\u002Flitaio\u002Flita) -\n  一个高度可扩展的聊天运营机器人框架，使用 [Redis](https:\u002F\u002Fredis.io\u002F) 进行持久化存储。\n\n## 语言学资源\n\n- [rwordnet](https:\u002F\u002Fgithub.com\u002Fdoches\u002Frwordnet) -\n  一个纯 Ruby 的自包含 API 库，用于访问 [普林斯顿 WordNet®](https:\u002F\u002Fwordnet.princeton.edu\u002F)。\n- [wordnet](https:\u002F\u002Fgithub.com\u002Fged\u002Fruby-wordnet\u002Fblob\u002Fmaster\u002FREADME.rdoc) -\n  针对 [普林斯顿 WordNet®](https:\u002F\u002Fwordnet.princeton.edu\u002F) 的高性能绑定。\n\n## 机器学习库\n\n纯 Ruby 实现或通过适当绑定在其他编程语言中实现，并为 Ruby 提供接口的 [机器学习](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FMachine_learning) 算法。\n\n如需更详细的列表，请参阅 [Awesome ML with Ruby][ml-with-ruby] 列表。\n\n- [rb-libsvm](https:\u002F\u002Fgithub.com\u002Ffebeling\u002Frb-libsvm) -\n  使用 Ruby 实现的支持向量机。\n- [weka](https:\u002F\u002Fgithub.com\u002Fpaulgoetze\u002Fweka-jruby) -\n  Weka 的 JRuby 绑定，通过 Weka 实现多种机器学习算法。\n- [decisiontree](https:\u002F\u002Fgithub.com\u002Figrigorik\u002Fdecisiontree) -\n  纯 Ruby 实现的决策树 ID3 算法\n  
\u003Csup>[[文章](https:\u002F\u002Fwww.igvita.com\u002F2007\u002F04\u002F16\u002Fdecision-tree-learning-in-ruby\u002F)]\u003C\u002Fsup>。\n- [rtimbl](https:\u002F\u002Fgithub.com\u002Fmaspwr\u002Frtimbl) -\n  基于 Timbl 框架的内存型学习器。\n- [classifier-reborn](https:\u002F\u002Fgithub.com\u002Fjekyll\u002Fclassifier-reborn) -\n  通用分类器模块，支持贝叶斯和其他类型的分类。\n- [lda-ruby](https:\u002F\u002Fgithub.com\u002Fealdent\u002Flda-ruby) -\n  Ruby 实现的 [LDA](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FLatent_Dirichlet_allocation)\n  （潜在狄利克雷分配），用于自动主题建模和文档聚类。\n- [liblinear-ruby-swig](https:\u002F\u002Fgithub.com\u002Ftomz\u002Fliblinear-ruby-swig) -\n  LIBLINEAR 的 Ruby 接口（在文本分类任务中比 LIBSVM 更高效）。\n- [linnaeus](https:\u002F\u002Fgithub.com\u002Fdjcp\u002Flinnaeus) -\n  基于 Redis 的贝叶斯分类器。\n- [maxent_string_classifier](https:\u002F\u002Fgithub.com\u002Fmccraigmccraig\u002Fmaxent_string_classifier) -\n  JRuby 最大熵分类器，适用于字符串数据，基于 OpenNLP Maxent 框架。\n- [naive_bayes](https:\u002F\u002Fgithub.com\u002Freddavis\u002FNaive-Bayes) -\n  简单的朴素贝叶斯分类器。\n- [nbayes](https:\u002F\u002Fgithub.com\u002Foasic\u002Fnbayes) -\n  功能齐全的 Ruby 朴素贝叶斯实现。\n- [omnicat](https:\u002F\u002Fgithub.com\u002Fmustafaturan\u002Fomnicat) -\n  通用的 Rack 框架，用于文本分类。\n- [omnicat-bayes](https:\u002F\u002Fgithub.com\u002Fmustafaturan\u002Fomnicat-bayes) -\n  作为 OmniCat 分类策略的朴素贝叶斯文本分类实现。\n- [ruby-fann](https:\u002F\u002Fgithub.com\u002Ftangledpath\u002Fruby-fann) -\n  [快速人工神经网络库 (FANN)](http:\u002F\u002Fleenissen.dk\u002Ffann\u002Fwp\u002F) 的 Ruby 绑定。\n- [rblearn](https:\u002F\u002Fgithub.com\u002Fhimkt\u002Frblearn) - 特征提取和交叉验证库。\n\n## 数据可视化\n\n请参考 [Data Science with Ruby][ds-with-ruby] 列表中的 [数据可视化](https:\u002F\u002Fgithub.com\u002Farbox\u002Fdata-science-with-ruby#visualization) 部分。\n\n## 光学字符识别\n\n* [tesseract-ocr](https:\u002F\u002Fgithub.com\u002Fmeh\u002Fruby-tesseract-ocr) -\n  基于 FFI 的封装，用于 [Tesseract OCR 引擎](https:\u002F\u002Fgithub.com\u002Ftesseract-ocr\u002Ftesseract)。\n\n## 文本提取\n\n- 
[yomu](https:\u002F\u002Fgithub.com\u002Fyomurb\u002Fyomu) -\n  使用 [Apache Tika](https:\u002F\u002Ftika.apache.org\u002F) 内容分析工具包，从文件和文档中提取文本和元数据的库。\n\n## 全文检索、信息检索、索引\n\n- [rsolr](https:\u002F\u002Fgithub.com\u002Frsolr\u002Frsolr) -\n  Apache Solr 的 Ruby 和 Rails 客户端库。\n- [sunspot](https:\u002F\u002Fgithub.com\u002Fsunspot\u002Fsunspot) -\n  以 Rails 为中心的 Apache Solr 客户端。\n- [thinking-sphinx](https:\u002F\u002Fgithub.com\u002Fpat\u002Fthinking-sphinx) -\n  一个 [Active Record](https:\u002F\u002Fguides.rubyonrails.org\u002Factive_record_basics.html) 插件，用于在（不仅限于）Rails 项目中使用 [Sphinx](http:\u002F\u002Fsphinxsearch.com\u002F)。\n- [elasticsearch](https:\u002F\u002Fgithub.com\u002Felastic\u002Felasticsearch-ruby\u002Ftree\u002Fmaster\u002Felasticsearch) -\n  Elasticsearch 的 Ruby 客户端和 API。\n- [elasticsearch-rails](https:\u002F\u002Fgithub.com\u002Felastic\u002Felasticsearch-rails) -\n  Elasticsearch 在 Ruby 和 Rails 中的集成。\n- [google-api-client](https:\u002F\u002Fgithub.com\u002Fgoogleapis\u002Fgoogle-api-ruby-client) -\n  Google 服务的 Ruby API 库。\n\n## 语言感知的字符串操作\n\n用于语言感知的字符串操作的库，例如搜索、模式匹配、大小写转换、转码以及需要了解底层语言信息的正则表达式。\n\n- [fuzzy_match](https:\u002F\u002Fgithub.com\u002Fseamusabshere\u002Ffuzzy_match) -\n  带有距离度量和正则表达式的模糊字符串比较。\n- [fuzzy-string-match](https:\u002F\u002Fgithub.com\u002Fkiyoka\u002Ffuzzy-string-match) -\n  用于 Ruby 的模糊字符串匹配库。\n- [active_support](https:\u002F\u002Fgithub.com\u002Frails\u002Frails\u002Ftree\u002Fmaster\u002Factivesupport\u002Flib\u002Factive_support) -\n  RoR 的 `ActiveSupport` gem 包含多种可以处理大小写的字符串扩展。\n- [fuzzy_tools](https:\u002F\u002Fgithub.com\u002Fbrianhempel\u002Ffuzzy_tools) -\n  一套针对 Ruby 的模糊搜索工具集，注重准确性。\n- [u](http:\u002F\u002Fdisu.se\u002Fsoftware\u002Fu-1.0\u002F) -\n  U 扩展了 Ruby 的 Unicode 支持。\n- [unicode](https:\u002F\u002Fgithub.com\u002Fblackwinter\u002Funicode) -\n  Unicode 规范化库。\n- [CommonRegexRuby](https:\u002F\u002Fgithub.com\u002Ftalyssonoc\u002FCommonRegexRuby) -\n  在字符串中查找多种常见信息。\n- 
[regexp-examples](https:\u002F\u002Fgithub.com\u002Ftom-lord\u002Fregexp-examples) -\n  生成符合给定正则表达式的字符串。\n- [verbal_expressions](https:\u002F\u002Fgithub.com\u002Fryan-endacott\u002Fverbal_expressions) -\n  将复杂的正则表达式变得简单易懂。\n- [translit_kit](https:\u002F\u002Fgithub.com\u002FAnalyzePlatypus\u002FTranslitKit) -\n  将希伯来语和意第绪语文本转写为拉丁字母。\n- [re2](https:\u002F\u002Fgithub.com\u002Fmudge\u002Fre2) -\n  高速正则表达式库，用于文本挖掘和文本提取。\n- [regex_sample](https:\u002F\u002Fgithub.com\u002Fmochizukikotaro\u002Fregex_sample) -\n  根据给定的正则表达式生成示例字符串。\n- [iuliia](https:\u002F\u002Fgithub.com\u002Fadnikiforov\u002Fiuliia-rb) &mdash;\n  多种方式将西里尔字母转写为拉丁字母（由 [参考实现](https:\u002F\u002Fgithub.com\u002Fnalgeon\u002Fiuliia) 定义）。\n\n## 文章、帖子、讲座和报告\n\n- 2019\n  - _使用 Ruby 从图像中提取文本_，作者：[aonemd](https:\u002F\u002Ftwitter.com\u002Faonemd)\n    \u003Csup>[[文章](https:\u002F\u002Faonemd.github.io\u002Fblog\u002Fextracting-text-from-images-using-ruby) |\n    [代码](https:\u002F\u002Fgist.github.com\u002Faonemd\u002F7bb3c4760d9e47a9ce8e270198cb40a0)]\u003C\u002Fsup>\n- 2018\n  - _自然语言处理与推文情感分析_，作者：[Cassandra Corrales](https:\u002F\u002Ftwitter.com\u002Fcasita305)\n    \u003Csup>[[文章](https:\u002F\u002Fmedium.com\u002F@cmcorrales3\u002Fnatural-language-processing-and-tweet-sentiment-analysis-fa1edbb5ddd5)]\u003C\u002Fsup>\n- 2017\n  - _Google NLP API 与 Ruby 的结合_，作者：[Aja Hammerly](https:\u002F\u002Ftwitter.com\u002Fthe_thagomizer)\n    \u003Csup>[[文章](http:\u002F\u002Fwww.thagomizer.com\u002Fblog\u002F2017\u002F04\u002F13\u002Fthe-google-nlp-api-meets-ruby.html)]\u003C\u002Fsup>\n  - _语法并非一切：面向 Ruby 开发者的自然语言处理_，作者：[Aja Hammerly](https:\u002F\u002Ftwitter.com\u002Fthe_thagomizer)\n    \u003Csup>[[幻灯片](http:\u002F\u002Fwww.thagomizer.com\u002Ffiles\u002FNLP_RailsConf2017.pdf)]\u003C\u002Fsup>\n  - _JRuby 上的科学计算_，作者：[Prasun Anand](https:\u002F\u002Ftwitter.com\u002Fprasun_anand)\n    \u003Csup>[[幻灯片](https:\u002F\u002Fwww.slideshare.net\u002FPrasunAnand2\u002Ffosdem2017-scientific-computing-on-jruby) |\n    
[视频](https:\u002F\u002Fftp.fau.de\u002Ffosdem\u002F2017\u002FK.4.201\u002Fruby_scientific_computing_on_jruby.mp4) |\n    [幻灯片](https:\u002F\u002Fwww.slideshare.net\u002FPrasunAnand2\u002Fscientific-computing-on-jruby) |\n    [幻灯片](https:\u002F\u002Fwww.slideshare.net\u002FPrasunAnand2\u002Fscientific-computation-on-jruby)]\u003C\u002Fsup>\n  - _Ruby 中的 Unicode 规范化_，作者：[Starr Horne](https:\u002F\u002Ftwitter.com\u002Fstarrhorne)\n    \u003Csup>[[文章](https:\u002F\u002Fblog.honeybadger.io\u002Fruby_unicode_normalization\u002F)]\u003C\u002Fsup>\n- 2016\n  - _用 Ruby 快速创建 Telegram 机器人_，作者：[Ardian Haxha](https:\u002F\u002Ftwitter.com\u002FArdianHaxha)\n    \u003Csup>[[教程](https:\u002F\u002Fwww.sitepoint.com\u002Fquickly-create-a-telegram-bot-in-ruby\u002F)]\u003C\u002Fsup>\n  - _深度学习：面向 Ruby 开发者的入门_，作者：[Geoffrey Litt](https:\u002F\u002Ftwitter.com\u002Fgeoffreylitt)\n    \u003Csup>[[幻灯片](https:\u002F\u002Fspeakerdeck.com\u002Fgeoffreylitt\u002Fdeep-learning-an-introduction-for-ruby-developers)]\u003C\u002Fsup>\n  - _我是如何让纯 Ruby 实现的 word2vec 程序速度提升 3 倍以上_，作者：[Kei Sawada](https:\u002F\u002Ftwitter.com\u002Fremore)\n    \u003Csup>[[幻灯片](https:\u002F\u002Fspeakerdeck.com\u002Fremore\u002Fhow-i-made-a-pure-ruby-word2vec-program-more-than-3x-faster)]\u003C\u002Fsup>\n  - _非常感谢，Roboto 先生：使用 Ruby 进行机器学习_，作者：[Eric Weinstein](https:\u002F\u002Ftwitter.com\u002Fericqweinstein)\n    \u003Csup>[[幻灯片](https:\u002F\u002Fspeakerdeck.com\u002Fericqweinstein\u002Fdomo-arigato-mr-roboto-machine-learning-with-ruby) | [视频](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=T1nFQ49TyeA)]\u003C\u002Fsup>\n- 2015\n  - _N 元语法分析：乐趣与收益兼得_，作者：[Jesus Castello](https:\u002F\u002Fgithub.com\u002Fmatugm)\n    \u003Csup>[[教程](https:\u002F\u002Fwww.rubyguides.com\u002F2015\u002F09\u002Fngram-analysis-ruby\u002F)]\u003C\u002Fsup>\n  - _用 Ruby 轻松实现机器学习_，作者：[Lorenzo Masini](https:\u002F\u002Fgithub.com\u002Frugginoso)\n    
\u003Csup>[[教程](https:\u002F\u002Fwww.leanpanda.com\u002Fblog\u002F2015\u002F08\u002F24\u002Fmachine-learning-automatic-classification\u002F)]\u003C\u002Fsup>\n  - _利用 Ruby 机器学习寻找帕丽斯·希尔顿的名言_，作者：[Rick Carlino](https:\u002F\u002Fgithub.com\u002FRickCarlino)\n    \u003Csup>[[教程](http:\u002F\u002Fweb.archive.org\u002Fweb\u002F20160414072324\u002Fhttp:\u002F\u002Fdatamelon.io\u002Fblog\u002F2015\u002Fusing-ruby-machine-learning-id-paris-hilton-quotes.html)]\u003C\u002Fsup>\n  - _在 Ruby 中探索自然语言处理_，作者：[Kevin Dias](https:\u002F\u002Fgithub.com\u002Fdiasks2)\n    \u003Csup>[[幻灯片](https:\u002F\u002Fwww.slideshare.net\u002Fdiasks2\u002Fexploring-natural-language-processing-in-ruby)]\u003C\u002Fsup>\n  - _用 Ruby 轻松实现机器学习_，作者：[Lorenzo Masini](https:\u002F\u002Ftwitter.com\u002Frugginoso)\n    \u003Csup>[[文章](https:\u002F\u002Fwww.leanpanda.com\u002Fblog\u002F2015\u002F08\u002F24\u002Fmachine-learning-automatic-classification\u002F)]\u003C\u002Fsup>\n  - _Ruby 中的实用数据科学_，作者：Bobby Grayson\n    \u003Csup>[[幻灯片](http:\u002F\u002Fslides.com\u002Fbobbygrayson\u002Fp#\u002F)]\u003C\u002Fsup>\n- 2014\n  - _使用 Ruby 进行自然语言解析_，作者：[Glauco Custódio](https:\u002F\u002Fgithub.com\u002Fglaucocustodio)\n    \u003Csup>[[教程](http:\u002F\u002Fglaucocustodio.github.io\u002F2014\u002F11\u002F10\u002Fnatural-language-parsing-with-ruby\u002F)]\u003C\u002Fsup>\n  - _揭秘数据科学：用 Rails 和 N 元语法分析会议演讲_，作者：\n    [Todd Schneider](https:\u002F\u002Fgithub.com\u002Ftoddwschneider)\n    \u003Csup>[[视频](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=2ZDCxwB29Bg) | [代码](https:\u002F\u002Fgithub.com\u002FGenius\u002Fabstractogram)]\u003C\u002Fsup>\n  - _使用 Ruby 进行自然语言处理_，作者：[Konstantin Tennhard](https:\u002F\u002Fgithub.com\u002Ft6d)\n    \u003Csup>[[视频](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=5u86qVh8r0M) | [视频](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=oFmy_QBQ5DU) |\n    [视频](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=sPkeeWnsMn0) |\n    
[幻灯片](http:\u002F\u002Feuruko2013.org\u002Fspeakers\u002Fpresentations\u002Fnatural_language_processing_with_ruby_and_opennlp-tennhard.pdf)]\u003C\u002Fsup>\n- 2013\n  - _如何解析“go”——Ruby 中的自然语言处理_，作者：\n    [Tom Cartwright](https:\u002F\u002Ftwitter.com\u002Ftomcartwrightuk)\n    \u003Csup>[[幻灯片](https:\u002F\u002Fwww.slideshare.net\u002FTomCartwright\u002Fnatual-language-processing-in-ruby) |\n    [视频](https:\u002F\u002Fskillsmatter.com\u002Fskillscasts\u002F4883-how-to-parse-go)]\u003C\u002Fsup>\n  - _Ruby 中的自然语言处理_，作者：[Brandon Black](https:\u002F\u002Ftwitter.com\u002Fbrandonmblack)\n    \u003Csup>[[幻灯片](https:\u002F\u002Fspeakerdeck.com\u002Fbrandonblack\u002Fnatural-language-processing-in-ruby) |\n    [视频](http:\u002F\u002Fconfreaks.tv\u002Fvideos\u002Frailsconf2013-natural-language-processing-with-ruby)]\u003C\u002Fsup>\n  - _使用 Ruby 进行自然语言处理：N 元语法_，作者：[Nathan Kleyn](https:\u002F\u002Fgithub.com\u002Fnathankleyn)\n    \u003Csup>[[教程](https:\u002F\u002Fwww.sitepoint.com\u002Fnatural-language-processing-ruby-n-grams\u002F) |\n    [代码](https:\u002F\u002Fgithub.com\u002Fnathankleyn\u002Fruby-nlp)]\u003C\u002Fsup>\n  - _寻找洛夫克拉夫特，第 1 部分：NLP 与 Treat Gem 的介绍_，作者：\n    [Robert Qualls](https:\u002F\u002Fgithub.com\u002Frlqualls)\n    \u003Csup>[[教程](https:\u002F\u002Fwww.sitepoint.com\u002Fseeking-lovecraft-part-1-an-introduction-to-nlp-and-the-treat-gem\u002F)]\u003C\u002Fsup>\n- 2012\n  - _使用 Ruby 进行机器学习，第一部分_，作者：[Vasily Vasinov](https:\u002F\u002Ftwitter.com\u002Fvasinov)\n    \u003Csup>[[教程](http:\u002F\u002Fwww.vasinov.com\u002Fblog\u002Fmachine-learning-with-ruby-part-one\u002F)]\u003C\u002Fsup>\n- 2011\n  - _Ruby 单行脚本_，作者：[Benoit Hamelin](https:\u002F\u002Ftwitter.com\u002Fbenoithamelin)\n    \u003Csup>[[文章](http:\u002F\u002Fbenoithamelin.tumblr.com\u002Fruby1line)]\u003C\u002Fsup>\n  - _在 Ruby 中进行聚类分析_，作者：[Colin Drake](https:\u002F\u002Ftwitter.com\u002Fcolinfdrake)\n    
\u003Csup>[[文章](https:\u002F\u002Fcolindrake.me\u002Fpost\u002Fk-means-clustering-in-ruby\u002F)]\u003C\u002Fsup>\n- 2010\n  - _bayes_motel——适用于 Ruby 的贝叶斯分类_，作者：[Mike Perham](https:\u002F\u002Ftwitter.com\u002Fmperham)\n    \u003Csup>[[文章](http:\u002F\u002Fwww.mikeperham.com\u002F2010\u002F04\u002F28\u002Fbayes_motel-bayesian-classification-for-ruby\u002F)]\u003C\u002Fsup>\n- 2009\n  - _将 UEA-Lite 词干提取器移植到 Ruby_，作者：[Jason Adams](https:\u002F\u002Ftwitter.com\u002Fealdent)\n    \u003Csup>[[文章](https:\u002F\u002Fealdent.wordpress.com\u002F2009\u002F07\u002F16\u002Fporting-the-uea-lite-stemmer-to-ruby\u002F)]\u003C\u002Fsup>\n  - _Ruby 的 NLP 资源_，作者：[Jason Adams](https:\u002F\u002Ftwitter.com\u002Fealdent)\n    \u003Csup>[[文章](https:\u002F\u002Fealdent.wordpress.com\u002F2009\u002F09\u002F13\u002Fnlp-resources-for-ruby\u002F)]\u003C\u002Fsup>\n- 2008\n  - _在 Ruby 中实现支持向量机 (SVM)_，作者：[Ilya Grigorik](https:\u002F\u002Ftwitter.com\u002Figrigorik)\n    \u003Csup>[[文章](https:\u002F\u002Fwww.igvita.com\u002F2008\u002F01\u002F07\u002Fsupport-vector-machines-svm-in-ruby\u002F)]\u003C\u002Fsup>\n  - _使用 Ruby 进行实用的文本分类_，作者：[Gleicon Moraes](https:\u002F\u002Ftwitter.com\u002Fgleicon)\n    \u003Csup>[[文章](https:\u002F\u002Fzenmachine.wordpress.com\u002Fpractical-text-classification-with-ruby\u002F) |\n    [代码](https:\u002F\u002Fgithub.com\u002Fgleicon\u002Fzenmachine)]\u003C\u002Fsup>\n- 2007\n  - _在 Ruby 中学习决策树_，作者：[Ilya Grigorik](https:\u002F\u002Ftwitter.com\u002Figrigorik)\n    \u003Csup>[[文章](https:\u002F\u002Fwww.igvita.com\u002F2007\u002F04\u002F16\u002Fdecision-tree-learning-in-ruby\u002F)]\u003C\u002Fsup>\n- 2006\n  - _说我的语言：使用 Ruby 进行自然语言处理_，作者：[Michael Granger](https:\u002F\u002Fdeveiate.org\u002Fresume.html)\n    \u003Csup>[[幻灯片](https:\u002F\u002Fdeveiate.org\u002Fmisc\u002FSpeak-My-Language.pdf) |\n    [报道](http:\u002F\u002Fblog.nicksieger.com\u002Farticles\u002F2006\u002F10\u002F22\u002Frubyconf-natural-language-generation-and-processing-in-ruby\u002F) |\n    
[报道](http:\u002F\u002Fjuixe.com\u002Fpapers\u002FRubyConf2006.pdf)]\u003C\u002Fsup>\n\n## 项目与代码示例\n\n- [Going the Distance](https:\u002F\u002Fgithub.com\u002Fschneems\u002Fgoing_the_distance) -\n  各种距离算法的实现及示例计算。\n- [使用 Stanford NER 和 Ruby 进行命名实体识别](https:\u002F\u002Fgithub.com\u002Fmblongii\u002Fruby-ner) -\n  Ruby 和 Java 中的 NER 示例，并附有[说明](https:\u002F\u002Fweb.archive.org\u002Fweb\u002F20120722225402\u002Fhttp:\u002F\u002Fmblongii.com\u002F2012\u002F04\u002F15\u002Fnamed-entity-recognition-with-stanford-ner-and-ruby\u002F)。\n- [Words Counted](http:\u002F\u002Frubywordcount.com\u002F) -\n  基于 [words_counted](https:\u002F\u002Fgithub.com\u002Fabitdodgy\u002Fwords_counted) 的可定制词频统计示例。\n- [RSyntaxTree](https:\u002F\u002Fyohasebe.com\u002Frsyntaxtree\u002F) -\n  基于 Web 的句法树可视化演示。\n\n## 图书\n\n-  [Miller, Rob](https:\u002F\u002Ftwitter.com\u002Frobmil\u002F)。\n   _用 Ruby 处理文本：从周围数据中提取价值。_\n   Pragmatic Programmers 出版社，2015 年。\n   \u003Csup>[[链接](https:\u002F\u002Fwww.amazon.com\u002FText-Processing-Ruby-Extract-Surrounds\u002Fdp\u002F1680500708)]\u003C\u002Fsup>\n-  [Watson, Mark](https:\u002F\u002Ftwitter.com\u002Fmark_l_watson)。\n   _脚本化智能：Web 3.0 信息收集与处理。_\n   APRESS 出版社，2010 年。\n   \u003Csup>[[链接](https:\u002F\u002Fwww.amazon.de\u002FScripting-Intelligence-Information-Gathering-Processing\u002Fdp\u002F1430223510)]\u003C\u002Fsup>\n-  [Watson, Mark](https:\u002F\u002Ftwitter.com\u002Fmark_l_watson)。\n   _实用语义网与关联数据应用。_ Lulu 出版社，2010 年。\n   \u003Csup>[[链接](http:\u002F\u002Fwww.lulu.com\u002Fshop\u002Fmark-watson\u002Fpractical-semantic-web-and-linked-data-applications-java-edition\u002Fpaperback\u002Fproduct-10915016.html)]\u003C\u002Fsup>\n\n## 社区\n\n- [Reddit](https:\u002F\u002Fwww.reddit.com\u002Fr\u002FLanguageTechnology\u002Fsearch?q=ruby&restrict_sr=on)\n- [Stack Overflow](https:\u002F\u002Fstackoverflow.com\u002Fsearch?q=%5Bnlp%5D+and+%5Bruby%5D)\n- 
[Twitter](https:\u002F\u002Ftwitter.com\u002Fsearch?q=Ruby%20NLP%20%23ruby%20OR%20%23nlproc%20OR%20%23rubynlp%20OR%20%23nlp&src=typd&lang=en)\n\n## 需要您的帮助！\n\n本节中的所有项目对社区都非常重要，但都需要更多关注。如果您有空闲时间和热情，请抽出一些时间来参与这些项目的开发。\n\n- [ferret](https:\u002F\u002Fgithub.com\u002Fdbalmain\u002Fferret) -\n  C 语言和 Ruby 语言的信息检索实现。\n- [summarize](https:\u002F\u002Fgithub.com\u002Fssoper\u002Fsummarize) -\n  [Open Text Summarizer](https:\u002F\u002Fgithub.com\u002Fneopunisher\u002FOpen-Text-Summarizer) 的 Ruby 原生封装库。\n\n## 相关资源\n\n- [神经机器翻译实现](https:\u002F\u002Fgithub.com\u002Fjonsafari\u002Fnmt-list)\n- [Awesome Ruby](https:\u002F\u002Fgithub.com\u002Fmarkets\u002Fawesome-ruby#natural-language-processing) -\n  其中包含一个简短的 NLP 相关项目列表。\n- [Ruby NLP](https:\u002F\u002Fgithub.com\u002Fdiasks2\u002Fruby-nlp) -\n  当前最先进的 Ruby NLP 库集合。\n- [语音与自然语言处理](https:\u002F\u002Fgithub.com\u002Fedobashira\u002Fspeech-language-processing) -\n  一般性的 NLP 相关资源列表（大多不针对 Ruby 程序员）。\n- [Scientific Ruby](http:\u002F\u002Fsciruby.com\u002F) -\n  用于 Ruby 的线性代数、可视化和科学计算工具。\n- [iRuby](https:\u002F\u002Fgithub.com\u002FSciRuby\u002Firuby) - Jupyter（原 IPython）的 IRuby 内核。\n- [Awesome OCR](https:\u002F\u002Fgithub.com\u002Fkba\u002Fawesome-ocr) -\n  大量的 OCR（光学字符识别）资源。\n- [Awesome TensorFlow](https:\u002F\u002Fgithub.com\u002Fjtoy\u002Fawesome-tensorflow) -\n  使用 TensorFlow 库进行机器学习。\n- \u003Ca name=\"imagemagic\">\u003C\u002Fa>\n  [ImageMagick](https:\u002F\u002Fimagemagick.org\u002Findex.php)\n\n## 许可证\n\n[![知识共享零协议 1.0](http:\u002F\u002Fmirrors.creativecommons.org\u002Fpresskit\u002Fbuttons\u002F80x15\u002Fsvg\u002Fcc-zero.svg)](https:\u002F\u002Fcreativecommons.org\u002Fpublicdomain\u002Fzero\u002F1.0\u002F) `用 Ruby 实现的 Awesome NLP` 由 [Andrei Beliankou](https:\u002F\u002Fgithub.com\u002Farbox) 和\n[贡献者](https:\u002F\u002Fgithub.com\u002Farbox\u002Fnlp-with-ruby\u002Fgraphs\u002Fcontributors) 创作。\n\n在法律允许的最大范围内，将 CC0 协议应用于 `Awesome NLP with Ruby` 的个人已放弃对该作品的所有版权及相关或邻接权利。\n\n您应当随本作品一起收到 CC0 法律文本的副本。如果没有，请参阅 
\u003Chttps:\u002F\u002Fcreativecommons.org\u002Fpublicdomain\u002Fzero\u002F1.0\u002F>。\n\n\u003C!--- 链接 --->\n[ruby]: https:\u002F\u002Fwww.ruby-lang.org\u002Fen\u002F\n[motivation]: https:\u002F\u002Fgithub.com\u002Farbox\u002Fnlp-with-ruby\u002Fblob\u002Fmaster\u002Fmotivation.md\n[faq]: https:\u002F\u002Fgithub.com\u002Farbox\u002Fnlp-with-ruby\u002Fblob\u002Fmaster\u002FFAQ.md\n[ds-with-ruby]: https:\u002F\u002Fgithub.com\u002Farbox\u002Fdata-science-with-ruby\n[ml-with-ruby]: https:\u002F\u002Fgithub.com\u002Farbox\u002Fmachine-learning-with-ruby\n[change-pr]: https:\u002F\u002Fgithub.com\u002FRichardLitt\u002Fknowledge\u002Fblob\u002Fmaster\u002Fgithub\u002Famending-a-commit-guide.md","# nlp-with-ruby 快速上手指南\n\n`nlp-with-ruby` 并非单一的代码库，而是一个精选的 Ruby 自然语言处理（NLP）资源列表。它汇集了用于文本分词、词性标注、句法分析、情感分析及机器翻译等任务的优秀开源库和工具。本指南将帮助你搭建环境并快速开始使用其中的核心工具。\n\n## 环境准备\n\n在开始之前，请确保你的开发环境满足以下要求：\n\n*   **操作系统**: Linux, macOS 或 Windows (推荐 WSL2)。\n*   **Ruby 版本**: 建议安装 Ruby 3.0 或更高版本。\n    *   推荐使用 `rbenv` 或 `rvm` 管理 Ruby 版本。\n*   **构建工具**: 部分 NLP 库依赖原生扩展编译，需安装基础构建工具。\n    *   **Ubuntu\u002FDebian**: `sudo apt-get install build-essential libxml2-dev libxslt1-dev`\n    *   **macOS**: `xcode-select --install`\n*   **前置依赖 (可选但推荐)**:\n    *   **Java (JDK 8+)**: 若需使用 `stanford-core-nlp` 或 `open-nlp` 等基于 Java 的工具。\n    *   **Python**: 若需通过 `ruby-spacy` 调用 spaCy 库。\n    *   **ImageMagick**: 若需使用 `rsyntaxtree` 进行句法树可视化。\n\n> **国内加速提示**：\n> 安装 Ruby Gem 时，建议切换至国内镜像源以提升下载速度：\n> ```bash\n> gem sources --add https:\u002F\u002Fgems.ruby-china.com\u002F --remove https:\u002F\u002Frubygems.org\u002F\n> gem sources -l # 确认列表中包含 ruby-china 源\n> ```\n\n## 安装步骤\n\n由于该列表包含多个独立库，你可以根据具体需求选择安装。以下是安装几个最常用核心库的步骤：\n\n### 1. 创建项目并初始化\n```bash\nmkdir my_nlp_project\ncd my_nlp_project\nbundle init\n```\n\n### 2. 
添加依赖\n编辑 `Gemfile`，添加你需要的 NLP 库。例如，若要使用通用框架 `treat` 和分词工具 `tokenizer`：\n\n```ruby\nsource 'https:\u002F\u002Fgems.ruby-china.com\u002F'\n\ngem 'treat'          # 综合 NLP 框架\ngem 'tokenizer'      # 多语言分词\ngem 'lemmatizer'     # 词形还原\ngem 'sentiment_lib'  # 情感分析\n```\n\n### 3. 安装依赖\n执行以下命令安装：\n\n```bash\nbundle install\n```\n\n> **注意**：如果你需要使用 `stanford-core-nlp`，还需单独下载 Stanford CoreNLP 的 Java jar 包并配置环境变量，具体请参考该库的 README。\n\n## 基本使用\n\n以下是一个简单的示例，演示如何使用 `treat` 框架加载文本，并结合 `tokenizer` 进行分词和基础统计。\n\n### 示例代码：文本分词与统计\n\n创建一个 `main.rb` 文件：\n\n```ruby\nrequire 'bundler\u002Fsetup'\nrequire 'treat'\nrequire 'tokenizer'\n\n# 启用 Treat 的默认设置\nTreat::Core::Plugin.load_all\n\n# 待处理的文本\ntext = \"Natural Language Processing with Ruby is powerful. 自然语言处理非常有趣！\"\n\n# 1. 使用 Treat 创建文本对象\nproject = Treat::Core::Project.new('my_project')\ndocument = Treat::Core::Document.new(text)\nproject \u003C\u003C document\n\n# 2. 获取句子和单词\nputs \"=== Treat 处理结果 ===\"\ndocument.sentences.each do |sentence|\n  puts \"句子：#{sentence.content}\"\n  puts \"单词数：#{sentence.words.size}\"\nend\n\n# 3. 使用 tokenizer 进行更细致的多语言分词\nputs \"\\n=== Tokenizer 分词结果 ===\"\ntokens = Tokenizer.tokenize(text)\nputs \"分词列表：#{tokens.inspect}\"\nputs \"Token 总数：#{tokens.size}\"\n\n# 4. 
简单的词频统计 (Lexical Statistics)\nword_counts = Hash.new(0)\ntokens.each { |token| word_counts[token.downcase] += 1 }\n\nputs \"\\n=== 词频统计 ===\"\nword_counts.sort_by { |_, count| -count }.first(5).each do |word, count|\n  puts \"#{word}: #{count}\"\nend\n```\n\n### 运行程序\n\n在终端执行：\n\n```bash\nruby main.rb\n```\n\n**预期输出示例**：\n```text\n=== Treat 处理结果 ===\n句子：Natural Language Processing with Ruby is powerful.\n单词数：7\n句子：自然语言处理非常有趣！\n单词数：1 (视具体分词策略而定)\n\n=== Tokenizer 分词结果 ===\n分词列表：[\"Natural\", \"Language\", \"Processing\", \"with\", \"Ruby\", \"is\", \"powerful\", \".\", \"自然\", \"语言\", \"处理\", \"非常\", \"有趣\", \"！\"]\nToken 总数：14\n\n=== 词频统计 ===\nnatural: 1\nlanguage: 1\n...\n```\n\n通过以上步骤，你已经成功搭建了 Ruby NLP 开发环境并完成了基础的文本处理流程。你可以查阅 `nlp-with-ruby` 列表中的其他库文档，进一步探索命名实体识别、句法分析或情感分析等高级功能。","一家专注于日本市场的电商初创公司，其 Ruby 开发团队需要构建一个实时的用户评论情感分析系统，以快速响应客户反馈并优化产品策略。\n\n### 没有 nlp-with-ruby 时\n- **技术栈割裂严重**：团队被迫在 Ruby 后端中嵌入 Python 微服务来处理 NLP 任务，导致架构复杂、部署维护成本高昂且网络延迟增加。\n- **语言支持匮乏**：现有的通用 Ruby 库缺乏针对日语的分词（Segmentation）和词形还原（Lemmatization）能力，无法准确处理复杂的日文语法结构。\n- **开发效率低下**：开发者需从零编写基础文本清洗逻辑，如停用词过滤和词干提取，耗费大量时间在重复造轮子上而非业务逻辑创新。\n- **资源查找困难**：缺乏统一的权威指南，团队难以甄别哪些开源库适合生产环境，常因选用不成熟库而导致线上故障。\n\n### 使用 nlp-with-ruby 后\n- **原生集成高效便捷**：借助清单中推荐的成熟引擎（如 Rroonga 或 MeCab 的 Ruby 绑定），团队直接在 Ruby 进程内完成全流程处理，显著降低系统延迟。\n- **多语言处理精准**：利用 curated list 中专门针对日语优化的分词与语义分析库，实现了对用户评论中细微情感倾向的精准捕捉。\n- **流水线搭建迅速**：基于清单提供的标准化子任务模块（如拼写纠错、命名实体识别），快速组装出完整的 NLP 管道，研发周期缩短 60%。\n- **决策依据充分**：依托社区验证过的“精选列表”，团队直接复用经过生产环境考验的库和教程，避免了选型试错风险。\n\nnlp-with-ruby 通过提供一站式的高质量资源索引，让 Ruby 开发者无需切换技术栈即可在原生态环境中构建专业级的自然语言处理应用。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Farbox_nlp-with-ruby_4c8f03da.png","arbox","Andrei Beliankou","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Farbox_38b49dfd.jpg","AI Engineering in 
Python\u002FSQL.","@eon-com","BY","arbox@yandex.com",null,"https:\u002F\u002Fgithub.com\u002Farbox",[83],{"name":84,"color":85,"percentage":86},"Ruby","#701516",100,1075,68,"2026-04-05T16:10:29","CC0-1.0",1,"未说明",{"notes":94,"python":95,"dependencies":96},"这是一个 Ruby 自然语言处理（NLP）资源的精选列表，而非单一的 AI 模型或工具。因此没有统一的运行环境需求。具体需求取决于你选择使用的某个特定库（例如：使用 ruby-spacy 需要安装 Python 和 spaCy；使用 rsyntaxtree 需要 ImageMagick；部分库可能需要 JRuby）。建议根据具体选用的子库查阅其各自的文档。","不适用 (主要基于 Ruby)",[84,97,98,99,100,101],"OpenNLP (可选)","Stanford CoreNLP (可选)","spaCy (通过 PyCall)","ImageMagick (用于 rsyntaxtree)","Hunspell (可选)",[14,35],[104,105,106,107,108,109,110,111,112,113,114,115],"machine-learning","natural-language-processing","ruby","nlp","sentiment-analysis","pos-tag","awesome","awesome-list","list","computational-linguistics","rubynlp","rubyml","2026-03-27T02:49:30.150509","2026-04-12T07:51:56.642251",[],[]]