[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-openvenues--libpostal":3,"tool-openvenues--libpostal":62},[4,18,26,36,46,54],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 
代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",159267,2,"2026-04-17T11:29:14",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":42,"last_commit_at":43,"category_tags":44,"status":17},8272,"opencode","anomalyco\u002Fopencode","OpenCode 是一款开源的 AI 编程助手（Coding Agent），旨在像一位智能搭档一样融入您的开发流程。它不仅仅是一个代码补全插件，而是一个能够理解项目上下文、自主规划任务并执行复杂编码操作的智能体。无论是生成全新功能、重构现有代码，还是排查难以定位的 Bug，OpenCode 都能通过自然语言交互高效完成，显著减少开发者在重复性劳动和上下文切换上的时间消耗。\n\n这款工具专为软件开发者、工程师及技术研究人员设计，特别适合希望利用大模型能力来提升编码效率、加速原型开发或处理遗留代码维护的专业人群。其核心亮点在于完全开源的架构，这意味着用户可以审查代码逻辑、自定义行为策略，甚至私有化部署以保障数据安全，彻底打破了传统闭源 AI 助手的“黑盒”限制。\n\n在技术体验上，OpenCode 提供了灵活的终端界面（Terminal UI）和正在测试中的桌面应用程序，支持 macOS、Windows 及 Linux 全平台。它兼容多种包管理工具，安装便捷，并能无缝集成到现有的开发环境中。无论您是追求极致控制权的资深极客，还是渴望提升产出的独立开发者，OpenCode 都提供了一个透明、可信",144296,1,"2026-04-16T14:50:03",[13,45],"插件",{"id":47,"name":48,"github_repo":49,"description_zh":50,"stars":51,"difficulty_score":32,"last_commit_at":52,"category_tags":53,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 
都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",108322,"2026-04-10T11:39:34",[14,15,13],{"id":55,"name":56,"github_repo":57,"description_zh":58,"stars":59,"difficulty_score":32,"last_commit_at":60,"category_tags":61,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[45,13,15,14],{"id":63,"github_repo":64,"name":65,"description_en":66,"description_zh":67,"ai_summary_zh":68,"readme_en":69,"readme_zh":70,"quickstart_zh":71,"use_case_zh":72,"hero_image_url":73,"owner_login":74,"owner_name":74,"owner_avatar_url":75,"owner_bio":76,"owner_company":77,"owner_location":77,"owner_email":77,"owner_twitter":77,"owner_website":77,"owner_url":78,"languages":79,"stars":101,"forks":102,"last_commit_at":103,"license":104,"difficulty_score":105,"env_os":106,"env_gpu":107,"env_ram":107,"env_deps":108,"category_tags":118,"github_topics":121,"view_count":32,"oss_zip_url":77,"oss_zip_packed_at":77,"status":17,"created_at":132,"updated_at":133,"faqs":134,"releases":164},8595,"openvenues\u002Flibpostal","libpostal","A C library for parsing\u002Fnormalizing street addresses around the world. 
Powered by statistical NLP and open geo data.","libpostal 是一个专为全球街道地址解析与标准化设计的开源 C 语言库。它致力于让计算机能够像人类一样，理解并处理世界各地不同语言、不同格式的地址字符串。\n\n在日常应用中，地址数据往往充满挑战：各国书写习惯差异巨大，缩写、俚语和本地惯例层出不穷，导致传统搜索引擎难以准确索引或匹配。libpostal 正是为了解决这一痛点而生。它能将用户输入的自由格式地址（如“北京市朝阳区建国路 88 号”或\"1600 Amphitheatre Pkwy, Mountain View, CA\"）自动拆解并转换为干净、统一的标准化形式，极大提升了机器比对和地理编码的准确性。\n\n这款工具特别适合开发者、数据工程师及地理空间研究人员使用。无论是构建地图搜索、物流配送系统，还是清洗大规模位置数据，libpostal 都能作为强大的预处理组件，让应用在国际范围内表现得更智能、更一致。\n\n其核心技术亮点在于结合了统计自然语言处理（NLP）与 OpenStreetMap 等开放地理数据。不同于依赖固定规则的传统方案，libpostal 通过机器学习模型自动学习全球地址模式，无需手动维护繁琐的规则库，即可灵活适应","libpostal 是一个专为全球街道地址解析与标准化设计的开源 C 语言库。它致力于让计算机能够像人类一样，理解并处理世界各地不同语言、不同格式的地址字符串。\n\n在日常应用中，地址数据往往充满挑战：各国书写习惯差异巨大，缩写、俚语和本地惯例层出不穷，导致传统搜索引擎难以准确索引或匹配。libpostal 正是为了解决这一痛点而生。它能将用户输入的自由格式地址（如“北京市朝阳区建国路 88 号”或\"1600 Amphitheatre Pkwy, Mountain View, CA\"）自动拆解并转换为干净、统一的标准化形式，极大提升了机器比对和地理编码的准确性。\n\n这款工具特别适合开发者、数据工程师及地理空间研究人员使用。无论是构建地图搜索、物流配送系统，还是清洗大规模位置数据，libpostal 都能作为强大的预处理组件，让应用在国际范围内表现得更智能、更一致。\n\n其核心技术亮点在于结合了统计自然语言处理（NLP）与 OpenStreetMap 等开放地理数据。不同于依赖固定规则的传统方案，libpostal 通过机器学习模型自动学习全球地址模式，无需手动维护繁琐的规则库，即可灵活适应从巴西到日本等上百个国家的复杂地址结构。","# libpostal: international street address NLP\n\n[![Build Status](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Factions\u002Fworkflows\u002Ftest.yml\u002Fbadge.svg)](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Factions)\n[![Build Status](https:\u002F\u002Fci.appveyor.com\u002Fapi\u002Fprojects\u002Fstatus\u002Fgithub\u002Fopenvenues\u002Flibpostal?branch=master&svg=true)](https:\u002F\u002Fci.appveyor.com\u002Fproject\u002Falbarrentine\u002Flibpostal\u002Fbranch\u002Fmaster)\n[![License](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Flicense\u002Fopenvenues\u002Flibpostal.svg)](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Fblob\u002Fmaster\u002FLICENSE)\n[![OpenCollective Sponsors](https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsors\u002Fbadge.svg)](#sponsors)\n[![OpenCollective 
Backers](https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbackers\u002Fbadge.svg)](#backers)\n\nlibpostal is a C library for parsing\u002Fnormalizing street addresses around the world using statistical NLP and open data. The goal of this project is to understand location-based strings in every language, everywhere. For a more comprehensive overview of the research behind libpostal, be sure to check out the (lengthy) introductory blog posts:\n\n- **Original post**: [Statistical NLP on OpenStreetMap](https:\u002F\u002Fmedium.com\u002F@albarrentine\u002Fstatistical-nlp-on-openstreetmap-b9d573e6cc86)\n- **Follow-up for 1.0 release**: [Statistical NLP on OpenStreetMap: Part 2](https:\u002F\u002Fmedium.com\u002F@albarrentine\u002Fstatistical-nlp-on-openstreetmap-part-2-80405b988718)\n\n\u003Cspan>&#x1f1e7;&#x1f1f7;\u003C\u002Fspan> \u003Cspan>&#x1f1eb;&#x1f1ee;\u003C\u002Fspan>  \u003Cspan>&#x1f1f3;&#x1f1ec;\u003C\u002Fspan> :jp: \u003Cspan>&#x1f1fd;&#x1f1f0; \u003C\u002Fspan> \u003Cspan>&#x1f1e7;&#x1f1e9; \u003C\u002Fspan> \u003Cspan>&#x1f1f5;&#x1f1f1; \u003C\u002Fspan> \u003Cspan>&#x1f1fb;&#x1f1f3; \u003C\u002Fspan> \u003Cspan>&#x1f1e7;&#x1f1ea; \u003C\u002Fspan> \u003Cspan>&#x1f1f2;&#x1f1e6; \u003C\u002Fspan> \u003Cspan>&#x1f1fa;&#x1f1e6; \u003C\u002Fspan> \u003Cspan>&#x1f1ef;&#x1f1f2; \u003C\u002Fspan> :ru: \u003Cspan>&#x1f1ee;&#x1f1f3; \u003C\u002Fspan> \u003Cspan>&#x1f1f1;&#x1f1fb; \u003C\u002Fspan> \u003Cspan>&#x1f1e7;&#x1f1f4; \u003C\u002Fspan> :de: \u003Cspan>&#x1f1f8;&#x1f1f3; \u003C\u002Fspan>  \u003Cspan>&#x1f1e6;&#x1f1f2; \u003C\u002Fspan> :kr: \u003Cspan>&#x1f1f3;&#x1f1f4; \u003C\u002Fspan>  \u003Cspan>&#x1f1f2;&#x1f1fd; \u003C\u002Fspan> \u003Cspan>&#x1f1e8;&#x1f1ff; \u003C\u002Fspan> \u003Cspan>&#x1f1f9;&#x1f1f7; \u003C\u002Fspan> :es: \u003Cspan>&#x1f1f8;&#x1f1f8; \u003C\u002Fspan> \u003Cspan>&#x1f1ea;&#x1f1ea; \u003C\u002Fspan> \u003Cspan>&#x1f1e7;&#x1f1ed; \u003C\u002Fspan> \u003Cspan>&#x1f1f3;&#x1f1f1; \u003C\u002Fspan> :cn:  
\u003Cspan>&#x1f1f5;&#x1f1f9; \u003C\u002Fspan> \u003Cspan>&#x1f1f5;&#x1f1f7; \u003C\u002Fspan> :gb: \u003Cspan>&#x1f1f5;&#x1f1f8; \u003C\u002Fspan>\n\nAddresses and the locations they represent are essential for any application dealing with maps (place search, transportation, on-demand\u002Fdelivery services, check-ins, reviews). Yet even the simplest addresses are packed with local conventions, abbreviations and context, making them difficult to index\u002Fquery effectively with traditional full-text search engines. This library helps convert the free-form addresses that humans use into clean normalized forms suitable for machine comparison and full-text indexing. Though libpostal is not itself a full geocoder, it can be used as a preprocessing step to make any geocoding application smarter, simpler, and more consistent internationally.\n\n\u003Cspan>&#x1f1f7;&#x1f1f4; \u003C\u002Fspan> \u003Cspan>&#x1f1ec;&#x1f1ed; \u003C\u002Fspan> \u003Cspan>&#x1f1e6;&#x1f1fa; \u003C\u002Fspan> \u003Cspan>&#x1f1f2;&#x1f1fe; \u003C\u002Fspan> \u003Cspan>&#x1f1ed;&#x1f1f7; \u003C\u002Fspan> \u003Cspan>&#x1f1ed;&#x1f1f9; \u003C\u002Fspan> :us: \u003Cspan>&#x1f1ff;&#x1f1e6; \u003C\u002Fspan> \u003Cspan>&#x1f1f7;&#x1f1f8; \u003C\u002Fspan> \u003Cspan>&#x1f1e8;&#x1f1f1; \u003C\u002Fspan> :it: \u003Cspan>&#x1f1f0;&#x1f1ea; \u003Cspan>&#x1f1e8;&#x1f1ed; \u003C\u002Fspan> \u003Cspan>&#x1f1e8;&#x1f1fa; \u003C\u002Fspan> \u003Cspan>&#x1f1f8;&#x1f1f0; \u003C\u002Fspan> \u003Cspan>&#x1f1e6;&#x1f1f4; \u003C\u002Fspan> \u003Cspan>&#x1f1e9;&#x1f1f0; \u003C\u002Fspan> \u003Cspan>&#x1f1f9;&#x1f1ff; \u003C\u002Fspan> \u003Cspan>&#x1f1e6;&#x1f1f1; \u003C\u002Fspan> \u003Cspan>&#x1f1e8;&#x1f1f4; \u003C\u002Fspan> \u003Cspan>&#x1f1ee;&#x1f1f1; \u003C\u002Fspan> \u003Cspan>&#x1f1ec;&#x1f1f9; \u003C\u002Fspan>  :fr: \u003Cspan>&#x1f1f5;&#x1f1ed; \u003C\u002Fspan> \u003Cspan>&#x1f1e6;&#x1f1f9; \u003C\u002Fspan> \u003Cspan>&#x1f1f1;&#x1f1e8; \u003C\u002Fspan>  \u003Cspan>&#x1f1ee;&#x1f1f8; 
\u003Cspan>&#x1f1ee;&#x1f1e9; \u003C\u002Fspan> \u003C\u002Fspan> \u003Cspan>&#x1f1e6;&#x1f1ea; \u003C\u002Fspan> \u003C\u002Fspan> \u003Cspan>&#x1f1f8;&#x1f1f0; \u003C\u002Fspan> \u003Cspan>&#x1f1f9;&#x1f1f3; \u003C\u002Fspan> \u003Cspan>&#x1f1f0;&#x1f1ed; \u003C\u002Fspan> \u003Cspan>&#x1f1e6;&#x1f1f7; \u003C\u002Fspan> \u003Cspan>&#x1f1ed;&#x1f1f0; \u003C\u002Fspan>\n\nThe core library is written in pure C. Language bindings for [Python](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fpypostal), [Ruby](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fruby_postal), [Go](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fgopostal), [Java](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fjpostal), [PHP](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fphp-postal), and [NodeJS](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fnode-postal) are officially supported and it's easy to write bindings in other languages.\n\nSponsors\n--------\n\nIf your company is using libpostal, consider asking your organization to sponsor the project. Interpreting what humans mean when they refer to locations is far from a solved problem, and sponsorships help us pursue new frontiers in geospatial NLP. As a sponsor, your company logo will appear prominently on the Github repo page along with a link to your site. 
[Sponsorship info](https:\u002F\u002Fopencollective.com\u002Flibpostal#sponsor)\n\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F0\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F0\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F1\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F1\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F2\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F2\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F3\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F3\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F4\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F4\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F5\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F5\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F6\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F6\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F7\u002Fwebsite\" target=\"_blank\">\u003Cimg 
src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F7\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F8\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F8\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F9\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F9\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F10\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F10\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F11\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F11\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F12\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F12\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F13\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F13\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F14\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F14\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F15\u002Fwebsite\" target=\"_blank\">\u003Cimg 
src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F15\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F16\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F16\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F17\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F17\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F18\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F18\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F19\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F19\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F20\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F20\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F21\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F21\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F22\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F22\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F23\u002Fwebsite\" target=\"_blank\">\u003Cimg 
src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F23\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F24\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F24\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F25\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F25\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F26\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F26\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F27\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F27\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F28\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F28\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F29\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F29\u002Favatar.svg\">\u003C\u002Fa>\n\nBackers\n------------\n\nIndividual users can also help support open geo NLP research by making a monthly donation:\n\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F0\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F0\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca 
href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F1\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F1\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F2\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F2\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F3\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F3\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F4\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F4\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F5\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F5\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F6\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F6\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F7\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F7\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F8\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F8\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca 
href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F9\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F9\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F10\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F10\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F11\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F11\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F12\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F12\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F13\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F13\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F14\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F14\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F15\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F15\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F16\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F16\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca 
href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F17\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F17\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F18\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F18\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F19\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F19\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F20\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F20\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F21\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F21\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F22\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F22\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F23\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F23\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F24\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F24\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca 
href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F25\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F25\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F26\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F26\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F27\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F27\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F28\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F28\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F29\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F29\u002Favatar.svg\">\u003C\u002Fa>\n\nInstallation (Mac\u002FLinux)\n------------------------\n\nBefore you install, make sure you have the following prerequisites:\n\n**On Ubuntu\u002FDebian**\n```\nsudo apt-get install -y curl build-essential autoconf automake libtool pkg-config\n```\n\n**On CentOS\u002FRHEL**\n```\nsudo yum install curl autoconf automake libtool pkgconfig\n```\n\n**On macOS**\n\nInstall with one command via [MacPorts](https:\u002F\u002Fwww.macports.org\u002F):\n```\nport install libpostal\n```\n\nOr with [Homebrew](https:\u002F\u002Fbrew.sh\u002F):\n\n```\nbrew install libpostal\n```\n\nTo compile the C library from source:\n\nIf you're using an M1 Mac, add `--disable-sse2` to the `.\u002Fconfigure` command. 
This will result in poorer performance but the build will succeed.\n\n```\ngit clone https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\ncd libpostal\n\n# skip if installing for the first time\nmake distclean\n\n.\u002Fbootstrap.sh\n\n# omit --datadir flag to install data in current directory\n.\u002Fconfigure --datadir=[...some dir with a few GB of space where a \"libpostal\" directory exists or can be created\u002Fmodified...]\nmake -j4\n\n# For Intel\u002FAMD processors and the default model\n.\u002Fconfigure --datadir=[...some dir with a few GB of space where a \"libpostal\" directory exists or can be created\u002Fmodified...]\n\n# For Apple \u002F ARM cpus and the default model\n.\u002Fconfigure --datadir=[...some dir with a few GB of space where a \"libpostal\" directory exists or can be created\u002Fmodified...] --disable-sse2\n\n# For the improved Senzing model:\n.\u002Fconfigure --datadir=[...some dir with a few GB of space where a \"libpostal\" directory exists or can be created\u002Fmodified...] MODEL=senzing\n\nmake -j8\nsudo make install\n\n# On Linux it's probably a good idea to run\nsudo ldconfig\n```\n\nlibpostal has support for pkg-config, so you can use the pkg-config to print the flags needed to link your program against it:\n\n```\npkg-config --cflags libpostal         # print compiler flags\npkg-config --libs libpostal           # print linker flags\npkg-config --cflags --libs libpostal  # print both\n```\n\nFor example, if you write a program called app.c, you can compile it like this:\n\n```\ngcc app.c `pkg-config --cflags --libs libpostal`\n```\n\nInstallation (Windows)\n----------------------\n\n**MSys2\u002FMinGW**\n\nFor Windows the build procedure currently requires MSys2 and MinGW. This can be downloaded from http:\u002F\u002Fmsys2.org. 
Please follow the instructions on the MSys2 website for installation.\n\nPlease ensure Msys2 is up-to-date by running:\n```\npacman -Syu\n```\n\nInstall the following prerequisites:\n```\npacman -S autoconf automake curl git make libtool gcc mingw-w64-x86_64-gcc\n```\n\nThen to build the C library:\n```\ngit clone https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\ncd libpostal\ncp -rf windows\u002F* .\u002F\n.\u002Fbootstrap.sh\n.\u002Fconfigure --datadir=[...some dir with a few GB of space...]\nmake -j4\nmake install\n```\nNotes: When setting the datadir, the `C:` drive would be entered as `\u002Fc`. The libpostal build script automatically adds `libpostal` to the end of the path, so '\u002Fc' would become `C:\\libpostal\\` on Windows.\n\nThe compiled .dll will be in the `src\u002F.libs\u002F` directory and should be called `libpostal-1.dll`.\n\nIf you require a .lib import library to link this to your application, you can generate one using the Visual Studio `lib.exe` tool and the `libpostal.def` definition file:\n```\nlib.exe \u002Fdef:libpostal.def \u002Fout:libpostal.lib \u002Fmachine:x64\n```\n\nInstallation with an alternative data model\n-------------------------------------------\n\nAn alternative data model is available for libpostal. It is created by Senzing Inc. for improved parsing on US, UK and Singapore addresses and improved US rural route address handling.\nTo enable this add `MODEL=senzing` to the configure line during installation:\n```\n.\u002Fconfigure --datadir=[...some dir with a few GB of space...] MODEL=senzing\n```\n\nThe data for this model is gotten from [OpenAddress](https:\u002F\u002Fopenaddresses.io\u002F), [OpenStreetMap](https:\u002F\u002Fwww.openstreetmap.org\u002F) and data generated by Senzing based on customer feedback (a few hundred records), a total of about 1.2 billion records of data from over 230 countries, in 100+ languages. 
The data from OpenStreetMap and OpenAddress is good but not perfect so the data set was modified by filtering out badly formed addresses, correcting misclassified address tokens and removing tokens that didn't belong in the addresses, whenever these conditions were encountered.\n\nSenzing created a data set of 12950 addresses from 89 countries that it uses to test and verify the quality of its models. The data set was generated using random addresses from OSM, minimally 50 per country. Hard-to-parse addresses were gotten from Senzing support team and customers and from the libpostal github page and added to this set. The Senzing model got 4.3% better parsing results than the default model, using this test set.\n\nThe size of this model is about 2.2GB compared to 1.8GB for the default model so keep that in mind if storage space is important.\n\nFurther information about this data model can be found at: https:\u002F\u002Fgithub.com\u002FSenzing\u002Flibpostal-data\nIf you run into any issues with this model, whether they have to do with parses, installation or any other problems, then please report them at https:\u002F\u002Fgithub.com\u002FSenzing\u002Flibpostal-data\n\nExamples of parsing\n-------------------\n\nlibpostal's international address parser uses machine learning (Conditional Random Fields) and is trained on over 1 billion addresses in every inhabited country on Earth. We use [OpenStreetMap](https:\u002F\u002Fopenstreetmap.org) and [OpenAddresses](https:\u002F\u002Fopenaddresses.io) as sources of structured addresses, and the OpenCage address format templates at: https:\u002F\u002Fgithub.com\u002FOpenCageData\u002Faddress-formatting to construct the training data, supplementing with containing polygons, and generating sub-building components like apartment\u002Ffloor numbers and PO boxes. We also add abbreviations, drop out components at random, etc. 
to make the parser as robust as possible to messy real-world input.\n\nThese example parse results are taken from the interactive address_parser program\nthat builds with libpostal when you run ```make```. Note that the parser can handle\ncommas vs. no commas as well as various casings and permutations of components (if the input\nis e.g. just city or just city\u002Fpostcode).\n\n![parser](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fopenvenues_libpostal_readme_648869e82e4a.gif)\n\nThe parser achieves very high accuracy on held-out data, currently 99.45%\ncorrect full parses (meaning a 1 in the numerator for getting *every* token\nin the address correct).\n\nUsage (parser)\n--------------\n\nHere's an example of the parser API using the Python bindings:\n\n```python\n\nfrom postal.parser import parse_address\nparse_address('The Book Club 100-106 Leonard St Shoreditch London EC2A 4RH, United Kingdom')\n```\n\nAnd an example with the C API:\n\n```c\n#include \u003Cstdio.h>\n#include \u003Cstdlib.h>\n#include \u003Clibpostal\u002Flibpostal.h>\n\nint main(int argc, char **argv) {\n    \u002F\u002F Setup (only called once at the beginning of your program)\n    if (!libpostal_setup() || !libpostal_setup_parser()) {\n        exit(EXIT_FAILURE);\n    }\n\n    libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();\n    libpostal_address_parser_response_t *parsed = libpostal_parse_address(\"781 Franklin Ave Crown Heights Brooklyn NYC NY 11216 USA\", options);\n\n    for (size_t i = 0; i \u003C parsed->num_components; i++) {\n        printf(\"%s: %s\\n\", parsed->labels[i], parsed->components[i]);\n    }\n\n    \u002F\u002F Free parse result\n    libpostal_address_parser_response_destroy(parsed);\n\n    \u002F\u002F Teardown (only called once at the end of your program)\n    libpostal_teardown();\n    libpostal_teardown_parser();\n}\n```\n\nParser labels\n-------------\n\nThe address parser can technically use any string labels 
that are defined in the training data, but these are the ones currently defined, based on the fields defined in [OpenCage's address-formatting library](https:\u002F\u002Fgithub.com\u002FOpenCageData\u002Faddress-formatting), as well as a few added by libpostal to handle specific patterns:\n\n- **house**: venue name e.g. \"Brooklyn Academy of Music\", and building names e.g. \"Empire State Building\"\n- **category**: for category queries like \"restaurants\", etc.\n- **near**: phrases like \"in\", \"near\", etc. used after a category phrase to help with parsing queries like \"restaurants in Brooklyn\"\n- **house_number**: usually refers to the external (street-facing) building number. In some countries this may be a compound, hyphenated number which also includes an apartment number, or a block number (a la Japan), but libpostal will just call it the house_number for simplicity.\n- **road**: street name(s)\n- **unit**: an apartment, unit, office, lot, or other secondary unit designator\n- **level**: expressions indicating a floor number e.g. \"3rd Floor\", \"Ground Floor\", etc.\n- **staircase**: numbered\u002Flettered staircase\n- **entrance**: numbered\u002Flettered entrance\n- **po_box**: post office box: typically found in non-physical (mail-only) addresses\n- **postcode**: postal codes used for mail sorting\n- **suburb**: usually an unofficial neighborhood name like \"Harlem\", \"South Bronx\", or \"Crown Heights\"\n- **city_district**: these are usually boroughs or districts within a city that serve some official purpose e.g. \"Brooklyn\" or \"Hackney\" or \"Bratislava IV\"\n- **city**: any human settlement including cities, towns, villages, hamlets, localities, etc.\n- **island**: named islands e.g. \"Maui\"\n- **state_district**: usually a second-level administrative division or county.\n- **state**: a first-level administrative division. 
Scotland, Northern Ireland, Wales, and England in the UK are mapped to \"state\" as well (convention used in OSM, GeoPlanet, etc.)\n- **country_region**: informal subdivision of a country without any political status\n- **country**: sovereign nations and their dependent territories, anything with an [ISO-3166 code](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FISO_3166-1_alpha-2).\n- **world_region**: currently only used for appending “West Indies” after the country name, a pattern frequently used in the English-speaking Caribbean e.g. “Jamaica, West Indies”\n\nExamples of normalization\n-------------------------\n\nThe expand_address API converts messy real-world addresses into normalized\nequivalents suitable for search indexing, hashing, etc.\n\nHere's an interactive example using the Python binding:\n\n![expand](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fopenvenues_libpostal_readme_59538734d27a.gif)\n\nlibpostal contains an OSM-trained language classifier to detect which language(s) are used in a given\naddress so it can apply the appropriate normalizations. The only input needed is the raw address string.\nHere's a short list of some less straightforward normalizations in various languages.\n\n| Input                               | Output (may be multiple in libpostal)   |\n| ----------------------------------- |-----------------------------------------|\n| One-hundred twenty E 96th St        | 120 east 96th street                    |\n| C\u002F Ocho, P.I. 4                     | calle 8 polígono industrial 4           |\n| V XX Settembre, 20                  | via 20 settembre 20                     |\n| Quatre vingt douze R. 
de l'Église   | 92 rue de l eglise                      |\n| ул Каретный Ряд, д 4, строение 7    | улица каретныи ряд дом 4 строение 7     |\n| ул Каретный Ряд, д 4, строение 7    | ulitsa karetnyy ryad dom 4 stroyeniye 7 |\n| Marktstraße 14                      | markt strasse 14                        |\n\nlibpostal currently supports these types of normalizations in *60+ languages*,\nand you can [add more](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Ftree\u002Fmaster\u002Fresources\u002Fdictionaries) (without having to write any C).\n\nFor further reading and some bizarre address edge-cases, see:\n[Falsehoods Programmers Believe About Addresses](https:\u002F\u002Fwww.mjt.me.uk\u002Fposts\u002Ffalsehoods-programmers-believe-about-addresses\u002F).\n\nUsage (normalization)\n---------------------\n\nHere's an example using the Python bindings for succinctness (most of the higher-level language bindings are similar):\n\n```python\nfrom postal.expand import expand_address\nexpansions = expand_address('Quatre-vingt-douze Ave des Champs-Élysées')\n\nassert '92 avenue des champs-elysees' in set(expansions)\n```\n\nThe C API equivalent is a few more lines, but still fairly simple:\n\n```c\n#include \u003Cstdio.h>\n#include \u003Cstdlib.h>\n#include \u003Clibpostal\u002Flibpostal.h>\n\nint main(int argc, char **argv) {\n    \u002F\u002F Setup (only called once at the beginning of your program)\n    if (!libpostal_setup() || !libpostal_setup_language_classifier()) {\n        exit(EXIT_FAILURE);\n    }\n\n    size_t num_expansions;\n    libpostal_normalize_options_t options = libpostal_get_default_options();\n    char **expansions = libpostal_expand_address(\"Quatre-vingt-douze Ave des Champs-Élysées\", options, &num_expansions);\n\n    for (size_t i = 0; i \u003C num_expansions; i++) {\n        printf(\"%s\\n\", expansions[i]);\n    }\n\n    \u002F\u002F Free expansions\n    libpostal_expansion_array_destroy(expansions, num_expansions);\n\n    
\u002F\u002F Teardown (only called once at the end of your program)\n    libpostal_teardown();\n    libpostal_teardown_language_classifier();\n}\n```\n\nCommand-line usage (expand)\n---------------------------\n\nAfter building libpostal:\n\n```\ncd src\u002F\n\n.\u002Flibpostal \"Quatre vingt douze Ave des Champs-Élysées\"\n```\n\nIf you have a text file or stream with one address per line, the command-line interface also accepts input from stdin:\n\n```\ncat some_file | .\u002Flibpostal --json\n```\n\nCommand-line usage (parser)\n---------------------------\n\nAfter building libpostal:\n\n```\ncd src\u002F\n\n.\u002Faddress_parser\n```\n\naddress_parser is an interactive shell. Just type addresses and libpostal will\nparse them and print the result.\n\n\nBindings\n--------\n\nLibpostal is designed to be used by higher-level languages.  If you don't see your language of choice, or if you're writing a language binding, please let us know!\n\n**Officially supported language bindings**\n\n- Python: [pypostal](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fpypostal)\n- Ruby: [ruby_postal](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fruby_postal)\n- Go: [gopostal](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fgopostal)\n- Java\u002FJVM: [jpostal](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fjpostal)\n- PHP: [php-postal](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fphp-postal)\n- NodeJS: [node-postal](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fnode-postal)\n- R: [poster](https:\u002F\u002Fgithub.com\u002Fironholds\u002Fposter)\n\n**Unofficial language bindings**\n\n- Java: [javacpp-presets-libpostal](https:\u002F\u002Fgithub.com\u002Fbytedeco\u002Fjavacpp-presets\u002Ftree\u002Fmaster\u002Flibpostal)\n- LuaJIT: [lua-resty-postal](https:\u002F\u002Fgithub.com\u002Fbungle\u002Flua-resty-postal)\n- Perl: [Geo::libpostal](https:\u002F\u002Fmetacpan.org\u002Fpod\u002FGeo::libpostal)\n- Elixir: 
[Expostal](https:\u002F\u002Fgithub.com\u002FSweetIQ\u002Fexpostal)\n- Haskell: [haskell-postal](http:\u002F\u002Fgithub.com\u002Fnetom\u002Fhaskell-postal)\n- Rust: [rust-postal](https:\u002F\u002Fgithub.com\u002Fpnordahl\u002Frust-postal)\n- Rust: [rustpostal](https:\u002F\u002Fcrates.io\u002Fcrates\u002Frustpostal)\n\n**Unofficial database extensions**\n\n- PostgreSQL: [pgsql-postal](https:\u002F\u002Fgithub.com\u002Fpramsey\u002Fpgsql-postal)\n\n**Unofficial servers**\n\n- Libpostal REST GO Server (need ~4Gb memory) with basic security: [postal_server](https:\u002F\u002Fgithub.com\u002Fle0pard\u002Fpostal_server)\n- Libpostal REST Go Docker: [libpostal-rest-docker](https:\u002F\u002Fgithub.com\u002Fjohnlonganecker\u002Flibpostal-rest-docker)\n- Libpostal REST FastAPI Docker: [libpostal-fastapi](https:\u002F\u002Fgithub.com\u002Falpha-affinity\u002Flibpostal-fastapi)\n- Libpostal ZeroMQ Docker: [libpostal-zeromq](https:\u002F\u002Fgithub.com\u002Fpasupulaphani\u002Flibpostal-docker)\n\n\nTests\n-----\n\nlibpostal uses [greatest](https:\u002F\u002Fgithub.com\u002Fsilentbicycle\u002Fgreatest) for automated testing. To run the tests, use:\n\n```\nmake check\n```\n\nAdding [test cases](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Ftree\u002Fmaster\u002Ftest) is easy, even if your C is rusty\u002Fnon-existent, and we'd love contributions. We use mostly functional tests checking string input against string output.\n\nlibpostal also gets periodically battle-tested on millions of addresses from OSM (clean) as well as anonymized queries from a production geocoder (not so clean). During this process we use valgrind to check for memory leaks and other errors.\n\nData files\n----------\n\nlibpostal needs to download some data files from S3. The basic files are on-disk\nrepresentations of the data structures necessary to perform expansion. 
For address\nparsing, since model training takes a few days, we publish the fully trained model\nto S3 and will update it automatically as new addresses get added to OSM, OpenAddresses, etc. Same goes for the language classifier model.\n\nData files are automatically downloaded when you run make. To check for and download\nany new data files, you can either run ```make```, or run:\n\n```\nlibpostal_data download all $YOUR_DATA_DIR\u002Flibpostal\n```\n\nAnd replace $YOUR_DATA_DIR with whatever you passed to configure during install.\n\nLanguage dictionaries\n---------------------\n\nlibpostal contains a number of per-language dictionaries that influence expansion, the language classifier, and the parser. To explore the dictionaries or contribute abbreviations\u002Fphrases in your language, see [resources\u002Fdictionaries](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Ftree\u002Fmaster\u002Fresources\u002Fdictionaries).\n\nTraining data\n-------------\n\nIn machine learning, large amounts of training data are often essential for getting good results. Many open-source machine learning projects either release only the model code (results reproducible if and only if you're Google), or a pre-baked model where the training conditions are unknown.\n\nLibpostal is a bit different because it's trained on open data that's available to everyone, so we've released the entire training pipeline (the [geodata](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Ftree\u002Fmaster\u002Fscripts\u002Fgeodata) package in this repo), as well as the resulting training data itself on the Internet Archive. It's over 100GB unzipped.\n\nTraining data are stored on archive.org by the date they were created. There's also a file stored in the main directory of this repo called `current_parser_training_set` which stores the date of the most recently created training set. 
To always point to the latest data, try something like: ```latest=$(cat current_parser_training_set)``` and use that variable in place of the date.\n\n### Parser training sets ###\nAll files can be found at https:\u002F\u002Farchive.org\u002Fdownload\u002Flibpostal-parser-training-data-YYYYMMDD\u002F$FILE as gzip'd tab-separated values (TSV) files formatted like:```language\\tcountry\\taddress```.\n\n- **formatted_addresses_tagged.random.tsv.gz** (ODBL): OSM addresses. Apartments, PO boxes, categories, etc. are added primarily to these examples\n- **formatted_places_tagged.random.tsv.gz** (ODBL): every toponym in OSM (even cities represented as points, etc.), reverse-geocoded to its parent admins, possibly including postal codes if they're listed on the point\u002Fpolygon. Every place gets a base level of representation and places with higher populations get proportionally more.\n- **formatted_ways_tagged.random.tsv.gz** (ODBL): every street in OSM (ways with highway=*, with a few conditions), reverse-geocoded to its admins\n- **geoplanet_formatted_addresses_tagged.random.tsv.gz** (CC-BY): every postal code in Yahoo GeoPlanet (includes almost every postcode in the UK, Canada, etc.) and their parent admins. 
The GeoPlanet admins have been cleaned up and mapped to libpostal's tagset\n- **openaddresses_formatted_addresses_tagged.random.tsv.gz** (various licenses, mostly CC-BY): most of the address data sets from [OpenAddresses](https:\u002F\u002Fopenaddresses.io\u002F), which in turn come directly from government sources\n- **uk_openaddresses_formatted_addresses_tagged.random.tsv.gz** (CC-BY): addresses from [OpenAddresses UK](https:\u002F\u002Falpha.openaddressesuk.org\u002F)\n\nIf the parser doesn't perform as well as you'd hoped on a particular type of address, the best recourse is to use grep\u002Fawk to look through the training data and try to determine if there's some pattern\u002Fstyle of address that's not being captured.\n\nFeatures\n--------\n\n- **Abbreviation expansion**: e.g. expanding \"rd\" => \"road\" but for almost any\nlanguage. libpostal supports > 50 languages and it's easy to add new languages\nor expand the current dictionaries. Ideographic languages (not separated by\nwhitespace e.g. Chinese) are supported, as are Germanic languages where\nthoroughfare types are concatenated onto the end of the string, and may\noptionally be separated so Rosenstraße and Rosen Straße are equivalent.\n\n- **International address parsing**: [Conditional Random Field](https:\u002F\u002Fweb.archive.org\u002Fweb\u002F20240104172655\u002Fhttp:\u002F\u002Fblog.echen.me\u002F2012\u002F01\u002F03\u002Fintroduction-to-conditional-random-fields\u002F) which parses\n\"123 Main Street New York New York\" into {\"house_number\": 123, \"road\":\n\"Main Street\", \"city\": \"New York\", \"state\": \"New York\"}. 
The parser works\nfor a wide variety of countries and languages, not just US\u002FEnglish.\nThe model is trained on over 1 billion addresses and address-like strings, using the\ntemplates in the [OpenCage address formatting repo](https:\u002F\u002Fgithub.com\u002FOpenCageData\u002Faddress-formatting) to construct formatted,\ntagged training examples for every inhabited country in the world. Many types of [normalizations](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Fblob\u002Fmaster\u002Fscripts\u002Fgeodata\u002Faddresses\u002Fcomponents.py)\nare performed to make the training data resemble real messy geocoder input as closely as possible.\n\n- **Language classification**: multinomial logistic regression\ntrained (using the [FTRL-Proximal](https:\u002F\u002Fresearch.google.com\u002Fpubs\u002Farchive\u002F41159.pdf) method to induce sparsity) on all of OpenStreetMap ways, addr:* tags, toponyms and formatted\naddresses. Labels are derived using point-in-polygon tests for both OSM countries\nand official\u002Fregional languages for countries and admin 1 boundaries\nrespectively. So, for example, Spanish is the default language in Spain but\nin different regions e.g. Catalunya, Galicia, the Basque region, the respective\nregional languages are the default. Dictionary-based disambiguation is employed in\ncases where the regional language is non-default e.g. Welsh, Breton, Occitan.\nThe dictionaries are also used to abbreviate canonical phrases like \"Calle\" => \"C\u002F\"\n(performed on both the language classifier and the address parser training sets)\n\n- **Numeric expression parsing** (\"twenty first\" => 21st,\n\"quatre-vingt-douze\" => 92, again using data provided in CLDR), supports > 30\nlanguages. Handles languages with concatenated expressions e.g.\nmilleottocento => 1800. 
Optionally normalizes Roman numerals regardless of the\nlanguage (IX => 9) which occur in the names of many monarchs, popes, etc.\n\n- **Fast, accurate tokenization\u002Flexing**: clocked at > 1M tokens \u002F sec,\nimplements the TR-29 spec for UTF8 word segmentation, tokenizes East Asian\nlanguages character by character instead of on whitespace.\n\n- **UTF8 normalization**: optionally decompose UTF8 to NFD normalization form,\nstrips accent marks e.g. à => a and\u002For applies Latin-ASCII transliteration.\n\n- **Transliteration**: e.g. улица => ulica or ulitsa. Uses all\n[CLDR transforms](http:\u002F\u002Fwww.unicode.org\u002Frepos\u002Fcldr\u002Ftrunk\u002Fcommon\u002Ftransforms\u002F), the exact same source data as used by [ICU](http:\u002F\u002Fsite.icu-project.org\u002F),\nthough libpostal doesn't require pulling in all of ICU (might conflict\nwith your system's version). Note: some languages, particularly Hebrew, Arabic\nand Thai may not include vowels and thus will not often match a transliteration\ndone by a human. It may be possible to implement statistical transliterators\nfor some of these languages.\n\n- **Script detection**: Detects which script a given string uses (can be\nmultiple e.g. a free-form Hong Kong or Macau address may use both Han and\nLatin scripts in the same address). In transliteration we can use all\napplicable transliterators for a given Unicode script (Greek can for instance\nbe transliterated with Greek-Latin, Greek-Latin-BGN and Greek-Latin-UNGEGN).\n\nNon-goals\n---------\n\n- Verifying that a location is a valid address\n- Actually geocoding addresses to a lat\u002Flon (that requires a database\u002Fsearch index)\n- Extracting addresses from free text\n\nRaison d'être\n-------------\n\nlibpostal was originally created as part of the [OpenVenues](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fopenvenues) project to solve the problem of venue deduping. 
In OpenVenues, we have a data set of millions of\nplaces derived from terabytes of web pages from the [Common Crawl](http:\u002F\u002Fcommoncrawl.org\u002F).\nThe Common Crawl is published monthly, and so even merging the results of\ntwo crawls produces significant duplicates.\n\nDeduping is a relatively well-studied field, and for text documents\nlike web pages, academic papers, etc. there exist pretty decent approximate\nsimilarity methods such as [MinHash](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FMinHash).\n\nHowever, for physical addresses, the frequent use of conventional abbreviations\nsuch as Road == Rd, California == CA, or New York City == NYC complicates\nmatters a bit. Even using a technique like MinHash, which is well suited for\napproximate matches and is equivalent to the Jaccard similarity of two sets, we\nhave to work with very short texts and it's often the case that two equivalent\naddresses, one abbreviated and one fully specified, will not match very closely\nin terms of n-gram set overlap. 
In non-Latin scripts, say a Russian address and\nits transliterated equivalent, it's conceivable that two addresses referring to\nthe same place may not match even a single character.\n\nAs a motivating example, consider the following two equivalent ways to write a\nparticular Manhattan street address with varying conventions and degrees\nof verbosity:\n\n- 30 W 26th St Fl #7\n- 30 West Twenty-sixth Street Floor Number 7\n\nObviously '30 W 26th St Fl #7' != '30 West Twenty-sixth Street Floor Number 7'\nin a string comparison sense, but a human can grok that these two addresses\nrefer to the same physical location.\n\nlibpostal aims to create normalized geographic strings, parsed into components,\nsuch that we can more effectively reason about how well two addresses\nactually match and make automated server-side decisions about dupes.\n\nSo it's not a geocoder?\n-----------------------\n\nIf the above sounds a lot like geocoding, that's because it is in a way,\nonly in the OpenVenues case, we have to geocode without a UI or a user\nto select the correct address in an autocomplete dropdown. Given a database\nof source addresses such as OpenAddresses or OpenStreetMap (or all of the above),\nlibpostal can be used to implement things like address deduping and server-side\nbatch geocoding in settings like MapReduce or stream processing.\n\nNow, instead of trying to bake address-specific conventions into traditional\ndocument search engines like Elasticsearch using giant synonyms files, scripting,\ncustom analyzers, tokenizers, and the like, geocoding can look like this:\n\n1. Run the addresses in your database through libpostal's expand_address\n2. Store the normalized string(s) in your favorite search engine, DB,\n   hashtable, etc.\n3. 
Run your user queries or fresh imports through libpostal and search\n   the existing database using those strings\n\nIn this way, libpostal can perform fuzzy address matching in constant time\nrelative to the size of the data set.\n\nWhy C?\n------\n\nlibpostal is written in C for three reasons (in order of importance):\n\n1. **Portability\u002Fubiquity**: libpostal targets higher-level languages that\npeople actually use day-to-day: Python, Go, Ruby, NodeJS, etc. The beauty of C\nis that just about any programming language can bind to it and C compilers are\neverywhere, so pick your favorite, write a binding, and you can use libpostal\ndirectly in your application without having to stand up a separate server. We\nsupport Mac\u002FLinux (Windows is not a priority but happy to accept patches), have\na standard autotools build and an endianness-agnostic file format for the data\nfiles. The Python bindings are maintained as part of this repo since they're\nneeded to construct the training data.\n\n2. **Memory-efficiency**: libpostal is designed to run in a MapReduce setting\nwhere we may be limited to \u003C 1GB of RAM per process depending on the machine\nconfiguration. As much as possible libpostal uses contiguous arrays, tries\n(built on contiguous arrays), bloom filters and compressed sparse matrices to\nkeep memory usage low. It's possible to use libpostal on a mobile device with\nmodels trained on a single country or a handful of countries.\n\n3. **Performance**: this is last on the list for a reason. Most of the\noptimizations in libpostal are for memory usage rather than performance.\nlibpostal is quite fast given the amount of work it does. It can process\n10-30k addresses \u002F second in a single thread\u002Fprocess on the platforms we've\ntested (that means processing every address in OSM planet in a little over\nan hour). Check out the simple benchmark program to test on your environment\nand various types of input. 
In the MapReduce setting, per-core performance\nisn't as important because everything's being done in parallel, but there are\nsome streaming ingestion applications at Mapzen where this needs to\nrun in-process.\n\nC conventions\n-------------\n\nlibpostal is written in modern, legible, C99 and uses the following conventions:\n\n- Roughly object-oriented, as much as allowed by C\n- Almost no pointer-based data structures, arrays all the way down\n- Uses dynamic character arrays (inspired by [sds](https:\u002F\u002Fgithub.com\u002Fantirez\u002Fsds)) for safer string handling\n- Confines almost all mallocs to *name*_new and all frees to *name*_destroy\n- Efficient existing implementations for simple things like hashtables\n- Generic containers (via [klib](https:\u002F\u002Fgithub.com\u002Fattractivechaos\u002Fklib)) whenever possible\n- Data structures take advantage of sparsity as much as possible\n- Efficient double-array trie implementation for most string dictionaries\n- Cross-platform as much as possible, particularly for *nix\n\nPreprocessing (Python)\n----------------------\n\nThe [geodata](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Ftree\u002Fmaster\u002Fscripts\u002Fgeodata) Python package in the libpostal repo contains the pipeline for preprocessing the various geo\ndata sets and building training data for the C models to use.\nThis package shouldn't be needed for most users, but for those interested in generating new types of addresses or improving libpostal's training data, this is where to look.\n\nAddress parser accuracy\n-----------------------\n\nOn held-out test data (meaning labeled parses that the model has _not_ seen\nbefore), the address parser achieves 99.45% full parse accuracy.\n\nFor some tasks like named entity recognition it's preferable to use something\nlike an F1 score or variants, mostly because there's a class bias problem (most\nwords are non-entities, and a system that simply predicted non-entity for\nevery token 
would actually do fairly well in terms of accuracy). That is not\nthe case for address parsing. Every token has a label and there are millions\nof examples of each class in the training data, so accuracy is preferable as it's\na clean, simple and intuitive measure of performance.\n\nHere we use full parse accuracy, meaning we only give the parser one \"point\" in\nthe numerator if it gets every single token in the address correct. That should\nbe a better measure than simply looking at whether each token was correct.\n\nImproving the address parser\n----------------------------\n\nThough the current parser works quite well for most standard addresses, there\nis still room for improvement, particularly in making sure the training data\nwe use is as close as possible to addresses in the wild. There are two primary\nways the address parser can be improved even further (in order of difficulty):\n\n1. Contribute addresses to OSM. Anything with an addr:housenumber tag will be\n   incorporated automatically into the parser next time it's trained.\n2. If the address parser isn't working well for a particular country, language\n   or style of address, chances are that some name variations or places are being\n   missed\u002Fmislabeled during training data creation. Sometimes the fix is to\n   update the formats at: https:\u002F\u002Fgithub.com\u002FOpenCageData\u002Faddress-formatting,\n   and in many other cases there are relatively simple tweaks we can make\n   when creating the training data that will ensure the model is trained to\n   handle your use case without you having to do any manual data entry.\n   If you see a pattern of obviously bad address parses, the best thing to\n   do is post an issue to GitHub.\n\nContributing\n------------\n\nBug reports, issues and pull requests are welcome. 
Please read the [contributing guide](CONTRIBUTING.md) before submitting your issue, bug report, or pull request.\n\nSubmit issues at: https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Fissues.\n\n\nShoutouts\n---------\n\nSpecial thanks to @BenK10 for the initial Windows build and @AeroXuk for integrating it seamlessly into the project and setting up an Appveyor build.\n\nLicense\n-------\n\nThe software is available as open source under the terms of the [MIT License](http:\u002F\u002Fopensource.org\u002Flicenses\u002FMIT).\n","# libpostal：国际街道地址自然语言处理\n\n[![构建状态](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Factions\u002Fworkflows\u002Ftest.yml\u002Fbadge.svg)](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Factions)\n[![构建状态](https:\u002F\u002Fci.appveyor.com\u002Fapi\u002Fprojects\u002Fstatus\u002Fgithub\u002Fopenvenues\u002Flibpostal?branch=master&svg=true)](https:\u002F\u002Fci.appveyor.com\u002Fproject\u002Falbarrentine\u002Flibpostal\u002Fbranch\u002Fmaster)\n[![许可证](https:\u002F\u002Fimg.shields.io\u002Fgithub\u002Flicense\u002Fopenvenues\u002Flibpostal.svg)](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Fblob\u002Fmaster\u002FLICENSE)\n[![OpenCollective 赞助者](https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsors\u002Fbadge.svg)](#sponsors)\n[![OpenCollective 支持者](https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbackers\u002Fbadge.svg)](#backers)\n\nlibpostal 是一个使用统计自然语言处理和开放数据来解析\u002F规范化全球各地街道地址的 C 语言库。该项目的目标是在任何地方、用任何语言理解基于位置的字符串。如需更全面地了解 libpostal 背后的研究，请务必阅读以下介绍性博文（篇幅较长）：\n\n- **原文**：[OpenStreetMap 上的统计自然语言处理](https:\u002F\u002Fmedium.com\u002F@albarrentine\u002Fstatistical-nlp-on-openstreetmap-b9d573e6cc86)\n- **1.0 版发布后的后续文章**：[OpenStreetMap 上的统计自然语言处理：第二部分](https:\u002F\u002Fmedium.com\u002F@albarrentine\u002Fstatistical-nlp-on-openstreetmap-part-2-80405b988718)\n\n\u003Cspan>&#x1f1e7;&#x1f1f7;\u003C\u002Fspan> \u003Cspan>&#x1f1eb;&#x1f1ee;\u003C\u002Fspan>  
\u003Cspan>&#x1f1f3;&#x1f1ec;\u003C\u002Fspan> :jp: \u003Cspan>&#x1f1fd;&#x1f1f0; \u003C\u002Fspan> \u003Cspan>&#x1f1e7;&#x1f1e9; \u003C\u002Fspan> \u003Cspan>&#x1f1f5;&#x1f1f1; \u003C\u002Fspan> \u003Cspan>&#x1f1fb;&#x1f1f3; \u003C\u002Fspan> \u003Cspan>&#x1f1e7;&#x1f1ea; \u003C\u002Fspan> \u003Cspan>&#x1f1f2;&#x1f1e6; \u003C\u002Fspan> \u003Cspan>&#x1f1fa;&#x1f1e6; \u003C\u002Fspan> \u003Cspan>&#x1f1ef;&#x1f1f2; \u003C\u002Fspan> :ru: \u003Cspan>&#x1f1ee;&#x1f1f3; \u003C\u002Fspan> \u003Cspan>&#x1f1f1;&#x1f1fb; \u003C\u002Fspan> \u003Cspan>&#x1f1e7;&#x1f1f4; \u003C\u002Fspan> :de: \u003Cspan>&#x1f1f8;&#x1f1f3; \u003C\u002Fspan>  \u003Cspan>&#x1f1e6;&#x1f1f2; \u003C\u002Fspan> :kr: \u003Cspan>&#x1f1f3;&#x1f1f4; \u003C\u002Fspan>  \u003Cspan>&#x1f1f2;&#x1f1fd; \u003C\u002Fspan> \u003Cspan>&#x1f1e8;&#x1f1ff; \u003C\u002Fspan> \u003Cspan>&#x1f1f9;&#x1f1f7; \u003C\u002Fspan> :es: \u003Cspan>&#x1f1f8;&#x1f1f8; \u003C\u002Fspan> \u003Cspan>&#x1f1ea;&#x1f1ea; \u003C\u002Fspan> \u003Cspan>&#x1f1e7;&#x1f1ed; \u003C\u002Fspan> \u003Cspan>&#x1f1f3;&#x1f1f1; \u003C\u002Fspan> :cn:  \u003Cspan>&#x1f1f5;&#x1f1f9; \u003C\u002Fspan> \u003Cspan>&#x1f1f5;&#x1f1f7; \u003C\u002Fspan> :gb: \u003Cspan>&#x1f1f5;&#x1f1f8; \u003C\u002Fspan>\n\n地址及其所代表的位置对于任何涉及地图的应用都至关重要（地点搜索、交通、按需\u002F配送服务、签到、评论等）。然而，即使是最简单的地址也充满了本地惯例、缩写和上下文信息，这使得它们难以通过传统的全文搜索引擎进行有效索引和查询。该库可以帮助将人类使用的自由格式地址转换为适合机器比较和全文索引的规范形式。尽管 libpostal 本身并不是一个完整的地理编码器，但它可以用作预处理步骤，使任何地理编码应用在国际范围内更加智能、简单和一致。\n\n\u003Cspan>&#x1f1f7;&#x1f1f4; \u003C\u002Fspan> \u003Cspan>&#x1f1ec;&#x1f1ed; \u003C\u002Fspan> \u003Cspan>&#x1f1e6;&#x1f1fa; \u003C\u002Fspan> \u003Cspan>&#x1f1f2;&#x1f1fe; \u003C\u002Fspan> \u003Cspan>&#x1f1ed;&#x1f1f7; \u003C\u002Fspan> \u003Cspan>&#x1f1ed;&#x1f1f9; \u003C\u002Fspan> :us: \u003Cspan>&#x1f1ff;&#x1f1e6; \u003C\u002Fspan> \u003Cspan>&#x1f1f7;&#x1f1f8; \u003C\u002Fspan> \u003Cspan>&#x1f1e8;&#x1f1f1; \u003C\u002Fspan> :it: \u003Cspan>&#x1f1f0;&#x1f1ea; \u003Cspan>&#x1f1e8;&#x1f1ed; \u003C\u002Fspan> 
\u003Cspan>&#x1f1e8;&#x1f1fa; \u003C\u002Fspan> \u003Cspan>&#x1f1f8;&#x1f1f0; \u003C\u002Fspan> \u003Cspan>&#x1f1e6;&#x1f1f4; \u003C\u002Fspan> \u003Cspan>&#x1f1e9;&#x1f1f0; \u003C\u002Fspan> \u003Cspan>&#x1f1f9;&#x1f1ff; \u003C\u002Fspan> \u003Cspan>&#x1f1e6;&#x1f1f1; \u003C\u002Fspan> \u003Cspan>&#x1f1e8;&#x1f1f4; \u003C\u002Fspan> \u003Cspan>&#x1f1ee;&#x1f1f1; \u003C\u002Fspan> \u003Cspan>&#x1f1ec;&#x1f1f9; \u003C\u002Fspan>  :fr: \u003Cspan>&#x1f1f5;&#x1f1ed; \u003C\u002Fspan> \u003Cspan>&#x1f1e6;&#x1f1f9; \u003C\u002Fspan> \u003Cspan>&#x1f1f1;&#x1f1e8; \u003C\u002Fspan>  \u003Cspan>&#x1f1ee;&#x1f1f8; \u003Cspan>&#x1f1ee;&#x1f1e9; \u003C\u002Fspan> \u003C\u002Fspan> \u003Cspan>&#x1f1e6;&#x1f1ea; \u003C\u002Fspan> \u003C\u002Fspan> \u003Cspan>&#x1f1f8;&#x1f1f0; \u003C\u002Fspan> \u003Cspan>&#x1f1f9;&#x1f1f3; \u003C\u002Fspan> \u003Cspan>&#x1f1f0;&#x1f1ed; \u003C\u002Fspan> \u003Cspan>&#x1f1e6;&#x1f1f7; \u003C\u002Fspan> \u003Cspan>&#x1f1ed;&#x1f1f0; \u003C\u002Fspan>\n\n核心库采用纯 C 语言编写。官方支持 [Python](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fpypostal)、[Ruby](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fruby_postal)、[Go](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fgopostal)、[Java](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fjpostal)、[PHP](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fphp-postal) 和 [NodeJS](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fnode-postal) 的语言绑定，并且也很容易为其他语言编写绑定。\n\n赞助商\n--------\n\n如果贵公司正在使用 libpostal，请考虑让贵组织赞助该项目。理解人类在提及地点时的真实意图仍是一个远未解决的问题，而赞助有助于我们探索地理空间自然语言处理的新领域。作为赞助商，贵公司的 logo 将醒目地展示在 GitHub 仓库页面上，并附带指向贵公司网站的链接。[赞助信息](https:\u002F\u002Fopencollective.com\u002Flibpostal#sponsor)\n\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F0\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F0\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca 
href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F1\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F1\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F2\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F2\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F3\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F3\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F4\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F4\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F5\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F5\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F6\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F6\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F7\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F7\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F8\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F8\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca 
href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F9\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F9\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F10\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F10\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F11\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F11\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F12\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F12\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F13\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F13\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F14\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F14\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F15\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F15\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F16\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F16\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca 
href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F17\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F17\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F18\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F18\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F19\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F19\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F20\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F20\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F21\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F21\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F22\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F22\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F23\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F23\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F24\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F24\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca 
href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F25\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F25\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F26\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F26\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F27\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F27\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F28\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F28\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F29\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fsponsor\u002F29\u002Favatar.svg\">\u003C\u002Fa>\n\n支持者\n------------\n\n个人用户也可以通过每月捐款来支持开源地理自然语言处理研究：\n\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F0\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F0\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F1\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F1\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F2\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F2\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca 
href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F3\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F3\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F4\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F4\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F5\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F5\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F6\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F6\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F7\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F7\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F8\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F8\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F9\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F9\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F10\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F10\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca 
href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F11\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F11\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F12\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F12\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F13\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F13\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F14\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F14\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F15\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F15\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F16\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F16\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F17\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F17\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F18\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F18\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca 
href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F19\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F19\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F20\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F20\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F21\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F21\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F22\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F22\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F23\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F23\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F24\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F24\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F25\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F25\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F26\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F26\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca 
href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F27\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F27\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F28\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F28\u002Favatar.svg\">\u003C\u002Fa>\n\u003Ca href=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F29\u002Fwebsite\" target=\"_blank\">\u003Cimg src=\"https:\u002F\u002Fopencollective.com\u002Flibpostal\u002Fbacker\u002F29\u002Favatar.svg\">\u003C\u002Fa>\n\n安装（Mac\u002FLinux）\n------------------------\n\n在安装之前，请确保您已满足以下先决条件：\n\n**在 Ubuntu\u002FDebian 上**\n```\nsudo apt-get install -y curl build-essential autoconf automake libtool pkg-config\n```\n\n**在 CentOS\u002FRHEL 上**\n```\nsudo yum install curl autoconf automake libtool pkgconfig\n```\n\n**在 macOS 上**\n\n通过 [MacPorts](https:\u002F\u002Fwww.macports.org\u002F) 用一条命令安装：\n```\nport install libpostal\n```\n\n或者使用 [Homebrew](https:\u002F\u002Fbrew.sh\u002F)：\n\n```\nbrew install libpostal\n```\n\n要从源代码编译 C 库：\n\n如果您使用的是 M1 Mac，请在 `.\u002Fconfigure` 命令中添加 `--disable-sse2`。这会导致性能下降，但构建过程仍能成功完成。\n\n```\ngit clone https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\ncd libpostal\n\n\n\n# 如果是首次安装，则跳过此步骤\nmake distclean\n\n.\u002Fbootstrap.sh\n\n# 省略 --datadir 标志以将数据安装到当前目录\n.\u002Fconfigure --datadir=[...某个有几 GB 空间且存在或可创建\u002F修改 \"libpostal\" 目录的路径...]\nmake -j4\n\n# 对于 Intel\u002FAMD 处理器和默认模型\n.\u002Fconfigure --datadir=[...某个有几 GB 空间且存在或可创建\u002F修改 \"libpostal\" 目录的路径...]\n\n# 对于 Apple \u002F ARM CPU 和默认模型\n.\u002Fconfigure --datadir=[...某个有几 GB 空间且存在或可创建\u002F修改 \"libpostal\" 目录的路径...] --disable-sse2\n\n# 对于改进的 Senzing 模型：\n.\u002Fconfigure --datadir=[...某个有几 GB 空间且存在或可创建\u002F修改 \"libpostal\" 目录的路径...] 
MODEL=senzing\n\nmake -j8\nsudo make install\n\n# 在 Linux 上，最好运行\nsudo ldconfig\n```\n\nlibpostal 支持 pkg-config，因此您可以使用 pkg-config 打印出链接您的程序所需的标志：\n\n```\npkg-config --cflags libpostal         # 打印编译器标志\npkg-config --libs libpostal           # 打印链接器标志\npkg-config --cflags --libs libpostal  # 同时打印两者\n```\n\n例如，如果您编写了一个名为 app.c 的程序，可以这样编译它：\n\n```\ngcc app.c `pkg-config --cflags --libs libpostal`\n```\n\n安装（Windows）\n----------------------\n\n**MSys2\u002FMinGW**\n\n对于 Windows，目前的构建流程需要 MSys2 和 MinGW。这些工具可以从 http:\u002F\u002Fmsys2.org 下载。请按照 MSys2 官网上的说明进行安装。\n\n请确保 MSYS2 已更新到最新版本，运行以下命令：\n```\npacman -Syu\n```\n\n安装以下依赖项：\n```\npacman -S autoconf automake curl git make libtool gcc mingw-w64-x86_64-gcc\n```\n\n然后构建 C 库：\n```\ngit clone https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\ncd libpostal\ncp -rf windows\u002F* .\u002F\n.\u002Fbootstrap.sh\n.\u002Fconfigure --datadir=[...某个有几GB空间的目录...]\nmake -j4\nmake install\n```\n\n注意：设置 `datadir` 时，`C:` 盘应输入为 `\u002Fc`。libpostal 的构建脚本会在路径末尾自动添加 `libpostal`，因此在 Windows 上 `\u002Fc` 将变为 `C:\\libpostal\\`。\n\n编译后的 `.dll` 文件将位于 `src\u002F.libs\u002F` 目录下，文件名为 `libpostal-1.dll`。\n\n如果你需要一个 `.lib` 导入库来链接到你的应用程序，可以使用 Visual Studio 的 `lib.exe` 工具和 `libpostal.def` 定义文件生成：\n```\nlib.exe \u002Fdef:libpostal.def \u002Fout:libpostal.lib \u002Fmachine:x64\n```\n\n使用替代数据模型进行安装\n-------------------------------------------\n\nlibpostal 提供了一个替代数据模型，由 Senzing 公司创建，用于改进对美国、英国和新加坡地址的解析，并优化美国农村路线地址的处理。要启用此模型，在安装时的配置命令中添加 `MODEL=senzing`：\n```\n.\u002Fconfigure --datadir=[...某个有几GB空间的目录...] 
MODEL=senzing\n```\n\n该模型的数据来源于 [OpenAddress](https:\u002F\u002Fopenaddresses.io\u002F)、[OpenStreetMap](https:\u002F\u002Fwww.openstreetmap.org\u002F) 以及 Senzing 根据客户反馈生成的数据（约几百条记录），总计来自超过 230 个国家、100 多种语言的约 12 亿条记录。虽然 OpenStreetMap 和 OpenAddress 的数据质量较高，但并不完美，因此通过过滤掉格式错误的地址、纠正分类错误的地址标记以及移除不属于地址的标记等方式对数据集进行了修正。\n\nSenzing 还创建了一个包含来自 89 个国家 12,950 条地址的数据集，用于测试和验证其模型的质量。该数据集主要基于 OSM 中的随机地址，每个国家至少包含 50 条地址。此外，Senzing 支持团队和客户提供的难以解析的地址，以及从 libpostal GitHub 页面收集的地址也被加入到该数据集中。使用此测试集，Senzing 模型的解析准确率比默认模型高出 4.3%。\n\n该模型的大小约为 2.2GB，而默认模型为 1.8GB，因此如果存储空间有限，请务必考虑这一点。\n\n有关此数据模型的更多信息，请访问：https:\u002F\u002Fgithub.com\u002FSenzing\u002Flibpostal-data\n\n如果您在此模型的解析、安装或其他方面遇到任何问题，请在 https:\u002F\u002Fgithub.com\u002FSenzing\u002Flibpostal-data 上提交问题报告。\n\n解析示例\n-------------------\n\nlibpostal 的国际地址解析器采用机器学习技术（条件随机场），并基于地球上所有有人居住国家的超过 10 亿条地址进行训练。我们以 [OpenStreetMap](https:\u002F\u002Fopenstreetmap.org) 和 [OpenAddresses](https:\u002F\u002Fopenaddresses.io) 作为结构化地址的来源，并结合 OpenCage 地址格式模板（位于 https:\u002F\u002Fgithub.com\u002FOpenCageData\u002Faddress-formatting）构建训练数据，同时补充包含多边形信息，并生成公寓号、楼层号和邮政信箱等建筑物内部组件。此外，我们还会引入缩写、随机删除某些组件等方法，以使解析器能够更好地应对现实世界中混乱无序的输入。\n\n这些解析示例取自与 libpostal 一起编译的交互式 `address_parser` 程序，当你运行 `make` 命令时会生成该程序。请注意，解析器可以处理带逗号或不带逗号的情况，以及各种大小写和组件排列组合（例如，仅输入城市或仅输入城市\u002F邮编）。\n\n![parser](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fopenvenues_libpostal_readme_648869e82e4a.gif)\n\n该解析器在保留数据上的准确率非常高，目前完整解析的正确率达到 99.45%，即每解析 100 条地址，就有 99 条地址的所有标记都能被正确识别。\n\n使用方法（解析器）\n--------------\n\n以下是使用 Python 绑定调用解析器 API 的示例：\n\n```python\n\nfrom postal.parser import parse_address\nparse_address('The Book Club 100-106 Leonard St Shoreditch London EC2A 4RH, United Kingdom')\n```\n\n以下是使用 C API 的示例：\n\n```c\n#include \u003Cstdio.h>\n#include \u003Cstdlib.h>\n#include \u003Clibpostal\u002Flibpostal.h>\n\nint main(int argc, char **argv) {\n    \u002F\u002F 初始化（只需在程序开始时调用一次）\n    if (!libpostal_setup() || !libpostal_setup_parser()) {\n        exit(EXIT_FAILURE);\n    }\n\n    
libpostal_address_parser_options_t options = libpostal_get_address_parser_default_options();\n    libpostal_address_parser_response_t *parsed = libpostal_parse_address(\"781 Franklin Ave Crown Heights Brooklyn NYC NY 11216 USA\", options);\n\n    for (size_t i = 0; i \u003C parsed->num_components; i++) {\n        printf(\"%s: %s\\n\", parsed->labels[i], parsed->components[i]);\n    }\n\n    \u002F\u002F 释放解析结果\n    libpostal_address_parser_response_destroy(parsed);\n\n    \u002F\u002F 清理（只需在程序结束时调用一次）\n    libpostal_teardown();\n    libpostal_teardown_parser();\n}\n```\n\n解析标签\n-------------\n\n地址解析器理论上可以使用训练数据中定义的任何字符串标签，但目前定义的标签主要基于 [OpenCage 地址格式库](https:\u002F\u002Fgithub.com\u002FOpenCageData\u002Faddress-formatting) 中的字段，此外 libpostal 还添加了一些用于处理特定模式的标签：\n\n- **house**: 场馆名称，例如“布鲁克林音乐学院”，以及建筑物名称，例如“帝国大厦”\n- **category**: 用于类别查询，如“餐馆”等。\n- **near**: 类似“在……内”、“靠近……”等短语，通常跟在类别短语之后，帮助解析类似“布鲁克林的餐馆”这样的查询。\n- **house_number**: 一般指建筑物的外部（面向街道）门牌号。在某些国家，这可能是一个包含公寓号或街区号的复合、带连字符的号码，但为了简化起见，libpostal 仍将其称为 house_number。\n- **road**: 街道名称\n- **unit**: 公寓、单元、办公室、地块或其他次要单位标识符\n- **level**: 表示楼层的表达方式，例如“3楼”、“地面层”等。\n- **staircase**: 编号或字母标记的楼梯\n- **entrance**: 编号或字母标记的入口\n- **po_box**: 邮政信箱：通常出现在非实体地址中（仅用于邮寄）\n- **postcode**: 用于邮件分拣的邮政编码\n- **suburb**: 通常是非官方的社区名称，如“哈莱姆”、“南布朗克斯”或“皇冠高地”\n- **city_district**: 这些通常是城市内的区或行政区，具有某种官方用途，例如“布鲁克林”、“哈克尼”或“布拉迪斯拉发四区”\n- **city**: 任何人类聚居地，包括城市、城镇、村庄、小村落、地方等。\n- **island**: 有名称的岛屿，例如“茂宜岛”\n- **state_district**: 通常是第二级行政分区或县。\n- **state**: 第一级行政分区。英国的苏格兰、北爱尔兰、威尔士和英格兰也被归类为“state”（OSM、GeoPlanet 等采用的惯例）。\n- **country_region**: 国家内部的非正式划分，不具有任何政治地位\n- **country**: 主权国家及其附属领土，任何拥有 [ISO-3166 代码](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FISO_3166-1_alpha-2) 的实体。\n- **world_region**: 目前仅用于在国家名称后附加“西印度群岛”，这是英语加勒比地区常用的一种模式，例如“牙买加，西印度群岛”。\n\n规范化示例\n-------------------------\n\nexpand_address API 将杂乱无章的真实世界地址转换为适合搜索索引、哈希等操作的规范化等效形式。\n\n以下是使用 Python 
绑定的交互式示例：\n\n![expand](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fopenvenues_libpostal_readme_59538734d27a.gif)\n\nlibpostal 包含一个基于 OSM 训练的语言分类器，用于检测给定地址中使用的语言，以便应用适当的规范化规则。所需的唯一输入就是原始地址字符串。以下是一些不同语言中较为复杂的规范化示例。\n\n| 输入                               | 输出（libpostal 中可能有多个）   |\n| ----------------------------------- |-----------------------------------------|\n| One-hundred twenty E 96th St        | 120 east 96th street                    |\n| C\u002F Ocho, P.I. 4                     | calle 8 polígono industrial 4           |\n| V XX Settembre, 20                  | via 20 settembre 20                     |\n| Quatre vingt douze R. de l'Église   | 92 rue de l eglise                      |\n| ул Каретный Ряд, д 4, строение 7    | улица каретныи ряд дом 4 строение 7     |\n| ул Каретный Ряд, д 4, строение 7    | ulitsa karetnyy ryad dom 4 stroyeniye 7 |\n| Marktstraße 14                      | markt strasse 14                        |\n\n目前，libpostal 支持 *60 多种语言* 的此类规范化，并且您可以[添加更多](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Ftree\u002Fmaster\u002Fresources\u002Fdictionaries)（无需编写任何 C 代码）。\n\n如需进一步阅读及一些奇特的地址边缘案例，请参阅：\n[程序员对地址的误解](https:\u002F\u002Fwww.mjt.me.uk\u002Fposts\u002Ffalsehoods-programmers-believe-about-addresses\u002F)。\n\n用法（规范化）\n---------------------\n\n以下是使用 Python 绑定的示例，以简洁明了为主（大多数高级语言绑定也类似）：\n\n```python\nfrom postal.expand import expand_address\nexpansions = expand_address('Quatre-vingt-douze Ave des Champs-Élysées')\n\nassert '92 avenue des champs-elysees' in set(expansions)\n```\n\nC API 的等效代码虽然多几行，但仍然相当简单：\n\n```c\n#include \u003Cstdio.h>\n#include \u003Cstdlib.h>\n#include \u003Clibpostal\u002Flibpostal.h>\n\nint main(int argc, char **argv) {\n    \u002F\u002F 设置（只需在程序开始时调用一次）\n    if (!libpostal_setup() || !libpostal_setup_language_classifier()) {\n        exit(EXIT_FAILURE);\n    }\n\n    size_t num_expansions;\n    libpostal_normalize_options_t options = libpostal_get_default_options();\n    char **expansions = 
libpostal_expand_address(\"Quatre-vingt-douze Ave des Champs-Élysées\", options, &num_expansions);\n\n    for (size_t i = 0; i \u003C num_expansions; i++) {\n        printf(\"%s\\n\", expansions[i]);\n    }\n\n    \u002F\u002F 释放展开结果\n    libpostal_expansion_array_destroy(expansions, num_expansions);\n\n    \u002F\u002F 清理（只需在程序结束时调用一次）\n    libpostal_teardown();\n    libpostal_teardown_language_classifier();\n}\n```\n\n命令行用法（expand）\n---------------------------\n\n构建 libpostal 后：\n\n```\ncd src\u002F\n\n.\u002Flibpostal \"Quatre vingt douze Ave des Champs-Élysées\"\n```\n\n如果您有一个每行包含一个地址的文本文件或流，命令行界面也支持从标准输入读取：\n\n```\ncat some_file | .\u002Flibpostal --json\n```\n\n命令行用法（parser）\n---------------------------\n\n构建 libpostal 后：\n\n```\ncd src\u002F\n\n.\u002Faddress_parser\n```\n\naddress_parser 是一个交互式 shell。您只需输入地址，libpostal 就会解析它们并打印结果。\n\n\n绑定\n--------\n\nLibpostal 专为高级语言设计。如果您没有找到自己偏好的语言绑定，或者您正在开发一种语言绑定，请告知我们！\n\n**官方支持的语言绑定**\n\n- Python: [pypostal](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fpypostal)\n- Ruby: [ruby_postal](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fruby_postal)\n- Go: [gopostal](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fgopostal)\n- Java\u002FJVM: [jpostal](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fjpostal)\n- PHP: [php-postal](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fphp-postal)\n- NodeJS: [node-postal](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fnode-postal)\n- R: [poster](https:\u002F\u002Fgithub.com\u002Fironholds\u002Fposter)\n\n**非官方语言绑定**\n\n- Java: [javacpp-presets-libpostal](https:\u002F\u002Fgithub.com\u002Fbytedeco\u002Fjavacpp-presets\u002Ftree\u002Fmaster\u002Flibpostal)\n- LuaJIT: [lua-resty-postal](https:\u002F\u002Fgithub.com\u002Fbungle\u002Flua-resty-postal)\n- Perl: [Geo::libpostal](https:\u002F\u002Fmetacpan.org\u002Fpod\u002FGeo::libpostal)\n- Elixir: [Expostal](https:\u002F\u002Fgithub.com\u002FSweetIQ\u002Fexpostal)\n- Haskell: 
[haskell-postal](http:\u002F\u002Fgithub.com\u002Fnetom\u002Fhaskell-postal)\n- Rust: [rust-postal](https:\u002F\u002Fgithub.com\u002Fpnordahl\u002Frust-postal)\n- Rust: [rustpostal](https:\u002F\u002Fcrates.io\u002Fcrates\u002Frustpostal)\n\n**非官方数据库扩展**\n\n- PostgreSQL: [pgsql-postal](https:\u002F\u002Fgithub.com\u002Fpramsey\u002Fpgsql-postal)\n\n**非官方服务器**\n\n- Libpostal REST Go 服务器（需要约4GB内存）并具备基本安全功能：[postal_server](https:\u002F\u002Fgithub.com\u002Fle0pard\u002Fpostal_server)\n- Libpostal REST Go Docker 镜像：[libpostal-rest-docker](https:\u002F\u002Fgithub.com\u002Fjohnlonganecker\u002Flibpostal-rest-docker)\n- Libpostal REST FastAPI Docker 镜像：[libpostal-fastapi](https:\u002F\u002Fgithub.com\u002Falpha-affinity\u002Flibpostal-fastapi)\n- Libpostal ZeroMQ Docker 镜像：[libpostal-zeromq](https:\u002F\u002Fgithub.com\u002Fpasupulaphani\u002Flibpostal-docker)\n\n\n测试\n-----\n\nlibpostal 使用 [greatest](https:\u002F\u002Fgithub.com\u002Fsilentbicycle\u002Fgreatest) 进行自动化测试。要运行测试，可以使用以下命令：\n\n```\nmake check\n```\n\n即使你的 C 语言基础较弱或完全不熟悉，添加 [测试用例](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Ftree\u002Fmaster\u002Ftest) 也非常简单，我们非常欢迎贡献。我们的测试主要采用功能测试，即通过对比字符串输入与输出来验证结果。\n\n此外，libpostal 还会定期在来自 OSM（干净数据）以及生产级地理编码器的匿名查询（相对复杂的数据）中进行大规模压力测试。在此过程中，我们会使用 valgrind 工具检查内存泄漏及其他潜在错误。\n\n数据文件\n----------\n\nlibpostal 需要从 S3 下载一些数据文件。这些基础文件是用于地址展开操作所需数据结构的磁盘存储表示。对于地址解析部分，由于模型训练可能需要数天时间，我们会在 S3 上发布经过完整训练的模型，并随着 OSM、OpenAddresses 等数据源新增地址时自动更新该模型。语言分类器模型也同样如此。\n\n在执行 `make` 命令时，数据文件会自动下载。若需检查并下载最新数据文件，可以运行 `make`，或者执行以下命令：\n\n```\nlibpostal_data download all $YOUR_DATA_DIR\u002Flibpostal\n```\n\n其中 `$YOUR_DATA_DIR` 应替换为您在安装时通过 `configure` 指定的路径。\n\n语言词典\n--------\n\nlibpostal 包含多个针对不同语言的词典，这些词典会影响地址展开、语言分类及解析器的行为。如需浏览这些词典或贡献您所在语言的缩写和短语，请参阅 [resources\u002Fdictionaries](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Ftree\u002Fmaster\u002Fresources\u002Fdictionaries) 
目录。\n\n训练数据\n--------\n\n在机器学习领域，大量高质量的训练数据往往是取得良好效果的关键。许多开源机器学习项目要么只公开模型代码（只有谷歌才能复现结果），要么直接提供预训练好的模型，而训练的具体条件则对外保密。\n\nlibpostal 则有所不同，它基于对所有人开放的公开数据进行训练，因此我们不仅公开了完整的训练流程（本仓库中的 [geodata](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Ftree\u002Fmaster\u002Fscripts\u002Fgeodata) 包），还将最终的训练数据上传至 Internet Archive，解压后容量超过 100GB。\n\n训练数据按创建日期分别存储于 archive.org 上。此外，在本仓库的主目录下还有一个名为 `current_parser_training_set` 的文件，记录着最新训练集的创建日期。为了始终指向最新的数据，您可以使用类似以下的命令：```latest=$(cat current_parser_training_set)```，然后将该变量用于指定日期。\n\n### 解析器训练集 ###\n所有文件均可在 https:\u002F\u002Farchive.org\u002Fdownload\u002Flibpostal-parser-training-data-YYYYMMDD\u002F$FILE 找到，格式为 gzip 压缩的制表符分隔值（TSV）文件，内容形式如下：```language\\tcountry\\taddress```。\n\n- **formatted_addresses_tagged.random.tsv.gz**（ODBL）：OSM 中的地址数据。公寓、邮政信箱、类别等信息主要添加到这些示例中。\n- **formatted_places_tagged.random.tsv.gz**（ODBL）：OSM 中的所有地名（包括以点表示的城市等），均已反向地理编码至其上级行政区，若点或多边形上标注有邮政编码，则一并包含在内。每个地点都有最低限度的代表性，人口较多的地方则会分配更多样本。\n- **formatted_ways_tagged.random.tsv.gz**（ODBL）：OSM 中的所有街道（具有 highway=* 属性的道路，满足若干条件），均已反向地理编码至其上级行政区。\n- **geoplanet_formatted_addresses_tagged.random.tsv.gz**（CC-BY）：Yahoo GeoPlanet 中的所有邮政编码及其对应的上级行政区。GeoPlanet 的行政区信息已被清理并映射到 libpostal 的标签体系。\n- **openaddresses_formatted_addresses_tagged.random.tsv.gz**（多种许可，多数为 CC-BY）：来自 [OpenAddresses](https:\u002F\u002Fopenaddresses.io\u002F) 的大部分地址数据，这些数据直接来源于政府机构。\n- **uk_openaddresses_formatted_addresses_tagged.random.tsv.gz**（CC-BY）：来自 [OpenAddresses UK](https:\u002F\u002Falpha.openaddressesuk.org\u002F) 的地址数据。\n\n如果解析器在处理某类特定地址时表现不佳，最好的办法是使用 grep 或 awk 工具浏览训练数据，尝试找出是否存在未被捕捉到的地址模式或风格。\n\n功能特性\n--------\n\n- **缩写展开**：例如将“rd”扩展为“road”，几乎适用于任何语言。libpostal 支持超过 50 种语言，且易于添加新语言或扩充现有词典。支持非空格分隔的语言（如中文），也支持日耳曼语系中道路类型直接附加在地址末尾的情况；同时允许选择是否保留空格分隔，从而实现 Rosenstraße 和 Rosen Straße 的等价性。\n- **国际地址解析**：采用 [条件随机场](https:\u002F\u002Fweb.archive.org\u002Fweb\u002F20240104172655\u002Fhttp:\u002F\u002Fblog.echen.me\u002F2012\u002F01\u002F03\u002Fintroduction-to-conditional-random-fields\u002F) 
技术，可将“123 Main Street New York New York”解析为 {\"house_number\": 123, \"road\": \"Main Street\", \"city\": \"New York\", \"state\": \"New York\"}。该解析器适用于多种国家和语言，而不仅限于美国英语环境。模型基于超过 10 亿条地址及相关字符串进行训练，使用 [OpenCage 地址格式化库](https:\u002F\u002Fgithub.com\u002FOpenCageData\u002Faddress-formatting) 中的模板，为全球各有人居住的国家构建格式化、带标签的训练样本。为使训练数据尽可能接近真实的混乱地理编码输入，还进行了多种 [归一化处理](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Fblob\u002Fmaster\u002Fscripts\u002Fgeodata\u002Faddresses\u002Fcomponents.py)。\n\n- **语言分类**：多项逻辑回归模型，使用 [FTRL-Proximal](https:\u002F\u002Fresearch.google.com\u002Fpubs\u002Farchive\u002F41159.pdf) 方法进行训练以引入稀疏性，训练数据涵盖 OpenStreetMap 中的所有道路、addr:* 标签、地名及格式化地址。标签通过点在多边形内测试获得，分别针对 OSM 国家以及各国和一级行政区的官方或地区语言。例如，在西班牙，西班牙语是默认语言，但在加泰罗尼亚、加利西亚、巴斯克地区等不同区域，各自的地方语言才是默认语言。当地方语言非默认语言时（如威尔士语、布列塔尼语、奥克西坦语），会采用基于词典的消歧方法。这些词典还用于缩写规范短语，比如“Calle” => “C\u002F”，这一处理同时应用于语言分类器和地址解析器的训练集。\n\n- **数字表达式解析**（“twenty first” => 21st，“quatre-vingt-douze” => 92，同样基于 CLDR 提供的数据），支持超过 30 种语言。能够处理包含复合表达式的语言，例如 milleottocento => 1800。还可选择对罗马数字进行标准化处理，无论语言如何（IX => 9），这在许多君主、教皇等的名称中常见。\n\n- **快速、准确的分词\u002F词法分析**：速度超过每秒 100 万个词元，遵循 TR-29 规范进行 UTF-8 分词，并以字符为单位对东亚语言进行分词，而非按空格分隔。\n\n- **UTF-8 规范化**：可选将 UTF-8 转换为 NFD 规范形式，去除重音符号（如 à => a）和\u002F或进行拉丁—ASCII 转写。\n\n- **音译**：例如 улица => ulica 或 ulitsa。使用所有 [CLDR 转换规则](http:\u002F\u002Fwww.unicode.org\u002Frepos\u002Fcldr\u002Ftrunk\u002Fcommon\u002Ftransforms\u002F)——与 [ICU](http:\u002F\u002Fsite.icu-project.org\u002F) 使用的源数据完全相同——但 libpostal 并不需要引入整个 ICU 库（以免与系统版本冲突）。注意：部分语言，尤其是希伯来语、阿拉伯语和泰语，可能不包含元音，因此其音译结果往往难以与人工音译一致。未来或许可以为这些语言开发统计型音译工具。\n\n- **文字脚本检测**：识别给定字符串所使用的文字脚本（可能包含多种脚本，例如香港或澳门的自由格式地址可能在同一地址中同时使用汉字和拉丁字母）。在音译过程中，我们可以针对特定 Unicode 脚本应用所有适用的音译规则（例如，希腊文可以分别用希腊—拉丁、希腊—拉丁—BGN 和希腊—拉丁—UNGEGN 等方式进行音译）。\n\n非目标\n---------\n\n- 验证某个位置是否为有效地址\n- 将地址实际地理编码为经纬度坐标（这需要数据库和搜索索引）\n- 从自由文本中提取地址\n\n存在意义\n-------------\n\nlibpostal 最初是作为 [OpenVenues](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fopenvenues) 项目的一部分而创建的，旨在解决场所重复数据清理的问题。在 OpenVenues 
中，我们拥有一个由数百万个地点组成的数据库，这些地点来源于来自 [Common Crawl](http:\u002F\u002Fcommoncrawl.org\u002F) 的数 TB 网页数据。由于 Common Crawl 每月发布一次，即使合并两次抓取的结果也会产生大量重复数据。\n\n去重是一个研究较为充分的领域，对于网页、学术论文等文本文档，已有相当不错的近似相似度算法，例如 [MinHash](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FMinHash)。\n\n然而，对于实体地址而言，由于经常使用诸如 Road == Rd、California == CA 或 New York City == NYC 等约定俗成的缩写，情况就变得复杂一些。即便使用像 MinHash 这样适用于近似匹配且等同于两个集合 Jaccard 相似度的方法，我们也只能处理非常短的文本，而且通常情况下，两个含义相同的地址——一个缩写、另一个完整表述——在 n 元组集合的重叠程度上并不会十分接近。而在非拉丁文字系统中，比如一条俄语地址及其音译版本，即使是指向同一地点的两条地址，也可能连一个字符都对不上。\n\n举个例子，以下是两种书写方式不同的曼哈顿街道地址，它们在语法和详略程度上各有差异，但实际上指代的是同一个地点：\n\n- 30 W 26th St Fl #7\n- 30 West Twenty-sixth Street Floor Number 7\n\n显然，在字符串比较层面，“30 W 26th St Fl #7”并不等于“30 West Twenty-sixth Street Floor Number 7”，但人类却能理解这两条地址指的是同一处物理位置。\n\nlibpostal 的目标是生成规范化后的地理字符串，并将其解析为各个组成部分，从而更有效地判断两条地址的实际匹配程度，以便在服务器端自动做出去重决策。\n\n那么它不是地理编码工具吗？\n-----------------------\n\n如果上述描述听起来很像地理编码，那确实如此。只不过在 OpenVenues 的场景下，我们需要在没有用户界面、也没有用户通过自动补全下拉菜单选择正确地址的情况下完成地理编码。借助诸如 OpenAddresses 或 OpenStreetMap 等原始地址数据库（或两者兼备），libpostal 可以用于实现地址去重以及在 MapReduce 或流处理等环境中进行服务器端批量地理编码。\n\n现在，与其尝试通过庞大的同义词文件、脚本、自定义分析器、分词器等方式，将地址特有的约定嵌入到 Elasticsearch 等传统文档搜索引擎中，不如让地理编码按照以下步骤进行：\n\n1. 将数据库中的地址输入 libpostal 的 expand_address 函数；\n2. 将规范化后的字符串存储在您喜爱的搜索引擎、数据库、哈希表等中；\n3. 对用户的查询或新导入的数据运行 libpostal 处理，并使用这些字符串在现有数据库中进行搜索。\n\n通过这种方式，libpostal 可以在与数据集大小无关的时间复杂度内完成模糊地址匹配。\n\n为何选择 C 语言？\n------\n\nlibpostal 使用 C 语言编写主要有三个原因（按重要性排序）：\n\n1. **可移植性\u002F普适性**：libpostal 面向的是人们日常使用的高级编程语言，如 Python、Go、Ruby、NodeJS 等。C 语言的优势在于几乎任何编程语言都可以与之绑定，且 C 编译器无处不在。因此，您可以选择自己喜欢的语言编写绑定代码，直接在应用程序中使用 libpostal，而无需搭建独立的服务器。我们支持 Mac 和 Linux 系统（Windows 不是优先考虑对象，但欢迎接受补丁提交），并采用标准的 autotools 构建流程以及字节序无关的数据文件格式。Python 绑定代码则作为本仓库的一部分维护，因为它们是构建训练数据所必需的。\n\n2. **内存效率**：libpostal 专为 MapReduce 环境设计，在该环境中，根据机器配置，每个进程的可用内存可能受限于 \u003C 1GB。为了尽可能降低内存占用，libpostal 大量使用连续数组、基于连续数组的前缀树（Trie）、布隆过滤器以及压缩稀疏矩阵等数据结构。即使在仅训练了单个国家或少数几个国家模型的情况下，也可以在移动设备上运行 libpostal。\n\n3. 
**性能**：性能之所以排在最后是有原因的。libpostal 的大部分优化都集中在内存使用上，而非单纯追求速度。尽管如此，考虑到其处理的工作量，libpostal 的速度仍然相当快。在我们测试过的平台上，单线程\u002F单进程每秒可处理 1–3 万个地址（这意味着只需略超过一小时即可处理完 OSM 全球数据集中的所有地址）。您可以使用简单的基准测试程序，在自己的环境中和不同类型的输入上进行测试。在 MapReduce 环境中，单核性能并不那么重要，因为所有任务都是并行执行的；但在 Mapzen 的一些流式数据导入应用中，仍需要在进程中高效运行。\n\nC 语言编码规范\n-------------\n\nlibpostal 使用现代且易读的 C99 语言编写，并遵循以下编码规范：\n\n- 尽可能以 C 语言实现面向对象风格；\n- 几乎不使用基于指针的数据结构，全程使用数组；\n- 采用动态字符数组（受 [sds](https:\u002F\u002Fgithub.com\u002Fantirez\u002Fsds) 启发）来更安全地处理字符串；\n- 将几乎所有 malloc 操作限制在 *name*_new 函数中，而所有释放操作则集中于 *name*_destroy 函数；\n- 对哈希表等简单数据结构采用高效的现有实现；\n- 尽可能使用通用容器（通过 [klib](https:\u002F\u002Fgithub.com\u002Fattractivechaos\u002Fklib) 库）；\n- 数据结构尽可能利用稀疏性特点；\n- 对大多数字符串词典采用高效的双数组 Trie 实现；\n- 尽可能实现跨平台兼容，尤其是针对类 Unix 系统。\n\n预处理（Python）\n----------------------\n\nlibpostal 仓库中的 [geodata](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Ftree\u002Fmaster\u002Fscripts\u002Fgeodata) Python 包包含用于预处理各类地理数据集并构建 C 模型训练数据的完整流程。对于大多数用户而言，此包并非必需；但对于希望生成新型地址或改进 libpostal 训练数据的用户来说，这里正是所需之处。\n\n地址解析器准确率\n-----------------------\n\n在保留的测试数据上（即模型从未见过的已标注解析结果），地址解析器的完整解析准确率达到 99.45%。\n\n对于命名实体识别等任务，通常更适合使用 F1 分数或其他变体，主要是因为存在类别偏差问题（大多数词语并非实体，如果系统简单地将每个标记都预测为非实体，其准确率反而会很高）。然而，地址解析并不属于这种情况。每个标记都有明确的标签，且训练数据中每种类别都有数百万个示例，因此准确率是一种更为清晰、简单且直观的性能衡量标准。\n\n在此，我们采用完整解析准确率，即只有当解析器正确识别出地址中的每一个标记时，才会在分子中计为“1 分”。这种度量方式比单纯检查每个标记是否正确更为合理。\n\n改进地址解析器\n--------------------\n\n尽管当前的解析器对大多数标准地址表现良好，但仍有一定改进空间，尤其是在确保所使用的训练数据尽可能贴近真实环境中的地址方面。进一步提升地址解析器性能主要有两种途径（按难度排序）：\n\n1. 向 OSM 贡献地址数据。凡是带有 addr:housenumber 标签的地址，下次训练解析器时都会自动纳入其中。\n2. 
如果地址解析器对某个特定国家、语言或地址格式表现不佳，很可能是在创建训练数据时遗漏或错误标注了一些名称变体或地点。有时只需更新 https:\u002F\u002Fgithub.com\u002FOpenCageData\u002Faddress-formatting 中的地址格式定义即可解决问题；而在许多其他情况下，只需在创建训练数据时进行一些相对简单的调整，就能确保模型被训练成能够处理您的具体用例，而无需您手动录入任何数据。如果您发现明显的地址解析错误模式，请直接在 GitHub 上提交问题。\n\n贡献\n-----\n\n欢迎提交 bug 报告、问题和拉取请求。请在提交问题、bug 报告或拉取请求之前阅读 [贡献指南](CONTRIBUTING.md)。\n\n问题提交地址：https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Fissues。\n\n致谢\n---------\n\n特别感谢 @BenK10 完成了最初的 Windows 构建工作，以及 @AeroXuk 将其无缝集成到项目中，并搭建了 Appveyor 构建系统。\n\n许可证\n-------\n\n本软件以开源形式发布，遵循 [MIT 许可证](http:\u002F\u002Fopensource.org\u002Flicenses\u002FMIT) 的条款。","# libpostal 快速上手指南\n\nlibpostal 是一个用纯 C 编写的高性能库，利用统计自然语言处理（NLP）和开放数据，对全球各地的街道地址进行解析和标准化。它能够将人类输入的非结构化地址转换为机器可比较的规范化格式，是地理编码、地图搜索和物流应用的理想预处理工具。\n\n## 环境准备\n\n### 系统要求\n- **操作系统**：Linux (Ubuntu\u002FDebian, CentOS\u002FRHEL), macOS, Windows (通过 MSYS2 或 WSL)\n- **编译器**：GCC 或 Clang\n- **构建工具**：`make`, `autoconf`, `automake`, `libtool`\n\n### 前置依赖\n在开始之前，请确保安装以下依赖项：\n\n**Ubuntu \u002F Debian:**\n```bash\nsudo apt-get install -y autoconf automake git curl build-essential\n```\n\n**CentOS \u002F RHEL:**\n```bash\nsudo yum install -y autoconf automake git curl gcc make libtool\n```\n\n**macOS (需先安装 Homebrew):**\n```bash\nbrew install autoconf automake curl libtool\n```\n\n## 安装步骤\n\n由于原始仓库构建数据较大且下载可能较慢，建议按以下步骤操作。目前官方未提供专门的中国镜像源，但可以通过配置代理或手动下载数据加速。\n\n### 1. 克隆源码\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal.git\ncd libpostal\n```\n\n### 2. 配置与编译\n运行引导脚本并配置安装路径（建议安装到 `\u002Fusr\u002Flocal` 以便系统调用）：\n\n```bash\n.\u002Fbootstrap.sh\n.\u002Fconfigure --datadir=$(pwd)\u002Fdata\nmake -j4\nsudo make install\n```\n\n> **注意**：首次运行时，libpostal 会自动下载约 2GB+ 的地址训练数据到 `data` 目录。如果网络受阻，构建过程可能会卡住或失败。请确保网络通畅，或配置 HTTP 代理。\n\n### 3. 
更新动态链接库缓存\n安装完成后，刷新系统库缓存以确保能正确找到 libpostal：\n\n```bash\nsudo ldconfig\n```\n\n## 基本使用\n\n### 命令行工具测试\n安装成功后，可以使用内置的 `postal` 命令行工具进行快速测试。\n\n**解析地址（将地址拆分为组件）：**\n```bash\necho \"北京市朝阳区建国路 87 号\" | postal\n```\n*输出示例：*\n```text\n{\n  \"house_number\": \"87\",\n  \"road\": \"建国路\",\n  \"suburb\": \"朝阳区\",\n  \"city\": \"北京市\",\n  \"country\": \"中国\"\n}\n```\n\n**标准化地址（统一格式用于比对）：**\n```bash\necho \"No. 87 Jianguo Road, Chaoyang District, Beijing\" | postal -s\n```\n*输出示例：*\n```text\nno 87 jianguo road chaoyang district beijing\n```\n\n### 代码集成\nlibpostal 官方支持多种语言绑定，推荐直接使用对应语言的封装库而非直接调用 C 接口：\n\n- **Python**: `pip install postal`\n- **Node.js**: `npm install node-postal`\n- **Go**: `go get github.com\u002Fopenvenues\u002Fgopostal`\n- **Java**: 参考 [jpostal](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fjpostal)\n- **PHP**: 参考 [php-postal](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Fphp-postal)\n\n**Python 使用示例：**\n```python\nfrom postal.parser import parse_address\nfrom postal.normalize import normalize_address\n\naddress = \"北京市海淀区中关村大街 1 号\"\nparsed = parse_address(address)\nprint(parsed) \n# 输出：[('1', 'house_number'), ('中关村大街', 'road'), ('海淀区', 'suburb'), ('北京市', 'city')]\n\nnormalized = normalize_address(address)\nprint(normalized)\n# 输出：hai dian qu zhong guan cun da jie 1 hao bei jing shi\n```","一家跨国即时配送平台在整合全球骑手与商家数据时，面临海量非标准化地址导致的派单失败难题。\n\n### 没有 libpostal 时\n- **解析错误率高**：面对\"北京市朝阳区建国路 88 号\"或\"123 Main St, Apt 4B, NY\"等混合了缩写、大小写不一的自由文本，传统正则规则难以准确拆分街道、门牌和单元号，导致系统无法识别具体位置。\n- **多语言支持匮乏**：业务拓展至日本、德国或俄罗斯时，因缺乏针对当地语言习惯（如日语地址从大到小、德语街道后缀变体）的解析能力，新市场的地址数据几乎无法自动化处理。\n- **数据去重困难**：同一地点因用户输入习惯不同（如\"St.\"与\"Street\"、\"Rd\"与\"Road\"）被记录为多条差异巨大的数据，造成地图上的重复标记和运力浪费。\n- **地理编码成本高**：直接将脏数据发送给收费的地理编码 API，不仅因格式不规范导致大量请求失败，还因无效调用产生了高昂的账单。\n\n### 使用 libpostal 后\n- **结构化解析精准**：libpostal 利用统计自然语言处理技术，能自动将混乱的地址文本清洗并拆分为标准的“道路名”、“门牌号”、“城市”等字段，无论输入多么随意都能输出机器可读的统一格式。\n- **全球语言无缝覆盖**：内置对全球数十种语言和开放地理数据的支持，无需为每个新国家编写特定规则，即可完美处理从中文到阿拉伯语的各种地址书写惯例。\n- 
**标准化实现去重**：通过将不同写法的地址归一化为标准形式（例如统一将\"Blvd.\"转换为\"Boulevard\"），系统能轻松识别并合并重复记录，显著提升地图数据的准确性。\n- **提升下游效率**：作为地理编码的前置预处理步骤，libpostal 大幅提高了后续定位服务的成功率，减少了无效 API 调用，使整体派单系统的响应速度更快且成本更低。\n\nlibpostal 通过将全球复杂的自由文本地址转化为标准化的机器语言，成为跨国物流与地图应用实现高效、低成本全球扩张的核心基础设施。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fopenvenues_libpostal_03f4ef35.png","openvenues","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fopenvenues_3f317d6f.png","An open source project sponsored by Mapzen.",null,"https:\u002F\u002Fgithub.com\u002Fopenvenues",[80,84,87,91,95,98],{"name":81,"color":82,"percentage":83},"C","#555555",97.7,{"name":85,"color":86,"percentage":32},"Python","#3572A5",{"name":88,"color":89,"percentage":90},"C++","#f34b7d",0.2,{"name":92,"color":93,"percentage":94},"Shell","#89e051",0.1,{"name":96,"color":77,"percentage":97},"M4",0,{"name":99,"color":100,"percentage":97},"Makefile","#427819",4773,466,"2026-04-16T20:45:27","MIT",4,"Linux, macOS, Windows","未说明",{"notes":109,"python":110,"dependencies":111},"核心库由纯 C 语言编写。README 中未列出具体的内存或 GPU 需求，也未指定具体的 Python 版本（仅提供 Python 绑定链接）。该工具主要用于地址解析和标准化，基于统计 NLP，通常不需要 GPU 加速。安装通常需要源码编译。","未说明 (核心库为 C 语言，Python 仅为官方支持的绑定语言之一)",[112,113,114,115,116,117],"C 编译器 (gcc\u002Fclang)","autoconf","automake","libtool","pkg-config","curl",[16,119,14,13,35,45,15,120],"其他","音频",[122,123,124,125,126,127,128,129,130,131],"address-parser","machine-learning","nlp","address","international","c","deduplication","record-linkage","deduping","natural-language-processing","2026-03-27T02:49:30.150509","2026-04-18T02:20:33.360015",[135,140,145,150,155,160],{"id":136,"question_zh":137,"answer_zh":138,"source_url":139},38521,"如何在 Windows 上使用 libpostal？","官方原生不支持 Windows，但可以通过以下方式使用：\n1. 使用第三方包 `pypostalwin`（Python 绑定），它提供了预编译的 DLL 文件。\n2. 如果不需要 Python 绑定，可以从 `pypostalwin` 仓库下载重建后的数据包（rebuilt bundle）直接使用。\n3. 
对于 .NET 用户，可以使用 AppVeyor 生成的 x64 DLL 文件配合现有的 .NET 包装器在生产环境中使用（无需安装 MinGW）。注意 x86 版本可能存在依赖 gcc 的问题，建议优先使用 x64 版本。\n相关资源：https:\u002F\u002Fgithub.com\u002Fselva221724\u002Fpypostalwin","https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Fissues\u002F219",{"id":141,"question_zh":142,"answer_zh":143,"source_url":144},38522,"为什么 libpostal 无法正确解析公寓号或单元号（如 Suite, Apt）？","这是一个已知的局限性。libpostal 中的次要单元信息（如公寓号）主要是随机生成的，且必须带有前缀短语（如 \"#\", \"Apt\", \"Suite\" 等）才能被模型识别。\n如果地址中缺少这些前缀（例如直接写 \"123 Main St, 456\"），模型很难将其识别为单元号，往往会错误地将其归类为道路名称的一部分或忽略。\n解决方法：在将数据输入 libpostal 之前，尝试通过正则表达式或其他规则预处理地址，人为添加缺失的单元前缀（如将 \", 456\" 改为 \" Apt 456\"），以提高解析准确率。","https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Fissues\u002F125",{"id":146,"question_zh":147,"answer_zh":148,"source_url":149},38523,"在 Docker 或某些环境中运行地址解析器时报错 \"could not find parser model file\" 怎么办？","该错误通常表示地址解析器的模型文件丢失或未正确加载。\n解决方案：\n1. 确保在构建或运行容器时，libpostal 的数据文件（data files）已正确下载并放置在预期目录中。\n2. 如果是重新构建镜像后出现此问题，可能是新版本的数据文件路径或命名发生了变化，请检查是否需要手动下载最新的模型文件。\n3. 对于 Windows 用户，如果 Docker 方案持续失败，建议尝试直接使用原生的 Windows 构建版本（见 Windows 支持相关问题）。","https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Fissues\u002F175",{"id":151,"question_zh":152,"answer_zh":153,"source_url":154},38524,"在 RedHat\u002FCentOS 旧版本上安装时遇到 make 错误或数据文件问题如何解决？","在 RedHat EL 7.3 等旧系统上，由于默认的软件源中 `automake` 和 `pkgconfig` 版本过老，可能导致编译失败或数据文件校验错误。\n解决方案：\n1. 尝试手动下载最新的 libpostal 数据文件（模型文件）。\n2. 检查 S3 链接是否可用，如果官方链接失效，可寻找社区维护的镜像或从其他成功构建的环境中复制数据文件到安装目录。\n3. 
确保构建工具链版本满足要求，必要时需手动升级 `autoconf`, `automake`, `libtool` 等工具到较新版本（如 automake 1.15+）。","https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Fissues\u002F287",{"id":156,"question_zh":157,"answer_zh":158,"source_url":159},38525,"能否按国家或语言拆分 libpostal 的数据文件以减小体积（用于移动端）？","社区用户曾提出此需求以便在移动端使用。虽然官方未直接提供按需拆分的二进制包，但用户可以通过自行训练模型来实现。\n解决方法：用户可以尝试仅使用特定国家或语言的数据集来重新训练 libpostal 模型，从而生成较小的、针对特定区域的数据文件。具体训练指南可参考项目文档或相关讨论线程中的详细步骤。","https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Fissues\u002F132",{"id":161,"question_zh":162,"answer_zh":163,"source_url":144},38526,"如何将 libpostal 与地理编码服务（如 NYC Geoclient）结合使用？","推荐的工作流程是先用 libpostal 对地址进行标准化和解析，然后再传递给地理编码服务。\n具体实践：\n1. 使用 Docker 部署 `libpostal-rest-docker` 和 `geoclient-docker`。\n2. 在本地处理大量地址数据（如数十万条），先调用 libpostal 清洗和结构化地址，再发送给 Geoclient 进行地理编码。\n3. 这种方式可以避免公共 API 的速率限制，并提高本地处理的效率和隐私性。",[165,170,175,180,185,190,195],{"id":166,"version":167,"summary_zh":168,"released_at":169},310389,"v1.1","本次发布为 libpostal 的 C 语言 API 新增了三组重要功能，以支持 [lieu](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flieu) 地址\u002F场所去重项目。目前这些 API 层次仍较为底层，但在广泛的地理应用中仍然具有实用价值，尤其是在大规模数据集的批量地理编码场景中。这标志着此前关于地址扩展的一些工作已落地实现。\n\n# 近似重复检测\n\n近似重复哈希基于 `expand_address` 功能，能够将解析后的地址哈希成适合直接比较和自动聚类的字符串。这些哈希键用于在进行两两去重之前将相似记录分组，从而避免对每条记录都与其他所有记录进行比较（即 N² 次比较）。相反，如果我们有一个函数，可以为可能的重复记录生成相同的哈希键（例如“100 Main”和“100 E Main St”），同时又具有高度的选择性，就能确保大多数重复记录会被捕获并送往下一级进一步比对，而差异较大的记录则可被安全地判定为非重复。在 MapReduce 场景中，近似重复哈希可用作键，以保证可能的重复记录会被分到同一分片上进行两两核对；而在搜索或数据库场景下，它们则可用作索引，快速查找潜在的重复候选，然后再对与该哈希匹配的少数记录执行更细致的比较。这是去重流程的第一步，用于识别潜在的重复记录，可以被视为记录链接中的阻塞函数（一种高选择性的阻塞函数）或近似重复检测领域中的局部敏感哈希的一种形式。libpostal 的近似重复哈希结合了库中多项新特性：\n\n1. 
**地址根词扩展**：移除街道名称中可忽略的标记，如“Ave”、“Pl”、“Road”等，从而使“West 125th St”有可能与“W 125”匹配。此外，它还支持公寓号的精确比较，例如“Apt 2”和“#2”表示相同含义。每个地址组成部分都会使用 libpostal 中的特定词典来判断哪些内容可忽略、哪些不可忽略。尽管这种方法是基于规则且确定性的，但它仍能在许多复杂情况下准确识别出正确的根词，比如“Avenue Rd”、“Avenue E”、“E St SE”、“E Ctr St”等。虽然目前使用的测试用例多为英语，但 libpostal 的词典结构使其在全球范围内也能较好地工作，例如匹配西班牙语街道名称：在政府数据集中可能会包含“Calle”，但在日常口语或用户自报地址中却很少使用。对于街道名称，我们还会进一步去除根词扩展结果中的空白字符，因此“Sea Grape Ln”和“Seagrape Ln”都会被归一化为“seagrape”。\n\n2. **名称的语音匹配**：针对以拉丁字母书写的场所\u002F地点\u002F公司名称的近似重复哈希，采用了经过修改的 Double Metaphone（双重变音位）算法。","2018-05-09T18:37:43",{"id":171,"version":172,"summary_zh":173,"released_at":174},310390,"v1.0.0","大型解析器与数据合并工作已顺利完成。Libpostal 1.0 引入了前所未有的国际地址解析器，在保留测试集地址上实现了 99.45% 的完整解析准确率。发布名称致敬了一部电视剧（Libpostal 同样诞生于布鲁克林，这也是首个解析准确率突破 99% 的模型版本）。详细信息请参阅 [博客文章](https:\u002F\u002Fmedium.com\u002F@albarrentine\u002Fstatistical-nlp-on-openstreetmap-part-2-80405b988718)。以下是一个 GIF 动图，展示了它的实际效果：\n\n![parser](https:\u002F\u002Fcloud.githubusercontent.com\u002Fassets\u002F238455\u002F24703087\u002Facbe35d8-19cf-11e7-8850-77fb1c3446a7.gif)\n\n## API 破坏性变更 ##\n- 公共头文件（libpostal.h）中定义的所有函数、结构体、常量等现在都使用 `libpostal_` 前缀。这将影响所有调用 C API 的绑定库。本 GitHub 组织下的绑定库均已推出 1.0 分支。\n\n## 新增标签 ##\n\n### 次级建筑标签 ###\n- **unit**：公寓、单元、办公室、地块或其他次级单元标识符\n- **level**：表示楼层的表达式，如“3楼”、“底层”等\n- **staircase**：编号或字母标记的楼梯间\n- **entrance**：编号或字母标记的入口\n- **po_box**：邮政信箱，通常出现在非实体地址（仅邮寄地址）中\n\n### 类别标签 ###\n- **category**：用于类别查询，如“餐厅”等\n- **near**：在类别短语后使用的“在……”、“靠近……”等介词短语，有助于解析类似“布鲁克林的餐厅”这样的查询\n\n### 新增行政区域标签 ###\n- **island**：有名称的岛屿，如“茂宜岛”\n- **country_region**：国家内部的一种非正式分区，不具有任何政治地位\n- **world_region**：目前仅用于在国家名称后添加“西印度群岛”，这是英语加勒比地区常见的格式，例如“牙买加，西印度群岛”\n\n## 不再对输入进行去重音或转写处理 ##\n\n现引入一个新的转写器，它仅对输入执行简单的修改（HTML 实体规范化、NFC Unicode 规范化）。运行时不再进行拉丁—ASCII 转写。取而代之的是，在训练阶段将地址转写为多种形式，使解析器能够直接处理所有变体，而不是像以前那样先统一到单一形式（而在芬兰语、土耳其语等语言中，这种统一方式本身并不准确）。这一改进使得解析器能够在两个环节都更好地应对多样化的输入。\n\n## 训练数据覆盖全球所有有人居住的国家，超过 10 亿条样本 ##\n\nLibpostal 解析器的训练数据得到了极大扩充，现已包含 OpenStreetMap 中的所有国家及属地。此外，我们还使用了一个仅包含地点的数据集，其中 OSM 
中的每个城市名称都会有所体现，即使该城市没有具体地址（人口较多的城市会根据其人口规模分配相应数量的样本）。针对街道也构建了类似的训练集，因此即便某些地方地址稀少，但只要 OSM 上存在路网，这些地点同样可以被纳入训练范围。\n\n1.0 版本进一步扩展了数据来源，除了 OpenStreetMap 外，还使用了 [OpenAddresses](openaddresses","2017-04-07T21:48:36",{"id":176,"version":177,"summary_zh":178,"released_at":179},310391,"v0.3.4","修复了 https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Fissues\u002F160 中提到的小内存泄漏问题。\n","2017-02-09T20:04:33",{"id":181,"version":182,"summary_zh":183,"released_at":184},310392,"v0.3.3","此版本新增了在运行时配置数据目录的功能。\n- libpostal_setup_datadir(char *dir)\n- libpostal_setup_language_classifier_datadir(char *dir)\n- libpostal_setup_parser_datadir(char *dir)\n","2017-01-09T22:28:58",{"id":186,"version":187,"summary_zh":188,"released_at":189},310393,"v0.3.2","已将 libpostal 的 [parser-data](https:\u002F\u002Fgithub.com\u002Fopenvenues\u002Flibpostal\u002Ftree\u002Fparser-data) 分支中的部分提交合并到 master 分支，以修复从 master 分支进行地址解析器训练的问题。\n\n此次合并恰逢 parser-data 分支中生成的部分解析器训练数据的发布：\n\n1. **OSM 训练地址（27GB，ODBL 许可）**  \n   [下载链接](https:\u002F\u002Flibpostal.s3.amazonaws.com\u002Ftraining_data\u002F2016-12-12\u002Fformatted_addresses_tagged.random.tsv)  \n   这是用于训练 libpostal 的原始数据集的大幅改进版本。\n\n2. **OSM 格式化地名\u002F行政区划信息（4GB，ODBL 许可）**  \n   [下载链接](https:\u002F\u002Flibpostal.s3.amazonaws.com\u002Ftraining_data\u002F2016-12-12\u002Fformatted_places_tagged.random.tsv)  \n   有助于确保某个国家的所有地名（城市、郊区等）都包含在 libpostal 的训练集中，即使该地没有对应的地址记录。\n\n3. **GeoPlanet 邮政编码与行政区划信息（11GB，CC-BY 许可）**  \n   [下载链接](https:\u002F\u002Flibpostal.s3.amazonaws.com\u002Ftraining_data\u002F2016-12-12\u002Fgeoplanet_formatted_addresses_tagged.random.tsv)  \n   包含来自全球的大量邮政编码，其中包括英国超过 100 万个邮政编码及其对应的行政区划信息。如果在 master 分支上进行训练，这部分数据可能有所帮助，但也仍然高度依赖 GeoNames 数据来获取邮政编码信息。\n\n4. 
**OpenAddresses 训练地址（30GB，多种许可）**  \n   [下载链接](https:\u002F\u002Flibpostal.s3.amazonaws.com\u002Ftraining_data\u002F2016-12-12\u002Fopenaddresses_formatted_addresses_tagged.random.tsv)  \n   这是迄今为止最大的数据集。它并非收录 OpenAddresses 中的所有数据源，而是仅选取了适合导入 libpostal 的那些。数据主要集中在北美地区，同时也覆盖了许多欧盟国家。大多数数据源只需注明出处即可使用，少数则带有“相同方式共享”条款。更多详情请参阅 [openaddresses.io](https:\u002F\u002Fopenaddresses.io\u002F)。\n\n建议用户对这些数据进行质量检查，以发现潜在的问题模式等。请注意：虽然现在可以使用这些数据的子集来训练特定国家或语言的解析器，但我们不会为自定义解析器提供支持。\n\n本次发布的版本以世界上最大的火车站命名：名古屋站，详情请见：[维基百科 - 名古屋站](https:\u002F\u002Fen.wikipedia.org\u002Fwiki\u002FNagoya_Station)。","2016-12-20T19:04:55",{"id":191,"version":192,"summary_zh":193,"released_at":194},310394,"v0.3.1","- 数据下载脚本将 S3 文件拆分为 64MB 的分块（与 awscli 中的分块方式一致），并使用进程池\n- 用户提交的各类词典更新","2016-08-29T19:31:33",{"id":196,"version":197,"summary_zh":198,"released_at":199},310395,"v0.3","libpostal 的初始发布，如 [介绍性博客文章](https:\u002F\u002Fmedium.com\u002F@albarrentine\u002Fstatistical-nlp-on-openstreetmap-b9d573e6cc86) 所述。","2016-05-26T17:04:51"]