[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-fugue-project--fugue":3,"tool-fugue-project--fugue":64},[4,17,27,35,43,56],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":16},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,3,"2026-04-05T11:01:52",[13,14,15],"开发框架","图像","Agent","ready",{"id":18,"name":19,"github_repo":20,"description_zh":21,"stars":22,"difficulty_score":23,"last_commit_at":24,"category_tags":25,"status":16},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",138956,2,"2026-04-05T11:33:21",[13,15,26],"语言模型",{"id":28,"name":29,"github_repo":30,"description_zh":31,"stars":32,"difficulty_score":23,"last_commit_at":33,"category_tags":34,"status":16},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 
绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",107662,"2026-04-03T11:11:01",[13,14,15],{"id":36,"name":37,"github_repo":38,"description_zh":39,"stars":40,"difficulty_score":23,"last_commit_at":41,"category_tags":42,"status":16},3704,"NextChat","ChatGPTNextWeb\u002FNextChat","NextChat 是一款轻量且极速的 AI 助手，旨在为用户提供流畅、跨平台的大模型交互体验。它完美解决了用户在多设备间切换时难以保持对话连续性，以及面对众多 AI 模型不知如何统一管理的痛点。无论是日常办公、学习辅助还是创意激发，NextChat 都能让用户随时随地通过网页、iOS、Android、Windows、MacOS 或 Linux 端无缝接入智能服务。\n\n这款工具非常适合普通用户、学生、职场人士以及需要私有化部署的企业团队使用。对于开发者而言，它也提供了便捷的自托管方案，支持一键部署到 Vercel 或 Zeabur 等平台。\n\nNextChat 的核心亮点在于其广泛的模型兼容性，原生支持 Claude、DeepSeek、GPT-4 及 Gemini Pro 等主流大模型，让用户在一个界面即可自由切换不同 AI 能力。此外，它还率先支持 MCP（Model Context Protocol）协议，增强了上下文处理能力。针对企业用户，NextChat 提供专业版解决方案，具备品牌定制、细粒度权限控制、内部知识库整合及安全审计等功能，满足公司对数据隐私和个性化管理的高标准要求。",87618,"2026-04-05T07:20:52",[13,26],{"id":44,"name":45,"github_repo":46,"description_zh":47,"stars":48,"difficulty_score":23,"last_commit_at":49,"category_tags":50,"status":16},2268,"ML-For-Beginners","microsoft\u002FML-For-Beginners","ML-For-Beginners 是由微软推出的一套系统化机器学习入门课程，旨在帮助零基础用户轻松掌握经典机器学习知识。这套课程将学习路径规划为 12 周，包含 26 节精炼课程和 52 道配套测验，内容涵盖从基础概念到实际应用的完整流程，有效解决了初学者面对庞大知识体系时无从下手、缺乏结构化指导的痛点。\n\n无论是希望转型的开发者、需要补充算法背景的研究人员，还是对人工智能充满好奇的普通爱好者，都能从中受益。课程不仅提供了清晰的理论讲解，还强调动手实践，让用户在循序渐进中建立扎实的技能基础。其独特的亮点在于强大的多语言支持，通过自动化机制提供了包括简体中文在内的 50 多种语言版本，极大地降低了全球不同背景用户的学习门槛。此外，项目采用开源协作模式，社区活跃且内容持续更新，确保学习者能获取前沿且准确的技术资讯。如果你正寻找一条清晰、友好且专业的机器学习入门之路，ML-For-Beginners 将是理想的起点。",84991,"2026-04-05T10:45:23",[14,51,52,53,15,54,26,13,55],"数据工具","视频","插件","其他","音频",{"id":57,"name":58,"github_repo":59,"description_zh":60,"stars":61,"difficulty_score":10,"last_commit_at":62,"category_tags":63,"status":16},3128,"ragflow","infiniflow\u002Fragflow","RAGFlow 
是一款领先的开源检索增强生成（RAG）引擎，旨在为大语言模型构建更精准、可靠的上下文层。它巧妙地将前沿的 RAG 技术与智能体（Agent）能力相结合，不仅支持从各类文档中高效提取知识，还能让模型基于这些知识进行逻辑推理和任务执行。\n\n在大模型应用中，幻觉问题和知识滞后是常见痛点。RAGFlow 通过深度解析复杂文档结构（如表格、图表及混合排版），显著提升了信息检索的准确度，从而有效减少模型“胡编乱造”的现象，确保回答既有据可依又具备时效性。其内置的智能体机制更进一步，使系统不仅能回答问题，还能自主规划步骤解决复杂问题。\n\n这款工具特别适合开发者、企业技术团队以及 AI 研究人员使用。无论是希望快速搭建私有知识库问答系统，还是致力于探索大模型在垂直领域落地的创新者，都能从中受益。RAGFlow 提供了可视化的工作流编排界面和灵活的 API 接口，既降低了非算法背景用户的上手门槛，也满足了专业开发者对系统深度定制的需求。作为基于 Apache 2.0 协议开源的项目，它正成为连接通用大模型与行业专有知识之间的重要桥梁。",77062,"2026-04-04T04:44:48",[15,14,13,26,54],{"id":65,"github_repo":66,"name":67,"description_en":68,"description_zh":69,"ai_summary_zh":69,"readme_en":70,"readme_zh":71,"quickstart_zh":72,"use_case_zh":73,"hero_image_url":74,"owner_login":75,"owner_name":76,"owner_avatar_url":77,"owner_bio":78,"owner_company":79,"owner_location":79,"owner_email":80,"owner_twitter":79,"owner_website":79,"owner_url":81,"languages":82,"stars":102,"forks":103,"last_commit_at":104,"license":105,"difficulty_score":23,"env_os":106,"env_gpu":106,"env_ram":106,"env_deps":107,"category_tags":120,"github_topics":121,"view_count":23,"oss_zip_url":79,"oss_zip_packed_at":79,"status":16,"created_at":130,"updated_at":131,"faqs":132,"releases":162},2901,"fugue-project\u002Ffugue","fugue","A unified interface for distributed computing. 
Fugue executes SQL, Python, Pandas, and Polars code on Spark, Dask and Ray without any rewrites.","Fugue 是一个专为分布式计算设计的统一接口工具，旨在打破不同大数据引擎之间的壁垒。它允许开发者直接使用熟悉的 Python、Pandas、Polars 代码或 SQL 语句，在 Spark、Dask 和 Ray 等主流分布式框架上运行，而无需对原有代码进行大规模重写。\n\n在传统开发中，将本地脚本迁移到分布式环境往往需要学习特定的 API 并重构大量逻辑，过程繁琐且容易出错。Fugue 完美解决了这一痛点，让用户能够以最小的改动成本，轻松实现现有代码的并行化与横向扩展。无论是处理海量数据的工程师，还是希望提升计算效率的数据科学家，都能利用 Fugue 快速构建端到端的工作流。\n\nFugue 的核心亮点在于其简洁的 API 设计（如 `transform` 函数）以及独特的 FugueSQL。后者不仅支持标准 SQL 操作，还能直接在查询中调用 Python 函数，实现了声明式语言与命令式代码的无缝融合。通过屏蔽底层引擎的复杂性，Fugue 让技术人员可以专注于业务逻辑本身，灵活选择最适合的计算后端，从而显著提升开发效率与系统可维护性。","# \u003Cimg src=\".\u002Fimages\u002Flogo.svg\" width=\"200\">\n\n[![PyPI version](https:\u002F\u002Fbadge.fury.io\u002Fpy\u002Ffugue.svg)](https:\u002F\u002Fpypi.python.org\u002Fpypi\u002Ffugue\u002F)\n[![PyPI pyversions](https:\u002F\u002Fimg.shields.io\u002Fpypi\u002Fpyversions\u002Ffugue.svg)](https:\u002F\u002Fpypi.python.org\u002Fpypi\u002Ffugue\u002F)\n[![PyPI license](https:\u002F\u002Fimg.shields.io\u002Fpypi\u002Fl\u002Ffugue.svg)](https:\u002F\u002Fpypi.python.org\u002Fpypi\u002Ffugue\u002F)\n[![codecov](https:\u002F\u002Fcodecov.io\u002Fgh\u002Ffugue-project\u002Ffugue\u002Fgraph\u002Fbadge.svg?token=ZO9YD5N3IA)](https:\u002F\u002Fcodecov.io\u002Fgh\u002Ffugue-project\u002Ffugue)\n[![Codacy Badge](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffugue-project_fugue_readme_adaaa9495e23.png)](https:\u002F\u002Fwww.codacy.com\u002Fgh\u002Ffugue-project\u002Ffugue\u002Fdashboard?utm_source=github.com&utm_medium=referral&utm_content=fugue-project\u002Ffugue&utm_campaign=Badge_Grade)\n[![Downloads](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffugue-project_fugue_readme_d525d2a8b119.png)](https:\u002F\u002Fpepy.tech\u002Fproject\u002Ffugue)\n\n| Tutorials                                                                                           | API Documentation                                                                     | Chat with us on slack!                        
                                                                           |\n| --------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------ |\n| [![Jupyter Book Badge](https:\u002F\u002Fjupyterbook.org\u002Fbadge.svg)](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002F) | [![Doc](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffugue-project_fugue_readme_6bf48b3e9a6d.png)](https:\u002F\u002Ffugue.readthedocs.org) | [![Slack Status](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002Fslack-join_chat-white.svg?logo=slack&style=social)](http:\u002F\u002Fslack.fugue.ai) |\n\n\n**Fugue is a unified interface for distributed computing that lets users execute Python, Pandas, and SQL code on Spark, Dask, and Ray with minimal rewrites**.\n\nFugue is most commonly used for:\n\n*   **Parallelizing or scaling existing Python and Pandas code** by bringing it to Spark, Dask, or Ray with minimal rewrites.\n*   Using [FugueSQL](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fquick_look\u002Ften_minutes_sql.html) to **define end-to-end workflows** on top of Pandas, Spark, and Dask DataFrames. FugueSQL is an enhanced SQL interface that can invoke Python code.\n\nTo see how Fugue compares to other frameworks like dbt, Arrow, Ibis, PySpark Pandas, see the [comparisons](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002F#how-does-fugue-compare-to)\n\n## [Fugue API](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fquick_look\u002Ften_minutes.html)\n\nThe Fugue API is a collection of functions that are capable of running on Pandas, Spark, Dask, and Ray. 
The simplest way to use Fugue is the [`transform()` function](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fbeginner\u002Ftransform.html). This lets users parallelize the execution of a single function by bringing it to Spark, Dask, or Ray. In the example below, the `map_letter_to_food()` function takes in a mapping and applies it on a column. This is just Pandas and Python so far (without Fugue).\n\n```python\nimport pandas as pd\nfrom typing import Dict\n\ninput_df = pd.DataFrame({\"id\":[0,1,2], \"value\": ([\"A\", \"B\", \"C\"])})\nmap_dict = {\"A\": \"Apple\", \"B\": \"Banana\", \"C\": \"Carrot\"}\n\ndef map_letter_to_food(df: pd.DataFrame, mapping: Dict[str, str]) -> pd.DataFrame:\n    df[\"value\"] = df[\"value\"].map(mapping)\n    return df\n```\n\nNow, the `map_letter_to_food()` function is brought to the Spark execution engine by invoking the `transform()` function of Fugue. The output `schema` and `params` are passed to the `transform()` call. The `schema` is needed because it's a requirement for distributed frameworks. 
A schema of `\"*\"` below means all input columns are in the output.\n\n```python\nfrom pyspark.sql import SparkSession\nfrom fugue import transform\n\nspark = SparkSession.builder.getOrCreate()\nsdf = spark.createDataFrame(input_df)\n\nout = transform(sdf,\n               map_letter_to_food,\n               schema=\"*\",\n               params=dict(mapping=map_dict),\n               )\n# out is a Spark DataFrame\nout.show()\n```\n```rst\n+---+------+\n| id| value|\n+---+------+\n|  0| Apple|\n|  1|Banana|\n|  2|Carrot|\n+---+------+\n```\n\n\u003Cdetails>\n  \u003Csummary>PySpark equivalent of Fugue transform()\u003C\u002Fsummary>\n\n  ```python\nfrom typing import Iterator, Union\nfrom pyspark.sql.types import StructType\nfrom pyspark.sql import DataFrame, SparkSession\n\nspark_session = SparkSession.builder.getOrCreate()\n\ndef mapping_wrapper(dfs: Iterator[pd.DataFrame], mapping):\n    for df in dfs:\n        yield map_letter_to_food(df, mapping)\n\ndef run_map_letter_to_food(input_df: Union[DataFrame, pd.DataFrame], mapping):\n    # conversion\n    if isinstance(input_df, pd.DataFrame):\n        sdf = spark_session.createDataFrame(input_df.copy())\n    else:\n        sdf = input_df.copy()\n\n    schema = StructType(list(sdf.schema.fields))\n    return sdf.mapInPandas(lambda dfs: mapping_wrapper(dfs, mapping),\n                            schema=schema)\n\nresult = run_map_letter_to_food(input_df, map_dict)\nresult.show()\n  ```\n\u003C\u002Fdetails>\n\nThis syntax is simpler, cleaner, and more maintainable than the PySpark equivalent. At the same time, no edits were made to the original Pandas-based function to bring it to Spark. It is still usable on Pandas DataFrames. Fugue `transform()` also supports Dask and Ray as execution engines alongside the default Pandas-based engine.\n\nThe Fugue API has a broader collection of functions that are also compatible with Spark, Dask, and Ray. 
For example, we can use `load()` and `save()` to create an end-to-end workflow compatible with Spark, Dask, and Ray. For the full list of functions, see the [Top Level API](https:\u002F\u002Ffugue.readthedocs.io\u002Fen\u002Flatest\u002Ftop_api.html)\n\n```python\nimport fugue.api as fa\n\ndef run(engine=None):\n    with fa.engine_context(engine):\n        df = fa.load(\"\u002Fpath\u002Fto\u002Ffile.parquet\")\n        out = fa.transform(df, map_letter_to_food, schema=\"*\")\n        fa.save(out, \"\u002Fpath\u002Fto\u002Foutput_file.parquet\")\n\nrun()                 # runs on Pandas\nrun(engine=\"spark\")   # runs on Spark\nrun(engine=\"dask\")    # runs on Dask\n```\n\nAll functions underneath the context will run on the specified backend. This makes it easy to toggle between local execution, and distributed execution.\n\n## [FugueSQL](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Ffugue_sql\u002Findex.html)\n\nFugueSQL is a SQL-based language capable of expressing end-to-end data workflows on top of Pandas, Spark, and Dask. The `map_letter_to_food()` function above is used in the SQL expression below. This is how to use a Python-defined function along with the standard SQL `SELECT` statement.\n\n```python\nfrom fugue.api import fugue_sql\nimport json\n\nquery = \"\"\"\n    SELECT id, value\n      FROM input_df\n    TRANSFORM USING map_letter_to_food(mapping={{mapping}}) SCHEMA *\n    \"\"\"\nmap_dict_str = json.dumps(map_dict)\n\n# returns Pandas DataFrame\nfugue_sql(query,mapping=map_dict_str)\n\n# returns Spark DataFrame\nfugue_sql(query, mapping=map_dict_str, engine=\"spark\")\n```\n\n## Installation\n\nFugue can be installed through pip or conda. For example:\n\n```bash\npip install fugue\n```\n\nIn order to use Fugue SQL, it is strongly recommended to install the `sql` extra:\n\n```bash\npip install fugue[sql]\n```\n\nIt also has the following installation extras:\n\n*   **sql**: to support Fugue SQL. 
Without this extra, the non-SQL part still works. Before Fugue 0.9.0, this extra is included in Fugue's core dependency so you don't need to install explicitly. **But for 0.9.0+, this becomes required if you want to use Fugue SQL.**\n*   **spark**: to support Spark as the [ExecutionEngine](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fadvanced\u002Fexecution_engine.html).\n*   **dask**: to support Dask as the ExecutionEngine.\n*   **ray**: to support Ray as the ExecutionEngine.\n*   **duckdb**: to support DuckDB as the ExecutionEngine, read [details](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fintegrations\u002Fbackends\u002Fduckdb.html).\n*   **polars**: to support Polars DataFrames and extensions using Polars.\n*   **ibis**: to enable Ibis for Fugue workflows, read [details](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fintegrations\u002Fbackends\u002Fibis.html).\n*   **cpp_sql_parser**: to enable the CPP antlr parser for Fugue SQL. It can be 50+ times faster than the pure Python parser. 
For the main Python versions and platforms, there are already pre-built binaries, but for the remaining, it needs a C++ compiler to build on the fly.\n\nFor example a common use case is:\n\n```bash\npip install \"fugue[duckdb,spark]\"\n```\n\nNote if you already installed Spark or DuckDB independently, Fugue is able to automatically use them without installing the extras.\n\n## [Getting Started](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002F)\n\nThe best way to get started with Fugue is to work through the 10 minute tutorials:\n\n*   [Fugue API in 10 minutes](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fquick_look\u002Ften_minutes.html)\n*   [FugueSQL in 10 minutes](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fquick_look\u002Ften_minutes_sql.html)\n\nFor the top level API, see:\n\n*   [Fugue Top Level API](https:\u002F\u002Ffugue.readthedocs.io\u002Fen\u002Flatest\u002Ftop_api.html)\n\nThe [tutorials](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002F) can also be run in an interactive notebook environment through binder or Docker:\n\n### Using binder\n\n[![Binder](https:\u002F\u002Fmybinder.org\u002Fbadge_logo.svg)](https:\u002F\u002Fmybinder.org\u002Fv2\u002Fgh\u002Ffugue-project\u002Ftutorials\u002Fmaster)\n\n**Note it runs slow on binder** because the machine on binder isn't powerful enough for a distributed framework such as Spark. Parallel executions can become sequential, so some of the performance comparison examples will not give you the correct numbers.\n\n### Using Docker\n\nAlternatively, you should get decent performance by running this Docker image on your own machine:\n\n```bash\ndocker run -p 8888:8888 fugueproject\u002Ftutorials:latest\n```\n\n\n## Jupyter Notebook Extension\n\nThere is an accompanying [notebook extension](https:\u002F\u002Fpypi.org\u002Fproject\u002Ffugue-jupyter\u002F) for FugueSQL that lets users use the `%%fsql` cell magic. 
The extension also provides syntax highlighting for FugueSQL cells. It works for both classic notebook and Jupyter Lab. More details can be found in the [installation instructions](https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue-jupyter#install).\n\n![FugueSQL gif](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffugue-project_fugue_readme_eb4f75f9172b.gif)\n\n\n## Ecosystem\n\nBy being an abstraction layer, Fugue can be used with a lot of other open-source projects seamlessly.\n\nPython backends:\n\n*   [Pandas](https:\u002F\u002Fgithub.com\u002Fpandas-dev\u002Fpandas)\n*   [Polars](https:\u002F\u002Fwww.pola.rs) (DataFrames only)\n*   [Spark](https:\u002F\u002Fgithub.com\u002Fapache\u002Fspark)\n*   [Dask](https:\u002F\u002Fgithub.com\u002Fdask\u002Fdask)\n*   [Ray](http:\u002F\u002Fgithub.com\u002Fray-project\u002Fray)\n*   [Ibis](https:\u002F\u002Fgithub.com\u002Fibis-project\u002Fibis\u002F)\n\nFugueSQL backends:\n\n*   Pandas - FugueSQL can run on Pandas\n*   [Duckdb](https:\u002F\u002Fgithub.com\u002Fduckdb\u002Fduckdb) - in-process SQL OLAP database management\n*   [dask-sql](https:\u002F\u002Fgithub.com\u002Fdask-contrib\u002Fdask-sql) - SQL interface for Dask\n*   SparkSQL\n*   [BigQuery](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fintegrations\u002Fwarehouses\u002Fbigquery.html)\n*   Trino\n\n\nFugue is available as a backend or can integrate with the following projects:\n\n*   [WhyLogs](https:\u002F\u002Fwhylogs.readthedocs.io\u002Fen\u002Flatest\u002Fexamples\u002Fintegrations\u002FFugue_Profiling.html?highlight=fugue) - data profiling\n*   [PyCaret](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fintegrations\u002Fecosystem\u002Fpycaret.html) - low code machine learning\n*   [Nixtla](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fintegrations\u002Fecosystem\u002Fnixtla.html) - timeseries modelling\n*   
[Prefect](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fintegrations\u002Fecosystem\u002Fprefect.html) - workflow orchestration\n*   [Pandera](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fintegrations\u002Fecosystem\u002Fpandera.html) - data validation\n*   [Datacompy (by Capital One)](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fintegrations\u002Fecosystem\u002Fdatacompy.html) - comparing DataFrames\n\nRegistered 3rd party extensions (majorly for Fugue SQL) include:\n\n*   [Pandas plot](https:\u002F\u002Fpandas.pydata.org\u002Fdocs\u002Freference\u002Fapi\u002Fpandas.DataFrame.plot.html) - visualize data using matplotlib or plotly\n*   [Seaborn](https:\u002F\u002Fseaborn.pydata.org\u002Fapi.html) - visualize data using seaborn\n*   [WhyLogs](https:\u002F\u002Fwhylogs.readthedocs.io\u002Fen\u002Flatest\u002Fexamples\u002Fintegrations\u002FFugue_Profiling.html?highlight=fugue) - visualize data profiling\n*   [Vizzu](https:\u002F\u002Fgithub.com\u002Fvizzuhq\u002Fipyvizzu) - visualize data using ipyvizzu\n\n## Community and Contributing\n\nFeel free to message us on [Slack](http:\u002F\u002Fslack.fugue.ai). We also have [contributing instructions](CONTRIBUTING.md).\n\n### Case Studies\n\n*   [How LyftLearn Democratizes Distributed Compute through Kubernetes Spark and Fugue](https:\u002F\u002Feng.lyft.com\u002Fhow-lyftlearn-democratizes-distributed-compute-through-kubernetes-spark-and-fugue-c0875b97c3d9)\n*   [Clobotics - Large Scale Image Processing with Spark through Fugue](https:\u002F\u002Fmedium.com\u002Ffugue-project\u002Flarge-scale-image-processing-with-spark-through-fugue-e510b9813da8)\n*   [Architecture for a data lake REST API using Delta Lake, Fugue & Spark (article by bitsofinfo)](https:\u002F\u002Fbitsofinfo.wordpress.com\u002F2023\u002F08\u002F14\u002Fdata-lake-rest-api-delta-lake-fugue-spark)\n\n### Mentioned Uses\n\n*   [Productionizing Data Science at Interos, Inc. 
(LinkedIn post by Anthony Holten)](https:\u002F\u002Fwww.linkedin.com\u002Fposts\u002Fanthony-holten_pandas-spark-dask-activity-7022628193983459328-QvcF)\n*   [Multiple Time Series Forecasting with Fugue & Nixtla at Bain & Company (LinkedIn post by Fahad Akbar)](https:\u002F\u002Fwww.linkedin.com\u002Fposts\u002Ffahadakbar_fugue-datascience-forecasting-activity-7041119034813124608-u08q?utm_source=share&utm_medium=member_desktop)\n\n## Further Resources\n\nView some of our latest conferences presentations and content. For a more complete list, check the [Content](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fresources\u002Fcontent.html) page in the tutorials.\n\n### Blogs\n\n*   [Why Pandas-like Interfaces are Sub-optimal for Distributed Computing](https:\u002F\u002Ftowardsdatascience.com\u002Fwhy-pandas-like-interfaces-are-sub-optimal-for-distributed-computing-322dacbce43)\n*   [Introducing FugueSQL — SQL for Pandas, Spark, and Dask DataFrames (Towards Data Science by Khuyen Tran)](https:\u002F\u002Ftowardsdatascience.com\u002Fintroducing-fuguesql-sql-for-pandas-spark-and-dask-dataframes-63d461a16b27)\n\n### Conferences\n\n*   [Distributed Machine Learning at Lyft](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=_IVyIOV0LgY)\n*   [Comparing the Different Ways to Scale Python and Pandas Code](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=b3ae0m_XTys)\n*   [Large Scale Data Validation with Spark and Dask (PyCon US)](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=2AdvBgjO_3Q)\n*   [FugueSQL - The Enhanced SQL Interface for Pandas, Spark, and Dask DataFrames (PyData Global)](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=OBpnGYjNBBI)\n*   [Distributed Hybrid Parameter Tuning](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=_GBjqskD8Qk)\n","# \u003Cimg src=\".\u002Fimages\u002Flogo.svg\" width=\"200\">\n\n[![PyPI版本](https:\u002F\u002Fbadge.fury.io\u002Fpy\u002Ffugue.svg)](https:\u002F\u002Fpypi.python.org\u002Fpypi\u002Ffugue\u002F)\n[![PyPI 
Python版本](https:\u002F\u002Fimg.shields.io\u002Fpypi\u002Fpyversions\u002Ffugue.svg)](https:\u002F\u002Fpypi.python.org\u002Fpypi\u002Ffugue\u002F)\n[![PyPI许可证](https:\u002F\u002Fimg.shields.io\u002Fpypi\u002Fl\u002Ffugue.svg)](https:\u002F\u002Fpypi.python.org\u002Fpypi\u002Ffugue\u002F)\n[![Codecov](https:\u002F\u002Fcodecov.io\u002Fgh\u002Ffugue-project\u002Ffugue\u002Fgraph\u002Fbadge.svg?token=ZO9YD5N3IA)](https:\u002F\u002Fcodecov.io\u002Fgh\u002Ffugue-project\u002Ffugue)\n[![Codacy徽章](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffugue-project_fugue_readme_adaaa9495e23.png)](https:\u002F\u002Fwww.codacy.com\u002Fgh\u002Ffugue-project\u002Ffugue\u002Fdashboard?utm_source=github.com&utm_medium=referral&utm_content=fugue-project\u002Ffugue&utm_campaign=Badge_Grade)\n[![下载量](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffugue-project_fugue_readme_d525d2a8b119.png)](https:\u002F\u002Fpepy.tech\u002Fproject\u002Ffugue)\n\n| 教程                                                                                           | API文档                                                                     | 在Slack上与我们交流！                                                                                                   |\n| --------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------ |\n| [![Jupyter Book徽章](https:\u002F\u002Fjupyterbook.org\u002Fbadge.svg)](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002F) | [![文档](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffugue-project_fugue_readme_6bf48b3e9a6d.png)](https:\u002F\u002Ffugue.readthedocs.org) | [![Slack状态](https:\u002F\u002Fimg.shields.io\u002Fbadge\u002Fslack-join_chat-white.svg?logo=slack&style=social)](http:\u002F\u002Fslack.fugue.ai) 
|\n\n\n**Fugue是一个统一的分布式计算接口，允许用户在Spark、Dask和Ray上运行Python、Pandas和SQL代码，且只需进行最少的修改**。\n\nFugue最常用于：\n\n*   **并行化或扩展现有的Python和Pandas代码**，通过将其移植到Spark、Dask或Ray上，而无需大量重写。\n*   使用[FugueSQL](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fquick_look\u002Ften_minutes_sql.html)来**定义基于Pandas、Spark和Dask DataFrame的端到端工作流**。FugueSQL是一种增强型SQL接口，可以调用Python代码。\n\n要了解Fugue与其他框架（如dbt、Arrow、Ibis、PySpark Pandas）相比如何，请参阅[比较](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002F#how-does-fugue-compare-to)。\n\n## [Fugue API](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fquick_look\u002Ften_minutes.html)\n\nFugue API是一组可以在Pandas、Spark、Dask和Ray上运行的函数。使用Fugue最简单的方式是[`transform()`函数](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fbeginner\u002Ftransform.html)。这使用户能够通过将单个函数移植到Spark、Dask或Ray上，实现其并行执行。在下面的例子中，`map_letter_to_food()`函数接受一个映射字典，并将其应用到某一列上。到目前为止，这只是Pandas和Python代码（未使用Fugue）。\n\n```python\nimport pandas as pd\nfrom typing import Dict\n\ninput_df = pd.DataFrame({\"id\":[0,1,2], \"value\": ([\"A\", \"B\", \"C\"])})\nmap_dict = {\"A\": \"Apple\", \"B\": \"Banana\", \"C\": \"Carrot\"}\n\ndef map_letter_to_food(df: pd.DataFrame, mapping: Dict[str, str]) -> pd.DataFrame:\n    df[\"value\"] = df[\"value\"].map(mapping)\n    return df\n```\n\n现在，通过调用Fugue的`transform()`函数，将`map_letter_to_food()`函数移植到Spark执行引擎上。输出的`schema`和`params`被传递给`transform()`调用。需要提供`schema`是因为这是分布式框架的要求。下面的`\"*\"`表示输入的所有列都会出现在输出中。\n\n```python\nfrom pyspark.sql import SparkSession\nfrom fugue import transform\n\nspark = SparkSession.builder.getOrCreate()\nsdf = spark.createDataFrame(input_df)\n\nout = transform(sdf,\n               map_letter_to_food,\n               schema=\"*\",\n               params=dict(mapping=map_dict),\n               )\n# out是一个Spark DataFrame\nout.show()\n```\n```rst\n+---+------+\n| id| value|\n+---+------+\n|  0| Apple|\n|  1|Banana|\n|  2|Carrot|\n+---+------+\n```\n\n\u003Cdetails>\n  \u003Csummary>PySpark等效于Fugue 
transform()\u003C\u002Fsummary>\n\n  ```python\nfrom typing import Iterator, Union\nfrom pyspark.sql.types import StructType\nfrom pyspark.sql import DataFrame, SparkSession\n\nspark_session = SparkSession.builder.getOrCreate()\n\ndef mapping_wrapper(dfs: Iterator[pd.DataFrame], mapping):\n    for df in dfs:\n        yield map_letter_to_food(df, mapping)\n\ndef run_map_letter_to_food(input_df: Union[DataFrame, pd.DataFrame], mapping):\n    # 转换\n    if isinstance(input_df, pd.DataFrame):\n        sdf = spark_session.createDataFrame(input_df.copy())\n    else:\n        sdf = input_df.copy()\n\n    schema = StructType(list(sdf.schema.fields))\n    return sdf.mapInPandas(lambda dfs: mapping_wrapper(dfs, mapping),\n                            schema=schema)\n\nresult = run_map_letter_to_food(input_df, map_dict)\nresult.show()\n  ```\n\u003C\u002Fdetails>\n\n这种语法比PySpark的等效代码更简单、更清晰且更易于维护。同时，原始基于Pandas的函数并未做任何修改就移植到了Spark上。它仍然可以在Pandas DataFrame上使用。Fugue `transform()`还支持Dask和Ray作为执行引擎，除了默认的基于Pandas的引擎之外。\n\nFugue API还包含更多与Spark、Dask和Ray兼容的函数。例如，我们可以使用`load()`和`save()`来创建一个兼容Spark、Dask和Ray的端到端工作流。有关完整函数列表，请参阅[顶级API](https:\u002F\u002Ffugue.readthedocs.io\u002Fen\u002Flatest\u002Ftop_api.html)。\n\n```python\nimport fugue.api as fa\n\ndef run(engine=None):\n    with fa.engine_context(engine):\n        df = fa.load(\"\u002Fpath\u002Fto\u002Ffile.parquet\")\n        out = fa.transform(df, map_letter_to_food, schema=\"*\")\n        fa.save(out, \"\u002Fpath\u002Fto\u002Foutput_file.parquet\")\n\nrun()                 # 在Pandas上运行\nrun(engine=\"spark\")   # 在Spark上运行\nrun(engine=\"dask\")    # 在Dask上运行\n```\n\n上下文中的所有函数都将在指定的后端上运行。这使得在本地执行和分布式执行之间轻松切换成为可能。\n\n## [FugueSQL](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Ffugue_sql\u002Findex.html)\n\nFugueSQL是一种基于SQL的语言，能够在Pandas、Spark和Dask之上表达端到端的数据工作流。上面的`map_letter_to_food()`函数被用于下面的SQL表达式中。以下是使用Python定义的函数以及标准SQL `SELECT`语句的方法。\n\n```python\nfrom fugue.api import fugue_sql\nimport json\n\nquery = 
\"\"\"\n    SELECT id, value\n      FROM input_df\n    TRANSFORM USING map_letter_to_food(mapping={{mapping}}) SCHEMA *\n    \"\"\"\nmap_dict_str = json.dumps(map_dict)\n\n# 返回Pandas DataFrame\nfugue_sql(query,mapping=map_dict_str)\n\n# 返回 Spark DataFrame\nfugue_sql(query, mapping=map_dict_str, engine=\"spark\")\n```\n\n## 安装\n\nFugue 可以通过 pip 或 conda 进行安装。例如：\n\n```bash\npip install fugue\n```\n\n为了使用 Fugue SQL，强烈建议安装 `sql` 附加组件：\n\n```bash\npip install fugue[sql]\n```\n\n此外，Fugue 还提供了以下安装附加组件：\n\n*   **sql**: 用于支持 Fugue SQL。如果不安装此附加组件，非 SQL 部分仍然可以正常工作。在 Fugue 0.9.0 之前，此附加组件已包含在 Fugue 的核心依赖中，因此无需显式安装。**但从 0.9.0 版本开始，若要使用 Fugue SQL，则必须安装此附加组件。**\n*   **spark**: 用于支持 Spark 作为 [ExecutionEngine](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fadvanced\u002Fexecution_engine.html)。\n*   **dask**: 用于支持 Dask 作为 ExecutionEngine。\n*   **ray**: 用于支持 Ray 作为 ExecutionEngine。\n*   **duckdb**: 用于支持 DuckDB 作为 ExecutionEngine，请参阅 [详细信息](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fintegrations\u002Fbackends\u002Fduckdb.html)。\n*   **polars**: 用于支持 Polars DataFrames 及其扩展功能。\n*   **ibis**: 用于在 Fugue 工作流中启用 Ibis，请参阅 [详细信息](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fintegrations\u002Fbackends\u002Fibis.html)。\n*   **cpp_sql_parser**: 用于启用 Fugue SQL 的 CPP antlr 解析器。它比纯 Python 解析器快 50 倍以上。对于主流的 Python 版本和平台，已经提供了预编译的二进制文件，但对于其他情况，则需要 C++ 编译器进行即时编译。\n\n例如，一个常见的用法是：\n\n```bash\npip install \"fugue[duckdb,spark]\"\n```\n\n请注意，如果您已经独立安装了 Spark 或 DuckDB，Fugue 会自动使用它们，而无需安装这些附加组件。\n\n## [入门指南](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002F)\n\n开始使用 Fugue 的最佳方式是通过 10 分钟教程：\n*   [10 分钟学会 Fugue API](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fquick_look\u002Ften_minutes.html)\n*   [10 分钟学会 FugueSQL](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fquick_look\u002Ften_minutes_sql.html)\n\n有关顶层 API 的更多信息，请参阅：\n*   [Fugue 顶层 
API](https:\u002F\u002Ffugue.readthedocs.io\u002Fen\u002Flatest\u002Ftop_api.html)\n\n这些 [教程](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002F) 也可以通过 binder 或 Docker 在交互式笔记本环境中运行：\n\n### 使用 binder\n\n[![Binder](https:\u002F\u002Fmybinder.org\u002Fbadge_logo.svg)](https:\u002F\u002Fmybinder.org\u002Fv2\u002Fgh\u002Ffugue-project\u002Ftutorials\u002Fmaster)\n\n**请注意，在 binder 上运行速度较慢**，因为 binder 上的机器性能不足以支持像 Spark 这样的分布式框架。并行执行可能会变为串行执行，因此一些性能对比示例可能无法给出准确的结果。\n\n### 使用 Docker\n\n或者，您可以在自己的机器上运行以下 Docker 镜像，以获得较好的性能：\n\n```bash\ndocker run -p 8888:8888 fugueproject\u002Ftutorials:latest\n```\n\n\n## Jupyter Notebook 扩展\n\nFugueSQL 配备了一个配套的 [notebook 扩展](https:\u002F\u002Fpypi.org\u002Fproject\u002Ffugue-jupyter\u002F)，允许用户使用 `%%fsql` 单元魔法命令。该扩展还为 FugueSQL 单元提供语法高亮显示功能，适用于经典 notebook 和 Jupyter Lab。更多详情请参阅 [安装说明](https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue-jupyter#install)。\n\n![FugueSQL 动画](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffugue-project_fugue_readme_eb4f75f9172b.gif)\n\n\n## 生态系统\n\n作为抽象层，Fugue 可以与许多其他开源项目无缝集成。\n\nPython 后端：\n*   [Pandas](https:\u002F\u002Fgithub.com\u002Fpandas-dev\u002Fpandas)\n*   [Polars](https:\u002F\u002Fwww.pola.rs)（仅限 DataFrames）\n*   [Spark](https:\u002F\u002Fgithub.com\u002Fapache\u002Fspark)\n*   [Dask](https:\u002F\u002Fgithub.com\u002Fdask\u002Fdask)\n*   [Ray](http:\u002F\u002Fgithub.com\u002Fray-project\u002Fray)\n*   [Ibis](https:\u002F\u002Fgithub.com\u002Fibis-project\u002Fibis\u002F)\n\nFugueSQL 后端：\n*   Pandas - FugueSQL 可以在 Pandas 上运行\n*   [Duckdb](https:\u002F\u002Fgithub.com\u002Fduckdb\u002Fduckdb) - 进程内 SQL OLAP 数据库管理系统\n*   [dask-sql](https:\u002F\u002Fgithub.com\u002Fdask-contrib\u002Fdask-sql) - Dask 的 SQL 接口\n*   SparkSQL\n*   [BigQuery](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fintegrations\u002Fwarehouses\u002Fbigquery.html)\n*   Trino\n\n\nFugue 可以作为后端使用，或与以下项目集成：\n*   
[WhyLogs](https:\u002F\u002Fwhylogs.readthedocs.io\u002Fen\u002Flatest\u002Fexamples\u002Fintegrations\u002FFugue_Profiling.html?highlight=fugue) - 数据分析\n*   [PyCaret](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fintegrations\u002Fecosystem\u002Fpycaret.html) - 低代码机器学习\n*   [Nixtla](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fintegrations\u002Fecosystem\u002Fnixtla.html) - 时间序列建模\n*   [Prefect](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fintegrations\u002Fecosystem\u002Fprefect.html) - 工作流编排\n*   [Pandera](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fintegrations\u002Fecosystem\u002Fpandera.html) - 数据验证\n*   [Datacompy (由 Capital One 开发)](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fintegrations\u002Fecosystem\u002Fdatacompy.html) - 比较 DataFrames\n\n已注册的第三方扩展（主要用于 Fugue SQL）包括：\n*   [Pandas plot](https:\u002F\u002Fpandas.pydata.org\u002Fdocs\u002Freference\u002Fapi\u002Fpandas.DataFrame.plot.html) - 使用 matplotlib 或 plotly 可视化数据\n*   [Seaborn](https:\u002F\u002Fseaborn.pydata.org\u002Fapi.html) - 使用 seaborn 可视化数据\n*   [WhyLogs](https:\u002F\u002Fwhylogs.readthedocs.io\u002Fen\u002Flatest\u002Fexamples\u002Fintegrations\u002FFugue_Profiling.html?highlight=fugue) - 可视化数据分析结果\n*   [Vizzu](https:\u002F\u002Fgithub.com\u002Fvizzuhq\u002Fipyvizzu) - 使用 ipyvizzu 可视化数据\n\n## 社区与贡献\n\n欢迎随时通过 [Slack](http:\u002F\u002Fslack.fugue.ai) 联系我们。我们也有 [贡献指南](CONTRIBUTING.md)。\n\n### 案例研究\n\n*   [LyftLearn 如何通过 Kubernetes、Spark 和 Fugue 实现分布式计算的民主化](https:\u002F\u002Feng.lyft.com\u002Fhow-lyftlearn-democratizes-distributed-compute-through-kubernetes-spark-and-fugue-c0875b97c3d9)\n*   [Clobotics - 使用 Fugue 和 Spark 进行大规模图像处理](https:\u002F\u002Fmedium.com\u002Ffugue-project\u002Flarge-scale-image-processing-with-spark-through-fugue-e510b9813da8)\n*   [使用 Delta Lake、Fugue 和 Spark 构建数据湖 REST API 的架构（bitsofinfo 
文章）](https:\u002F\u002Fbitsofinfo.wordpress.com\u002F2023\u002F08\u002F14\u002Fdata-lake-rest-api-delta-lake-fugue-spark)\n\n### 提及的应用\n\n*   [Interos, Inc. 的数据科学生产化实践（Anthony Holten 的 LinkedIn 帖子）](https:\u002F\u002Fwww.linkedin.com\u002Fposts\u002Fanthony-holten_pandas-spark-dask-activity-7022628193983459328-QvcF)\n*   [Bain & Company 使用 Fugue 和 Nixtla 进行多时间序列预测（Fahad Akbar 的 LinkedIn 帖子）](https:\u002F\u002Fwww.linkedin.com\u002Fposts\u002Ffahadakbar_fugue-datascience-forecasting-activity-7041119034813124608-u08q?utm_source=share&utm_medium=member_desktop)\n\n## 更多资源\n\n查看我们最新的会议演讲和相关内容。如需更完整的列表，请访问教程中的 [内容](https:\u002F\u002Ffugue-tutorials.readthedocs.io\u002Ftutorials\u002Fresources\u002Fcontent.html) 页面。\n\n### 博客\n\n*   [为什么类 Pandas 的接口对分布式计算并不理想](https:\u002F\u002Ftowardsdatascience.com\u002Fwhy-pandas-like-interfaces-are-sub-optimal-for-distributed-computing-322dacbce43)\n*   [介绍 FugueSQL — 适用于 Pandas、Spark 和 Dask DataFrame 的 SQL（Khuyen Tran 在 Towards Data Science 上的文章）](https:\u002F\u002Ftowardsdatascience.com\u002Fintroducing-fuguesql-sql-for-pandas-spark-and-dask-dataframes-63d461a16b27)\n\n### 会议\n\n*   [Lyft 公司的分布式机器学习](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=_IVyIOV0LgY)\n*   [比较扩展 Python 和 Pandas 代码的不同方法](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=b3ae0m_XTys)\n*   [使用 Spark 和 Dask 进行大规模数据验证（PyCon US）](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=2AdvBgjO_3Q)\n*   [FugueSQL — 面向 Pandas、Spark 和 Dask DataFrame 的增强型 SQL 接口（PyData Global）](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=OBpnGYjNBBI)\n*   [分布式混合参数调优](https:\u002F\u002Fwww.youtube.com\u002Fwatch?v=_GBjqskD8Qk)","# Fugue 快速上手指南\n\nFugue 是一个统一的分布式计算接口，允许用户使用极少的代码修改，将 Python、Pandas 和 SQL 代码在 Spark、Dask 和 Ray 等引擎上运行。它特别适合将现有的单机 Pandas 代码快速扩展为分布式任务。\n\n## 环境准备\n\n*   **操作系统**：Linux, macOS, Windows\n*   **Python 版本**：支持 Python 3.8+ (具体版本请参考 PyPI 徽章)\n*   **前置依赖**：\n    *   基础使用仅需 Python 环境。\n    *   若需使用特定分布式引擎（如 Spark, Dask, Ray），需确保本地已安装相应环境或通过网络可访问集群。\n    *   若需使用 
FugueSQL，建议安装额外的 SQL 解析支持。\n\n## 安装步骤\n\n### 1. 基础安装\n通过 pip 安装核心库：\n```bash\npip install fugue\n```\n> **国内加速建议**：如果下载速度慢，可使用清华或阿里镜像源：\n> ```bash\n> pip install fugue -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n> ```\n\n### 2. 按需安装扩展功能\n根据你需要的执行引擎或功能安装额外组件：\n\n*   **启用 FugueSQL (强烈推荐)**：\n    ```bash\n    pip install \"fugue[sql]\"\n    ```\n*   **启用 Spark 支持**：\n    ```bash\n    pip install \"fugue[spark]\"\n    ```\n*   **启用 Dask 支持**：\n    ```bash\n    pip install \"fugue[dask]\"\n    ```\n*   **启用 DuckDB 支持 (高性能本地 SQL)**：\n    ```bash\n    pip install \"fugue[duckdb]\"\n    ```\n*   **组合安装示例** (同时支持 SQL 和 Spark)：\n    ```bash\n    pip install \"fugue[sql,spark]\"\n    ```\n\n*注意：如果你已经独立安装了 Spark 或 DuckDB，Fugue 通常能自动识别并使用它们，无需安装对应的 extras，但安装 extras 可确保依赖兼容性。*\n\n## 基本使用\n\nFugue 的核心优势在于无需修改原有的 Pandas 函数即可将其转换为分布式任务。以下是最简单的 `transform` 用法示例。\n\n### 场景：将 Pandas 函数并行化\n\n假设你有一个标准的 Pandas 处理函数：\n\n```python\nimport pandas as pd\nfrom typing import Dict\n\n# 1. 定义原始 Pandas 函数\ninput_df = pd.DataFrame({\"id\":[0,1,2], \"value\": ([\"A\", \"B\", \"C\"])})\nmap_dict = {\"A\": \"Apple\", \"B\": \"Banana\", \"C\": \"Carrot\"}\n\ndef map_letter_to_food(df: pd.DataFrame, mapping: Dict[str, str]) -> pd.DataFrame:\n    df[\"value\"] = df[\"value\"].map(mapping)\n    return df\n```\n\n### 2. 使用 Fugue 切换到分布式引擎\n\n只需调用 `fugue.transform`，指定目标引擎（如 `spark`），即可在分布式环境下运行上述函数，而无需重写任何逻辑。\n\n```python\nfrom pyspark.sql import SparkSession\nfrom fugue import transform\n\n# 初始化 Spark Session\nspark = SparkSession.builder.getOrCreate()\nsdf = spark.createDataFrame(input_df)\n\n# 使用 Fugue transform 执行\n# schema=\"*\" 表示输出保留所有输入列结构\nout = transform(\n    sdf,\n    map_letter_to_food,\n    schema=\"*\",\n    params=dict(mapping=map_dict),\n)\n\n# out 现在是一个 Spark DataFrame\nout.show()\n```\n\n**输出结果：**\n```text\n+---+------+\n| id| value|\n+---+------+\n|  0| Apple|\n|  1|Banana|\n|  2|Carrot|\n+---+------+\n```\n\n### 3. 
灵活切换执行后端\n\n使用 `engine_context` 可以方便地在本地 (Pandas) 和分布式 (Spark\u002FDask) 之间切换，便于开发和调试：\n\n```python\nimport fugue.api as fa\n\ndef run_workflow(engine=None):\n    with fa.engine_context(engine):\n        # 下面的代码会根据 context 自动在指定引擎上运行\n        df = fa.load(\"\u002Fpath\u002Fto\u002Ffile.parquet\")\n        out = fa.transform(df, map_letter_to_food, schema=\"*\")\n        fa.save(out, \"\u002Fpath\u002Fto\u002Foutput_file.parquet\")\n\nrun_workflow()                 # 默认在 Pandas 上运行\nrun_workflow(engine=\"spark\")   # 在 Spark 上运行\nrun_workflow(engine=\"dask\")    # 在 Dask 上运行\n```\n\n通过以上步骤，你可以轻松地将现有的数据处理逻辑扩展到大规模分布式集群。更多高级用法（如 FugueSQL）请参考官方教程。","某电商数据团队需要将本地开发的 Pandas 用户画像清洗脚本，迁移到 Spark 集群以处理每日增长的亿级交易日志。\n\n### 没有 fugue 时\n- **重写成本高昂**：必须将原本简洁的 Pandas 代码逐行改写为复杂的 PySpark API 调用，开发周期从几小时延长至数天。\n- **逻辑验证困难**：分布式框架的惰性执行机制导致调试繁琐，难以像本地 Pandas 那样快速断点验证中间结果。\n- **技术栈割裂**：数据科学家熟悉的 Python 原生逻辑与工程师维护的 Spark 作业之间存在鸿沟，协作时需反复转换代码风格。\n- **资源闲置浪费**：由于迁移门槛高，大量轻量级脚本仍被迫在单机运行，无法利用集群算力，导致处理超时频发。\n\n### 使用 fugue 后\n- **零代码重构迁移**：只需在原有 Pandas 函数外包裹一层 `transform()` 接口，即可无缝切换至 Spark 引擎执行，几乎无需修改业务逻辑。\n- **统一开发体验**：保留了本地 Pandas 的即时反馈特性，开发者可先用小样本在本地调试通顺，再一键提交至集群跑全量数据。\n- **多引擎自由切换**：同一套代码不仅能跑在 Spark 上，还能根据场景需求灵活切换到 Dask 或 Ray，无需为不同后端维护多套代码库。\n- **算力弹性扩展**：原本卡在单机的耗时任务，现在能自动分发到集群并行处理，将数小时的等待时间压缩至分钟级。\n\nfugue 的核心价值在于打破了本地原型开发与分布式生产部署之间的壁垒，让数据团队能用最熟悉的 Python 写法直接驾驭大规模集群算力。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Ffugue-project_fugue_eb4f75f9.gif","fugue-project","The Fugue Project","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Ffugue-project_e5736fc3.png","Democratizing distributed computing and machine learning",null,"fugue-project@googlegroups.com","https:\u002F\u002Fgithub.com\u002Ffugue-project",[83,87,91,95,98],{"name":84,"color":85,"percentage":86},"Python","#3572A5",98.2,{"name":88,"color":89,"percentage":90},"Jupyter 
Notebook","#DA5B0B",1.2,{"name":92,"color":93,"percentage":94},"Makefile","#427819",0.3,{"name":96,"color":97,"percentage":94},"JavaScript","#f1e05a",{"name":99,"color":100,"percentage":101},"Shell","#89e051",0,2147,101,"2026-04-03T02:36:50","Apache-2.0","未说明",{"notes":108,"python":109,"dependencies":110},"Fugue 是一个分布式计算统一接口，核心依赖 Pandas。若需使用特定功能（如 FugueSQL、Spark\u002FDask\u002FRay 后端），需安装对应的 extras（例如 pip install fugue[sql,spark]）。若已独立安装 Spark 或 DuckDB，Fugue 可自动识别而无需安装对应 extras。使用 C++ SQL 解析器（cpp_sql_parser）可显著提升性能，但部分平台可能需要 C++ 编译器进行构建。在 Binder 等低配环境中运行分布式框架（如 Spark）时性能会严重下降。","3.8+",[111,112,113,114,115,116,117,118,119],"pandas","pyarrow","fugue-sql-antlr (可选，用于 FugueSQL)","pyspark (可选，用于 Spark 后端)","dask (可选，用于 Dask 后端)","ray (可选，用于 Ray 后端)","duckdb (可选，用于 DuckDB 后端)","polars (可选，用于 Polars 后端)","ibis (可选，用于 Ibis 后端)",[13,51],[122,123,124,125,126,127,128,129,111],"spark","dask","data-practitioners","machine-learning","distributed-systems","distributed-computing","distributed","sql","2026-03-27T02:49:30.150509","2026-04-06T07:11:57.737279",[133,138,143,148,153,158],{"id":134,"question_zh":135,"answer_zh":136,"source_url":137},13413,"如何在 PyCaret 中使用 Dask 作为后端时解决 'FuguePluginsRegistrationError' 错误？","该错误通常是由于 Fugue 版本过旧导致无法识别 Dask 引擎。维护者已确认在 Fugue 0.9.2 版本中修复了此问题。请升级您的环境：\n1. 如果您使用 conda，运行：`conda install -c conda-forge fugue=0.9.2`\n2. 
如果您使用 pip，运行：`pip install fugue>=0.9.2`\n升级后，`FugueBackend(\"dask\")` 即可正常工作。","https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F559",{"id":139,"question_zh":140,"answer_zh":141,"source_url":142},13414,"Fugue 与 Spark 的 antlr4-python3-runtime 版本冲突如何解决？","Spark 强制依赖 `antlr4-python3-runtime` 4.9.3 版本，而新版 Fugue 曾要求更高版本导致冲突。此问题已在 Fugue 0.9.0 版本中解决，该版本重新支持了 antlr4 4.9.* 系列。\n解决方案：将 Fugue 升级到 0.9.0 或更高版本（或测试版 `fugue==0.9.0.dev3` 及以上）。\n命令示例：`pip install fugue>=0.9.0` 或 `pip install fugue==0.9.0.dev3`。","https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F509",{"id":144,"question_zh":145,"answer_zh":146,"source_url":147},13415,"如何使用 Pandera 验证包含嵌套 ArrayType\u002FStructType 的 PySpark DataFrame  schema？","对于嵌套结构（如数组中包含元组），不能直接使用常规列定义。需要使用 `pa.Check` 配合自定义验证函数。示例代码如下：\n\n```python\nfrom typing import NamedTuple\nimport pandera as pa\nfrom pandera import Column, DataFrameSchema\n\n# 定义嵌套结构的元组类型\nclass Name(NamedTuple):\n    first_name: str\n    middle_name: str\n    last_name: str\n\n# 自定义检查函数\ndef name_check(elements):\n    try:\n        for name_components in elements:\n            Name(*name_components) # 尝试实例化以验证结构\n        return True\n    except:\n        return False\n\n# 定义 Schema\nbase_schema = pa.DataFrameSchema({\n    \"name\": pa.Column(object, pa.Check(name_check, element_wise=True)),\n    \"id\": pa.Column(str),\n    \"gender\": pa.Column(str),\n    \"salary\": pa.Column(pa.dtypes.Int32),\n})\n```\n确保您的 Fugue 版本至少为 0.6.6.dev3 以支持此功能。","https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F316",{"id":149,"question_zh":150,"answer_zh":151,"source_url":152},13416,"在 Windows 系统上使用 Fugue SQL (%%fsql) 保存文件时路径报错怎么办？","在 Windows 非 Spark 环境下，Fugue 0.6.0.dev2 及更高版本已支持标准 Windows 路径格式。您可以使用以下两种格式之一：\n1. 使用正斜杠：`SAVE OVERWRITE \"C:\u002Fsome_folder\u002Ff.csv\" (header=true)`\n2. 
使用双反斜杠（注意转义）：`SAVE OVERWRITE \"C:\\\\some_folder\\\\f.csv\" (header=true)`\n请确保您的 Fugue 版本已更新至 0.6.0 或更高。注意：PySpark 在 Windows 上的原生支持可能仍需额外配置或建议使用 WSL\u002F容器化环境。","https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F224",{"id":154,"question_zh":155,"answer_zh":156,"source_url":157},13417,"遇到 'Could not infer execution engine for type DataFrame' 错误该如何处理？","此错误表明 Fugue 无法自动识别传入的 DataFrame 类型（例如 Dask DataFrame 或 Ray Dataset）。这通常发生在通过第三方库（如 Nixtla）调用时。\n解决方法：\n1. 检查数据类型：确认传入的对象确实是预期的分布式 DataFrame 类型（如 `dask.dataframe.DataFrame`）。\n2. 显式指定后端：如果可能，在调用函数时显式传递执行引擎参数（例如 `engine=\"dask\"`）。\n3. 版本兼容性：确保 `fugue`、`dask` 以及调用方库（如 `nixtla`）均为最新兼容版本。旧版本的 Fugue 可能缺乏对新版 Dask 内部结构（如 `dask_expr`）的支持。建议升级 Fugue 到最新版。","https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F555",{"id":159,"question_zh":160,"answer_zh":161,"source_url":147},13418,"如何在 Fugue Schema 字符串中表示字段的可空性（nullable）？","在 Fugue 的字符串格式 Schema 定义中，目前主要通过类型后缀或特定语法来暗示可空性，但在基础字符串表示法（如 `\"name:[{firstname:str}]\"`）中直接指定 `nullable=False` 的支持较为有限或依赖于具体解析器版本。\n根据社区讨论，如果您需要严格的非空约束，建议：\n1. 使用 Python API (`DataFrameSchema`) 而非字符串 Schema，这样可以明确设置 `nullable=False`。\n2. 
或者在数据处理逻辑中添加额外的检查步骤。\n如果在较新版本中发现字符串语法支持更新，请参考官方最新文档 `fugue-tutorials.readthedocs.io` 中的 'Schema DataFrames' 章节。",[163,168,173,178,183,188,193,198,202,207,212,216,221,225,229,234,238,242,246,250],{"id":164,"version":165,"summary_zh":166,"released_at":167},72136,"v0.9.7","## 变更内容\n* 迁移到 UV，移除 QPD，由 @goodwanghan 在 https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fpull\u002F572 中完成。","2026-02-20T13:45:51",{"id":169,"version":170,"summary_zh":171,"released_at":172},72137,"0.9.6","## 变更内容\n* [热修复] 在 Databricks 中无法识别 Spark，由 @goodwanghan 在 https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fpull\u002F571 中修复。\n","2026-01-30T19:08:43",{"id":174,"version":175,"summary_zh":176,"released_at":177},72138,"0.9.5","## 变更内容\n\n* 由 @goodwanghan 在 https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fpull\u002F569 中设置 Pandas\u003C3\n","2026-01-28T01:28:13",{"id":179,"version":180,"summary_zh":181,"released_at":182},72139,"0.9.4","## 变更内容\n* 由 @goodwanghan 在 https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fpull\u002F567 中修复了兼容性问题\n* 由 @nasaul 在 https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fpull\u002F566 中解除了对 pandas 版本的锁定\n\n## 新贡献者\n* @nasaul 在 https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fpull\u002F566 中完成了首次贡献\n\n**完整变更日志**: https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fcompare\u002F0.9.3...0.9.4","2025-12-30T21:05:09",{"id":184,"version":185,"summary_zh":186,"released_at":187},72140,"0.9.3","## 变更内容\n* @kvnkho 在 https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fpull\u002F564 中为 Flask RPCServer 添加了安全警告\n* @goodwanghan 在 https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fpull\u002F565 中修复了 Flask 服务器的安全漏洞\n\n\n**完整变更日志**: https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fcompare\u002F0.9.2...0.9.3","2025-12-02T06:25:32",{"id":189,"version":190,"summary_zh":191,"released_at":192},72141,"0.9.2","## 变更内容\n* @goodwanghan 在 
https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fpull\u002F547 中添加了 `dict[str, Any]` 作为支持的输入和输出类型。\n* @goodwanghan 在 https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fpull\u002F549 中增加了对 Python 3.12 的支持。\n* 问题 551：@kondziolka9ld 在 https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fpull\u002F552 中为 `function_wrapper.py` 添加了对 `collections.abc.Callable` 的支持。\n* @goodwanghan 在 https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fpull\u002F553 中修复了 Python 3.8 下的 Ibis 测试。\n* @goodwanghan 在 https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fpull\u002F556 中修复了兼容性问题。\n* @goodwanghan 在 https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fpull\u002F561 中修复了兼容性问题，并移除了 Transformer 输入中对 `row(dict)` 注解的支持。\n* @goodwanghan 在 https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fpull\u002F563 中发布了 0.9.2 版本。\n\n## 新贡献者\n* @kondziolka9ld 在 https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fpull\u002F552 中做出了首次贡献。","2025-10-31T22:31:31",{"id":194,"version":195,"summary_zh":196,"released_at":197},72142,"0.9.2.dev2","修复了几个兼容性问题。","2025-03-29T18:19:27",{"id":199,"version":200,"summary_zh":79,"released_at":201},72143,"0.9.2.dev1","2024-06-28T06:19:29",{"id":203,"version":204,"summary_zh":205,"released_at":206},72144,"0.9.1","-   [543](https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F543) 支持使用标准集合进行类型提示\n-   [544](https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F544) 修复工作节点端的 Spark Connect 导入问题","2024-06-14T07:09:17",{"id":208,"version":209,"summary_zh":210,"released_at":211},72145,"0.9.0","## 0.9.0\n\n-   [482](https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F482) 将 Fugue SQL 的依赖移至额外的 `[sql]` 分组，并使相关函数变为可选依赖\n-   [504](https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F504) 创建 Fugue 的 pytest 固定装置和插件\n-   
[541](https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F541) 将表的临时视图名称改为大写\n-   [540](https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F540) 修复 Ray 2.10+ 的兼容性问题\n-   [539](https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F539) 修复与 Dask 2024.4+ 的兼容性问题\n-   [534](https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F534) 移除 ibis 版本限制\n-   [505](https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F505) 弃用 FugueWorkflow 中的 `as_ibis` 方法\n-   [387](https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F387) 提升对 Python 3.10 的测试覆盖率，并新增针对 Python 3.11 的测试\n-   [269](https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F269) Spark 和 Dask 在未进行排序优化的情况下仅取一行","2024-04-29T15:25:29",{"id":213,"version":214,"summary_zh":79,"released_at":215},72146,"0.9.0.dev4","2024-04-28T23:30:13",{"id":217,"version":218,"summary_zh":219,"released_at":220},72147,"0.9.0.dev3","修复一些兼容性问题","2024-01-21T08:18:57",{"id":222,"version":223,"summary_zh":79,"released_at":224},72148,"0.9.0.dev2","2024-01-14T22:31:49",{"id":226,"version":227,"summary_zh":79,"released_at":228},72149,"0.9.0.dev1","2024-01-14T22:02:14",{"id":230,"version":231,"summary_zh":232,"released_at":233},72150,"0.8.7","-   [488](https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F488) Migrate from fs to fsspec\r\n-   [521](https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F521) Add `as_dicts` to Fugue API\r\n-   [516](https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F516) Use `_collect_as_arrow` for Spark `as_arrow``\r\n-   [520](https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fpull\u002F520) Add Python 3.10 to Windows Tests\r\n-   [506](https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F506) Adopt pandas `ExtensionDType`\r\n-   
[504](https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F504) Create Fugue pytest fixtures\r\n-   [503](https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F503) Deprecate python 3.7 support\r\n-   [501](https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F501) Simplify zip\u002Fcomap, remove join from the implementation\r\n-   [500](https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F500) Implement all partitioning strategies for Dask\r\n-   [495](https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F495) Resolve segfault on Duckdb 0.8.1\r\n-   [494](https:\u002F\u002Fgithub.com\u002Ffugue-project\u002Ffugue\u002Fissues\u002F494) Remove the version cap of Dask","2023-11-09T16:07:54",{"id":235,"version":236,"summary_zh":79,"released_at":237},72151,"0.8.7.dev8","2023-11-09T05:28:18",{"id":239,"version":240,"summary_zh":79,"released_at":241},72152,"0.8.7.dev7","2023-11-08T08:14:23",{"id":243,"version":244,"summary_zh":79,"released_at":245},72153,"0.8.7.dev6","2023-10-27T05:12:37",{"id":247,"version":248,"summary_zh":79,"released_at":249},72154,"0.8.7.dev5","2023-10-09T04:51:10",{"id":251,"version":252,"summary_zh":79,"released_at":253},72155,"0.8.7.dev4","2023-08-20T06:01:10"]