[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-kyegomez--zeta":3,"tool-kyegomez--zeta":61},[4,18,26,36,44,53],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",160784,2,"2026-04-19T11:32:54",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",109154,"2026-04-18T11:18:24",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":32,"last_commit_at":50,"category_tags":51,"status":17},6121,"gemini-cli","google-gemini\u002Fgemini-cli","gemini-cli 是一款由谷歌推出的开源 AI 命令行工具，它将强大的 Gemini 大模型能力直接集成到用户的终端环境中。对于习惯在命令行工作的开发者而言，它提供了一条从输入提示词到获取模型响应的最短路径，无需切换窗口即可享受智能辅助。\n\n这款工具主要解决了开发过程中频繁上下文切换的痛点，让用户能在熟悉的终端界面内直接完成代码理解、生成、调试以及自动化运维任务。无论是查询大型代码库、根据草图生成应用，还是执行复杂的 Git 操作，gemini-cli 都能通过自然语言指令高效处理。\n\n它特别适合广大软件工程师、DevOps 人员及技术研究人员使用。其核心亮点包括支持高达 100 万 
token 的超长上下文窗口，具备出色的逻辑推理能力；内置 Google 搜索、文件操作及 Shell 命令执行等实用工具；更独特的是，它支持 MCP（模型上下文协议），允许用户灵活扩展自定义集成，连接如图像生成等外部能力。此外，个人谷歌账号即可享受免费的额度支持，且项目基于 Apache 2.0 协议完全开源，是提升终端工作效率的理想助手。",100752,"2026-04-10T01:20:03",[52,13,15,14],"插件",{"id":54,"name":55,"github_repo":56,"description_zh":57,"stars":58,"difficulty_score":32,"last_commit_at":59,"category_tags":60,"status":17},4721,"markitdown","microsoft\u002Fmarkitdown","MarkItDown 是一款由微软 AutoGen 团队打造的轻量级 Python 工具，专为将各类文件高效转换为 Markdown 格式而设计。它支持 PDF、Word、Excel、PPT、图片（含 OCR）、音频（含语音转录）、HTML 乃至 YouTube 链接等多种格式的解析，能够精准提取文档中的标题、列表、表格和链接等关键结构信息。\n\n在人工智能应用日益普及的今天，大语言模型（LLM）虽擅长处理文本，却难以直接读取复杂的二进制办公文档。MarkItDown 恰好解决了这一痛点，它将非结构化或半结构化的文件转化为模型“原生理解”且 Token 效率极高的 Markdown 格式，成为连接本地文件与 AI 分析 pipeline 的理想桥梁。此外，它还提供了 MCP（模型上下文协议）服务器，可无缝集成到 Claude Desktop 等 LLM 应用中。\n\n这款工具特别适合开发者、数据科学家及 AI 研究人员使用，尤其是那些需要构建文档检索增强生成（RAG）系统、进行批量文本分析或希望让 AI 助手直接“阅读”本地文件的用户。虽然生成的内容也具备一定可读性，但其核心优势在于为机器",93400,"2026-04-06T19:52:38",[52,14],{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":66,"readme_en":67,"readme_zh":68,"quickstart_zh":69,"use_case_zh":70,"hero_image_url":71,"owner_login":72,"owner_name":73,"owner_avatar_url":74,"owner_bio":75,"owner_company":76,"owner_location":77,"owner_email":78,"owner_twitter":79,"owner_website":80,"owner_url":81,"languages":82,"stars":91,"forks":92,"last_commit_at":93,"license":94,"difficulty_score":95,"env_os":96,"env_gpu":97,"env_ram":96,"env_deps":98,"category_tags":103,"github_topics":104,"view_count":32,"oss_zip_url":78,"oss_zip_packed_at":78,"status":17,"created_at":118,"updated_at":119,"faqs":120,"releases":151},9771,"kyegomez\u002Fzeta","zeta","Build high-performance AI models with modular building blocks","Zeta 是一个基于 PyTorch 的模块化框架，旨在让高性能 AI 模型的开发像搭积木一样简单。它提供了一系列经过精心打磨、测试和优化的高复用组件，帮助开发者快速构建前沿模型，无需重复造轮子。\n\n在 AI 研发中，从零实现复杂的注意力机制或混合专家系统往往耗时且容易出错。Zeta 通过提供标准化的“乐高式”模块解决了这一痛点，让用户能专注于模型架构设计而非底层细节。其内置组件涵盖多种先进注意力机制（如 Flash Attention）、混合专家路由（MoE）、量化技术（BitLinear）以及完整的 Transformer 架构，所有模块均针对性能进行了深度优化，并支持无缝替换现有 PyTorch 代码。\n\n这款工具特别适合 AI 研究人员、深度学习工程师以及希望快速验证想法的开发者使用。无论是需要复现最新论文算法，还是构建生产级应用，Zeta 都能凭借其一贯的可靠性和高效性提供强力支持。借助 Zeta，你可以轻松调用多查询注意力、SwiGLU 激活函数等高级特性，显著降低开发门槛，加速从原型到落地的全过程。","\n![Zeta banner](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkyegomez_zeta_readme_686837b92a91.png)\n\n\u003Cp>\n  \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fkyegomez\u002Fzeta\u002Fblob\u002Fmain\u002FLICENSE\">\n    \u003Cimg alt=\"MIT License\" src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002Flicense-MIT-blue.svg\" \u002F>\n  \u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fpypi.org\u002Fproject\u002Fzetascale\">\n    \u003Cimg alt=\"PyPI\" src=\"https:\u002F\u002Fbadge.fury.io\u002Fpy\u002Fzetascale.svg\" \u002F>\n  \u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fzeta.readthedocs.io\">\n    \u003Cimg alt=\"Docs\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkyegomez_zeta_readme_13d664e1afd7.png\" \u002F>\n  \u003C\u002Fa>\n\u003C\u002Fp>\n\n**Zeta** is a modular PyTorch framework designed to simplify the development of AI models by providing reusable, high-performance building blocks. 
Think of it as a collection of LEGO blocks for AI: each component is carefully crafted, tested, and optimized, allowing you to quickly assemble state-of-the-art models without reinventing the wheel.\n\n\n\u003Cp>\n  \u003Ca href=\"https:\u002F\u002Fdiscord.gg\u002FEamjgSaEQf\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FDiscord-Join%20our%20server-5865F2?style=for-the-badge&logo=discord&logoColor=white\" alt=\"Join our Discord\" \u002F>\n  \u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fwww.youtube.com\u002F@kyegomez3242\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FYouTube-Subscribe-red?style=for-the-badge&logo=youtube&logoColor=white\" alt=\"Subscribe on YouTube\" \u002F>\n  \u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fwww.linkedin.com\u002Fin\u002Fkye-g-38759a207\u002F\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FLinkedIn-Connect-blue?style=for-the-badge&logo=linkedin&logoColor=white\" alt=\"Connect on LinkedIn\" \u002F>\n  \u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fx.com\u002Fkyegomezb\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FX.com-Follow-1DA1F2?style=for-the-badge&logo=x&logoColor=white\" alt=\"Follow on X.com\" \u002F>\n  \u003C\u002Fa>\n\u003C\u002Fp>\n\n## Overview\n\nZeta provides a comprehensive library of modular components commonly used in modern AI architectures, including:\n\n- **Attention Mechanisms**: Multi-query attention, sigmoid attention, flash attention, and more\n- **Mixture of Experts (MoE)**: Efficient expert routing and gating mechanisms\n- **Neural Network Modules**: Feedforward networks, activation functions, normalization layers\n- **Quantization**: BitLinear, dynamic quantization, and other optimization techniques\n- **Architectures**: Transformers, encoders, decoders, vision transformers, and complete model implementations\n- **Training Utilities**: Optimization algorithms, logging, and performance monitoring\n\n\nEach component is designed to be:\n- **Modular**: Drop-in replacements that work seamlessly with PyTorch\n- **High-Performance**: Optimized implementations with fused kernels where applicable\n- **Well-Tested**: Comprehensive test coverage ensuring reliability\n- **Production-Ready**: Used in hundreds of models across various domains\n\n## Installation\n\n```bash\npip3 install -U zetascale\n```\n\n## Quick Start\n\n### Multi-Query Attention\n\nMulti-query attention reduces memory usage while maintaining model quality by sharing key and value projections across attention heads.\n\n```python\nimport torch\nfrom zeta import MultiQueryAttention\n\n# Initialize the model\nmodel = MultiQueryAttention(\n    dim=512,\n    heads=8,\n)\n\n# Forward pass\ntext = torch.randn(2, 4, 512)\noutput, _, _ = model(text)\nprint(output.shape)  # torch.Size([2, 4, 512])\n```\n\n### SwiGLU Activation\n\nThe SwiGLU activation function applies a gating mechanism to selectively pass information through the network.\n\n```python\nimport torch\nfrom zeta.nn import SwiGLUStacked\n\nx = torch.randn(5, 10)\nswiglu = SwiGLUStacked(10, 20)\noutput = swiglu(x)\nprint(output.shape)  # torch.Size([5, 20])\n```\n\n### Relative Position Bias\n\nRelative position bias quantizes the distance between positions into buckets and uses embeddings to provide position-aware attention biases.\n\n```python\nimport torch\nfrom torch import nn\nfrom zeta.nn import RelativePositionBias\n\n# Initialize the module\nrel_pos_bias = RelativePositionBias()\n\n
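# Assumed call signature: (batch_size, query_length, key_length); the returned\n# bias matrix is added to the attention logits before the softmax.\n# Compute bias for the self-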
attention mechanism\nbias_matrix = rel_pos_bias(1, 10, 10)\n\n# Use in custom attention\nclass CustomAttention(nn.Module):\n    def __init__(self):\n        super().__init__()\n        self.rel_pos_bias = RelativePositionBias()\n\n    def forward(self, queries, keys):\n        bias = self.rel_pos_bias(queries.size(0), queries.size(1), keys.size(1))\n        # Use bias in attention computation\n        return None\n```\n\n### FeedForward Network\n\nA flexible feedforward module with optional GLU activation and LayerNorm, commonly used in transformer architectures.\n\n```python\nimport torch\nfrom zeta.nn import FeedForward\n\nmodel = FeedForward(256, 512, glu=True, post_act_ln=True, dropout=0.2)\nx = torch.randn(1, 256)\noutput = model(x)\nprint(output.shape)  # torch.Size([1, 512])\n```\n\n### BitLinear Quantization\n\nBitLinear performs linear transformation with quantization and dequantization, reducing memory usage while maintaining performance. Based on [BitNet: Scaling 1-bit Transformers for Large Language Models](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.11453).\n\n```python\nimport torch\nfrom torch import nn\nimport zeta.quant as qt\n\nclass MyModel(nn.Module):\n    def __init__(self):\n        super().__init__()\n        self.linear = qt.BitLinear(10, 20)\n\n    def forward(self, x):\n        return self.linear(x)\n\nmodel = MyModel()\ninput = torch.randn(128, 10)\noutput = model(input)\nprint(output.size())  # torch.Size([128, 20])\n```\n\n### PalmE: Multi-Modal Architecture\n\nA complete implementation of the PalmE multi-modal model architecture, combining a ViT image encoder with a transformer decoder for vision-language tasks.\n\n```python\nimport torch\nfrom zeta.structs import (\n    AutoRegressiveWrapper,\n    Decoder,\n    Encoder,\n    Transformer,\n    ViTransformerWrapper,\n)\n\nclass PalmE(torch.nn.Module):\n    \"\"\"\n    PalmE is a transformer architecture that uses a ViT encoder and a transformer decoder.\n    \n    This implementation demonstrates how to combine Zeta's modular components to build\n    a complete multi-modal model architecture.\n    \"\"\"\n    \n    def __init__(\n        self,\n        image_size=256,\n        patch_size=32,\n        encoder_dim=512,\n        encoder_depth=6,\n        encoder_heads=8,\n        num_tokens=20000,\n        max_seq_len=1024,\n        decoder_dim=512,\n        decoder_depth=6,\n        decoder_heads=8,\n        alibi_num_heads=4,\n        attn_kv_heads=2,\n        use_abs_pos_emb=False,\n        cross_attend=True,\n        alibi_pos_bias=True,\n        rotary_xpos=True,\n        attn_flash=True,\n        qk_norm=True,\n    ):\n        super().__init__()\n        \n        # Vision encoder\n        self.encoder = ViTransformerWrapper(\n            image_size=image_size,\n            patch_size=patch_size,\n            attn_layers=Encoder(\n                dim=encoder_dim, \n                depth=encoder_depth, \n                heads=encoder_heads\n            ),\n        )\n        \n        # Language decoder\n        self.decoder = Transformer(\n            num_tokens=num_tokens,\n            max_seq_len=max_seq_len,\n            use_abs_pos_emb=use_abs_pos_emb,\n            attn_layers=Decoder(\n                dim=decoder_dim,\n                depth=decoder_depth,\n                heads=decoder_heads,\n                cross_attend=cross_attend,\n                alibi_pos_bias=alibi_pos_bias,\n                alibi_num_heads=alibi_num_heads,\n                rotary_xpos=rotary_xpos,\n                
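# fewer key-value heads than query heads (grouped-query style), which presumably\n                # shrinks the KV cache at inference\n                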
attn_kv_heads=attn_kv_heads,\n                attn_flash=attn_flash,\n                qk_norm=qk_norm,\n            ),\n        )\n        \n        # Enable autoregressive generation\n        self.decoder = AutoRegressiveWrapper(self.decoder)\n    \n    def forward(self, img: torch.Tensor, text: torch.Tensor):\n        \"\"\"Forward pass of the model.\"\"\"\n        encoded = self.encoder(img, return_embeddings=True)\n        return self.decoder(text, context=encoded)\n\n# Usage\nimg = torch.randn(1, 3, 256, 256)\ntext = torch.randint(0, 20000, (1, 1024))\nmodel = PalmE()\noutput = model(img, text)\nprint(output.shape)\n```\n\n### U-Net Architecture\n\nA complete U-Net implementation for image segmentation and generative tasks.\n\n```python\nimport torch\nfrom zeta.nn import Unet\n\nmodel = Unet(n_channels=1, n_classes=2)\nx = torch.randn(1, 1, 572, 572)\ny = model(x)\nprint(f\"Input shape: {x.shape}\")\nprint(f\"Output shape: {y.shape}\")\n```\n\n### Vision Embeddings\n\nConvert images into patch embeddings suitable for transformer-based vision models.\n\n```python\nimport torch\nfrom zeta.nn import VisionEmbedding\n\nvision_embedding = VisionEmbedding(\n    img_size=224,\n    patch_size=16,\n    in_chans=3,\n    embed_dim=768,\n    contain_mask_token=True,\n    prepend_cls_token=True,\n)\n\ninput_image = torch.rand(1, 3, 224, 224)\noutput = vision_embedding(input_image)\nprint(output.shape)\n```\n\n### Dynamic Quantization with Niva\n\nNiva provides dynamic quantization for specific layer types, ideal for models with variable runtime activations.\n\n```python\nimport torch\nfrom torch import nn\nfrom zeta import niva\n\n# Load a pre-trained model\nmodel = YourModelClass()\n\n# Quantize the model dynamically\nniva(\n    model=model,\n    model_path=\"path_to_pretrained_weights.pt\",\n    output_path=\"quantized_model.pt\",\n    quant_type=\"dynamic\",\n    quantize_layers=[nn.Linear, nn.Conv2d],\n    dtype=torch.qint8,\n)\n```\n\n### Fused Operations\n\nZeta includes several fused operations that combine multiple operations into single kernels for improved performance.\n\n#### FusedDenseGELUDense\n\nFuses two dense operations with GELU activation for up to 2x speedup.\n\n```python\nimport torch\nfrom zeta.nn import FusedDenseGELUDense\n\nx = torch.randn(1, 512)\nmodel = FusedDenseGELUDense(512, 1024)\nout = model(x)\nprint(out.shape)  # torch.Size([1, 1024])\n```\n\n#### FusedDropoutLayerNorm\n\nFuses dropout and layer normalization for faster feedforward networks.\n\n```python\nimport torch\nfrom zeta.nn import FusedDropoutLayerNorm\n\nmodel = FusedDropoutLayerNorm(dim=512)\nx = torch.randn(1, 512)\noutput = model(x)\nprint(output.shape)  # torch.Size([1, 512])\n```\n\n### Mamba: State Space Model\n\nPyTorch implementation of the Mamba state space model architecture.\n\n```python\nimport torch\nfrom zeta.nn import MambaBlock\n\nblock = MambaBlock(dim=64, depth=1)\nx = torch.randn(1, 10, 64)\ny = block(x)\nprint(y.shape)  # torch.Size([1, 10, 64])\n```\n\n### FiLM: Feature-wise Linear Modulation\n\nFeature-wise Linear Modulation for conditional feature transformation.\n\n```python\nimport torch\nfrom zeta.nn import Film\n\nfilm_layer = Film(dim=128, hidden_dim=64, expanse_ratio=4)\nconditions = torch.randn(10, 128)\nhiddens = torch.randn(10, 1, 128)\nmodulated_features = film_layer(conditions, hiddens)\nprint(modulated_features.shape)  # torch.Size([10, 1, 128])\n```\n
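\n### Mixture of Experts (Illustrative Sketch)\n\nThe overview above lists efficient expert routing and gating among Zeta's MoE components, but this README does not show a zeta MoE call. The snippet below is therefore a minimal plain-PyTorch sketch of top-1 gating for orientation; every name in it (`Top1MoE`, `router`, `experts`) is illustrative rather than a zeta API. A linear router scores each token, and each token is processed by its highest-scoring expert feedforward network, scaled by the gate weight.\n\n```python\nimport torch\nfrom torch import nn\n\nclass Top1MoE(nn.Module):\n    \"\"\"Minimal top-1 gated mixture of experts (illustrative, not a zeta API).\"\"\"\n\n    def __init__(self, dim, num_experts=4):\n        super().__init__()\n        self.router = nn.Linear(dim, num_experts)  # one score per expert for each token\n        self.experts = nn.ModuleList(\n            nn.Sequential(nn.Linear(dim, 4 * dim), nn.GELU(), nn.Linear(4 * dim, dim))\n            for _ in range(num_experts)\n        )\n\n    def forward(self, x):\n        # x: (batch, seq_len, dim) -> flatten to (num_tokens, dim) for routing\n        tokens = x.reshape(-1, x.shape[-1])\n        gates = self.router(tokens).softmax(dim=-1)\n        weight, choice = gates.max(dim=-1)  # top-1 expert index and its gate weight\n        out = torch.zeros_like(tokens)\n        for i, expert in enumerate(self.experts):\n            mask = choice == i\n            if mask.any():\n                # route the selected tokens through expert i, scaled by the gate\n                out[mask] = weight[mask].unsqueeze(-1) * expert(tokens[mask])\n        return out.reshape(x.shape)\n\nmoe = Top1MoE(dim=64)\nx = torch.randn(2, 10, 64)\nprint(moe(x).shape)  # torch.Size([2, 10, 64])\n```\n\nProduction MoE layers typically add load-balancing losses and expert capacity limits; refer to zeta's documentation for the framework's own routing modules.\n\n### Model Optimization\n\nThe `hyper_optimize` decorator provides a unified interface for multiple optimization\n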
techniques.\n\n```python\nimport torch\nfrom zeta.nn import hyper_optimize\n\n@hyper_optimize(\n    torch_fx=False,\n    torch_script=False,\n    torch_compile=True,\n    quantize=True,\n    mixed_precision=True,\n    enable_metrics=True,\n)\ndef model(x):\n    return x @ x\n\nout = model(torch.randn(1, 3, 32, 32))\nprint(out)\n```\n\n### Direct Preference Optimization (DPO)\n\nDPO implementation for reinforcement learning from human feedback (RLHF) applications.\n\n```python\nimport torch\nfrom torch import nn\nfrom zeta.rl import DPO\n\nclass PolicyModel(nn.Module):\n    def __init__(self, dim, output_dim):\n        super().__init__()\n        self.fc = nn.Linear(dim, output_dim)\n    \n    def forward(self, x):\n        return self.fc(x)\n\ndim = 10\noutput_dim = 5\npolicy_model = PolicyModel(dim, output_dim)\ndpo_model = DPO(model=policy_model, beta=0.1)\n\npreferred_seq = torch.randint(0, output_dim, (3, dim))\nunpreferred_seq = torch.randint(0, output_dim, (3, dim))\nloss = dpo_model(preferred_seq, unpreferred_seq)\nprint(loss)\n```\n\n### PyTorch Model Logging\n\nA decorator for comprehensive model execution logging, including parameters, gradients, and memory usage.\n\n```python\nimport torch\nfrom torch import nn\nfrom zeta.utils.verbose_execution import verbose_execution\n\n@verbose_execution(log_params=True, log_gradients=True, log_memory=True)\nclass YourPyTorchModel(nn.Module):\n    def __init__(self):\n        super().__init__()\n        self.conv1 = nn.Conv2d(3, 64, 3)\n        self.relu = nn.ReLU()\n        self.flatten = nn.Flatten()\n        self.fc = nn.Linear(64 * 222 * 222, 10)\n    \n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.relu(x)\n        x = self.flatten(x)\n        x = self.fc(x)\n        return x\n\nmodel = YourPyTorchModel()\ninput_tensor = torch.randn(1, 3, 224, 224)\noutput = model(input_tensor)\n\n# Gradient information requires backward pass\nloss = output.sum()\nloss.backward()\n```\n\n### Sigmoid Attention\n\nAn attention mechanism that replaces softmax with sigmoid, providing up to 18% speedup while maintaining performance.\n\n```python\nimport torch\nfrom zeta import SigmoidAttention\n\nbatch_size = 32\nseq_len = 128\ndim = 512\nheads = 8\n\nx = torch.rand(batch_size, seq_len, dim)\nmask = torch.ones(batch_size, seq_len, seq_len)\n\nsigmoid_attn = SigmoidAttention(dim, heads, seq_len)\noutput = sigmoid_attn(x, mask)\nprint(output.shape)  # torch.Size([32, 128, 512])\n```\n\n## Documentation\n\nComprehensive documentation is available at [zeta.apac.ai](https:\u002F\u002Fzeta.apac.ai\u002F).\n\n## Quick Examples\n\nThere are various examples that you can try out in the [examples folder](examples\u002FREADME.md).\n\n## Running Tests\n\nTo run the full test suite:\n\n```bash\npython3 -m pip install -e '.[testing]'  # Install extra dependencies for testing\npython3 -m pytest tests\u002F                # Run the entire test suite\n```\n\nFor more details, refer to the CI workflow configuration.\n\n## Community\n\nJoin our growing community for real-time support, ideas, and discussions on building better AI models.\n\n| Platform    | Link                                                                         | Description                 |\n|-------------|------------------------------------------------------------------------------|-----------------------------|\n| Docs        | [zeta.apac.ai](https:\u002F\u002Fzeta.apac.ai)                                         | Official documentation      |\n| Discord     | [Join our 
Discord](https:\u002F\u002Fdiscord.gg\u002FEamjgSaEQf)                            | Live chat & community       |\n| Twitter     | [@kyegomez](https:\u002F\u002Ftwitter.com\u002Fkyegomez)                                    | Follow for updates          |\n| LinkedIn    | [The Swarm Corporation](https:\u002F\u002Fwww.linkedin.com\u002Fcompany\u002Fthe-swarm-corporation) | Connect professionally      |\n| YouTube     | [YouTube Channel](https:\u002F\u002Fwww.youtube.com\u002Fchannel\u002FUC9yXyitkbU_WSy7bd_41SqQ)  | Watch our videos            |\n\n## Contributing\n\nZeta is an open-source project, and contributions are welcome! If you want to create new features, fix bugs, or improve the infrastructure, we'd love to have you contribute.\n\n**Getting Started:**\n\n- Pick any issue with the `good first issue` tag to get started\n- Read our [Contributing Guidelines](CONTRIBUTING.md)\n- Check out our [contributing board](https:\u002F\u002Fgithub.com\u002Fusers\u002Fkyegomez\u002Fprojects\u002F1) for roadmap discussions\n\n**Report Issues:**\n\n- [Bug Report](https:\u002F\u002Fgithub.com\u002Fkyegomez\u002Fzeta\u002Fissues\u002Fnew\u002Fchoose)\n- [Feature Request](https:\u002F\u002Fgithub.com\u002Fkyegomez\u002Fzeta\u002Fissues\u002Fnew\u002Fchoose)\n\n## Our Contributors\n\nThank you to all of our contributors who have built this great framework 🙌\n\n\u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fkyegomez\u002Fzeta\u002Fgraphs\u002Fcontributors\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkyegomez_zeta_readme_c5c45c36af60.png\" alt=\"Contributors\" \u002F>\n\u003C\u002Fa>\n\n---\n\n\n## Citation\n\nIf you use Zeta in your research or projects, please cite it:\n\n```bibtex\n@misc{zetascale,\n    title = {Zetascale Framework},\n    author = {Kye Gomez},\n    year = {2024},\n    howpublished = {\\url{https:\u002F\u002Fgithub.com\u002Fkyegomez\u002Fzeta}},\n}\n```\n\n## License\n\nApache 2.0 License\n","![Zeta 横幅](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkyegomez_zeta_readme_686837b92a91.png)\n\n\u003Cp>\n  \u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fkyegomez\u002Fzeta\u002Fblob\u002Fmain\u002FLICENSE\">\n    \u003Cimg alt=\"MIT 许可证\" src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002Flicense-MIT-blue.svg\" \u002F>\n  \u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fpypi.org\u002Fproject\u002Fzetascale\">\n    \u003Cimg alt=\"PyPI\" src=\"https:\u002F\u002Fbadge.fury.io\u002Fpy\u002Fzetascale.svg\" \u002F>\n  \u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fzeta.readthedocs.io\">\n    \u003Cimg alt=\"文档\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkyegomez_zeta_readme_13d664e1afd7.png\" \u002F>\n  \u003C\u002Fa>\n\u003C\u002Fp>\n\n**Zeta** 是一个模块化的 PyTorch 框架，旨在通过提供可重用、高性能的构建模块来简化 AI 模型的开发。你可以把它想象成一套用于 AI 的乐高积木——每个组件都经过精心设计、测试和优化，使你能够快速搭建最先进的模型，而无需从头开始重新发明轮子。\n\n\n\u003Cp>\n  \u003Ca href=\"https:\u002F\u002Fdiscord.gg\u002FEamjgSaEQf\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FDiscord-Join%20our%20server-5865F2?style=for-the-badge&logo=discord&logoColor=white\" alt=\"加入我们的 Discord\" \u002F>\n  \u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fwww.youtube.com\u002F@kyegomez3242\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FYouTube-Subscribe-red?style=for-the-badge&logo=youtube&logoColor=white\" alt=\"在 YouTube 上订阅\" \u002F>\n  \u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fwww.linkedin.com\u002Fin\u002Fkye-g-38759a207\u002F\">\n    \u003Cimg 
src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FLinkedIn-Connect-blue?style=for-the-badge&logo=linkedin&logoColor=white\" alt=\"在 LinkedIn 上联系\" \u002F>\n  \u003C\u002Fa>\n  \u003Ca href=\"https:\u002F\u002Fx.com\u002Fkyegomezb\">\n    \u003Cimg src=\"https:\u002F\u002Fimg.shields.io\u002Fbadge\u002FX.com-Follow-1DA1F2?style=for-the-badge&logo=x&logoColor=white\" alt=\"在 X.com 上关注\" \u002F>\n  \u003C\u002Fa>\n\u003C\u002Fp>\n\n## 概述\n\nZeta 提供了一个全面的模块化组件库，这些组件广泛应用于现代 AI 架构中，包括：\n\n- **注意力机制**：多查询注意力、sigmoid 注意力、FlashAttention 等\n- **专家混合（MoE）**：高效的专家路由和门控机制\n- **神经网络模块**：前馈网络、激活函数、归一化层\n- **量化**：BitLinear、动态量化等优化技术\n- **架构**：Transformer、编码器、解码器、视觉 Transformer，以及完整的模型实现\n- **训练工具**：优化算法、日志记录和性能监控\n\n\n每个组件的设计特点如下：\n- **模块化**：即插即用，与 PyTorch 无缝兼容\n- **高性能**：在适用情况下采用融合内核进行优化实现\n- **经过充分测试**：拥有全面的测试覆盖率，确保可靠性\n- **生产就绪**：已在多个领域的数百个模型中得到应用\n\n## 安装\n\n```bash\npip3 install -U zetascale\n```\n\n## 快速入门\n\n### 多查询注意力\n\n多查询注意力通过在不同注意力头之间共享键和值投影，从而在保持模型质量的同时降低内存占用。\n\n```python\nimport torch\nfrom zeta import MultiQueryAttention\n\n# 初始化模型\nmodel = MultiQueryAttention(\n    dim=512,\n    heads=8,\n)\n\n# 前向传播\ntext = torch.randn(2, 4, 512)\noutput, _, _ = model(text)\nprint(output.shape)  # torch.Size([2, 4, 512])\n```\n\n### SwiGLU 激活函数\n\nSwiGLU 激活函数通过门控机制有选择地在网络中传递信息。\n\n```python\nimport torch\nfrom zeta.nn import SwiGLUStacked\n\nx = torch.randn(5, 10)\nswiglu = SwiGLUStacked(10, 20)\noutput = swiglu(x)\nprint(output.shape)  # torch.Size([5, 20])\n```\n\n### 相对位置偏置\n\n相对位置偏置将位置之间的距离量化为若干桶，并利用嵌入向量提供位置感知的注意力偏置。\n\n```python\nimport torch\nfrom torch import nn\nfrom zeta.nn import RelativePositionBias\n\n# 初始化模块\nrel_pos_bias = RelativePositionBias()\n\n# 计算注意力机制的偏置矩阵\nbias_matrix = rel_pos_bias(1, 10, 10)\n\n# 在自定义注意力中使用\nclass CustomAttention(nn.Module):\n    def __init__(self):\n        super().__init__()\n        self.rel_pos_bias = RelativePositionBias()\n\n    def forward(self, queries, keys):\n        bias = self.rel_pos_bias(queries.size(0), queries.size(1), keys.size(1))\n        # 在注意力计算中使用偏置\n        return None\n```\n\n### 前馈网络\n\n一个灵活的前馈模块，带有可选的 GLU 激活函数和 LayerNorm，常用于 Transformer 架构中。\n\n```python\nimport torch\nfrom zeta.nn import FeedForward\n\nmodel = FeedForward(256, 512, glu=True, post_act_ln=True, dropout=0.2)\nx = torch.randn(1, 256)\noutput = model(x)\nprint(output.shape)  # torch.Size([1, 512])\n```\n\n### BitLinear 量化\n\nBitLinear 通过量化和反量化执行线性变换，能够在保持性能的同时减少内存占用。其灵感来源于 [BitNet: 面向大型语言模型的 1 位 Transformer 扩展](https:\u002F\u002Farxiv.org\u002Fabs\u002F2310.11453)。\n\n```python\nimport torch\nfrom torch import nn\nimport zeta.quant as qt\n\nclass MyModel(nn.Module):\n    def __init__(self):\n        super().__init__()\n        self.linear = qt.BitLinear(10, 20)\n\n    def forward(self, x):\n        return self.linear(x)\n\nmodel = MyModel()\ninput = torch.randn(128, 10)\noutput = model(input)\nprint(output.size())  # torch.Size([128, 20])\n```\n\n### PalmE：多模态架构\n\nPalmE 是一种完整的多模态模型架构实现，结合了 ViT 图像编码器和 Transformer 解码器，用于视觉-语言任务。\n\n```python\nimport torch\nfrom zeta.structs import (\n    AutoRegressiveWrapper,\n    Decoder,\n    Encoder,\n    Transformer,\n    ViTransformerWrapper,\n)\n\nclass PalmE(torch.nn.Module):\n    \"\"\"\n    PalmE 是一种使用 ViT 编码器和 Transformer 解码器的 Transformer 架构。\n    \n    该实现展示了如何结合 Zeta 的模块化组件来构建一个完整的多模态模型架构。\n    \"\"\"\n    \n    def __init__(\n        self,\n        image_size=256,\n        patch_size=32,\n        encoder_dim=512,\n        encoder_depth=6,\n        encoder_heads=8,\n        num_tokens=20000,\n        max_seq_len=1024,\n       
 decoder_dim=512,\n        decoder_depth=6,\n        decoder_heads=8,\n        alibi_num_heads=4,\n        attn_kv_heads=2,\n        use_abs_pos_emb=False,\n        cross_attend=True,\n        alibi_pos_bias=True,\n        rotary_xpos=True,\n        attn_flash=True,\n        qk_norm=True,\n    ):\n        super().__init__()\n        \n        # 视觉编码器\n        self.encoder = ViTransformerWrapper(\n            image_size=image_size,\n            patch_size=patch_size,\n            attn_layers=Encoder(\n                dim=encoder_dim, \n                depth=encoder_depth, \n                heads=encoder_heads\n            ),\n        )\n        \n        # 语言解码器\n        self.decoder = Transformer(\n            num_tokens=num_tokens,\n            max_seq_len=max_seq_len,\n            use_abs_pos_emb=use_abs_pos_emb,\n            attn_layers=Decoder(\n                dim=decoder_dim,\n                depth=decoder_depth,\n                heads=decoder_heads,\n                cross_attend=cross_attend,\n                alibi_pos_bias=alibi_pos_bias,\n                alibi_num_heads=alibi_num_heads,\n                rotary_xpos=rotary_xpos,\n                attn_kv_heads=attn_kv_heads,\n                attn_flash=attn_flash,\n                qk_norm=qk_norm,\n            ),\n        )\n        \n        # 启用自回归生成\n        self.decoder = AutoRegressiveWrapper(self.decoder)\n    \n    def forward(self, img: torch.Tensor, text: torch.Tensor):\n        \"\"\"模型前向传播。\"\"\"\n        encoded = self.encoder(img, return_embeddings=True)\n        return self.decoder(text, context=encoded)\n\n# 使用示例\nimg = torch.randn(1, 3, 256, 256)\ntext = torch.randint(0, 20000, (1, 1024))\nmodel = PalmE()\noutput = model(img, text)\nprint(output.shape)\n```\n\n### U-Net 架构\n\n用于图像分割和生成任务的完整 U-Net 实现。\n\n```python\nimport torch\nfrom zeta.nn import Unet\n\nmodel = Unet(n_channels=1, n_classes=2)\nx = torch.randn(1, 1, 572, 572)\ny = model(x)\nprint(f\"输入形状: {x.shape}\")\nprint(f\"输出形状: {y.shape}\")\n```\n\n### 视觉嵌入\n\n将图像转换为适合基于 Transformer 的视觉模型的补丁嵌入。\n\n```python\nimport torch\nfrom zeta.nn import VisionEmbedding\n\nvision_embedding = VisionEmbedding(\n    img_size=224,\n    patch_size=16,\n    in_chans=3,\n    embed_dim=768,\n    contain_mask_token=True,\n    prepend_cls_token=True,\n)\n\ninput_image = torch.rand(1, 3, 224, 224)\noutput = vision_embedding(input_image)\nprint(output.shape)\n```\n\n### 使用 Niva 进行动态量化\n\nNiva 提供针对特定层类型的动态量化功能，非常适合具有可变运行时激活的模型。\n\n```python\nimport torch\nfrom torch import nn\nfrom zeta import niva\n\n# 加载预训练模型\nmodel = YourModelClass()\n\n# 对模型进行动态量化\nniva(\n    model=model,\n    model_path=\"path_to_pretrained_weights.pt\",\n    output_path=\"quantized_model.pt\",\n    quant_type=\"dynamic\",\n    quantize_layers=[nn.Linear, nn.Conv2d],\n    dtype=torch.qint8,\n)\n```\n\n### 融合操作\n\nZeta 包含多种融合操作，可以将多个操作合并为单个内核，从而提升性能。\n\n#### FusedDenseGELUDense\n\n将两个全连接层与 GELU 激活函数融合，速度最高可提升 2 倍。\n\n```python\nimport torch\nfrom zeta.nn import FusedDenseGELUDense\n\nx = torch.randn(1, 512)\nmodel = FusedDenseGELUDense(512, 1024)\nout = model(x)\nprint(out.shape)  # torch.Size([1, 1024])\n```\n\n#### FusedDropoutLayerNorm\n\n将 Dropout 和层归一化融合，以加快前馈网络的速度。\n\n```python\nimport torch\nfrom zeta.nn import FusedDropoutLayerNorm\n\nmodel = FusedDropoutLayerNorm(dim=512)\nx = torch.randn(1, 512)\noutput = model(x)\nprint(output.shape)  # torch.Size([1, 512])\n```\n\n### Mamba：状态空间模型\n\nMamba 状态空间模型架构的 PyTorch 实现。\n\n```python\nimport torch\nfrom zeta.nn import MambaBlock\n\nblock = MambaBlock(dim=64, 
depth=1)\nx = torch.randn(1, 10, 64)\ny = block(x)\nprint(y.shape)  # torch.Size([1, 10, 64])\n```\n\n### FiLM：特征线性调制\n\n用于条件特征变换的特征线性调制。\n\n```python\nimport torch\nfrom zeta.nn import Film\n\nfilm_layer = Film(dim=128, hidden_dim=64, expanse_ratio=4)\nconditions = torch.randn(10, 128)\nhiddens = torch.randn(10, 1, 128)\nmodulated_features = film_layer(conditions, hiddens)\nprint(modulated_features.shape)  # torch.Size([10, 1, 128])\n```\n\n### 模型优化\n\n`hyper_optimize` 装饰器提供了一个统一的接口，用于多种优化技术。\n\n```python\nimport torch\nfrom zeta.nn import hyper_optimize\n\n@hyper_optimize(\n    torch_fx=False,\n    torch_script=False,\n    torch_compile=True,\n    quantize=True,\n    mixed_precision=True,\n    enable_metrics=True,\n)\ndef model(x):\n    return x @ x\n\nout = model(torch.randn(1, 3, 32, 32))\nprint(out)\n```\n\n### 直接偏好优化（DPO）\n\n用于基于人类反馈的强化学习（RLHF）应用的 DPO 实现。\n\n```python\nimport torch\nfrom torch import nn\nfrom zeta.rl import DPO\n\nclass PolicyModel(nn.Module):\n    def __init__(self, dim, output_dim):\n        super().__init__()\n        self.fc = nn.Linear(dim, output_dim)\n    \n    def forward(self, x):\n        return self.fc(x)\n\ndim = 10\noutput_dim = 5\npolicy_model = PolicyModel(dim, output_dim)\ndpo_model = DPO(model=policy_model, beta=0.1)\n\npreferred_seq = torch.randint(0, output_dim, (3, dim))\nunpreferred_seq = torch.randint(0, output_dim, (3, dim))\nloss = dpo_model(preferred_seq, unpreferred_seq)\nprint(loss)\n```\n\n### PyTorch 模型日志记录\n\n一个用于全面模型执行日志记录的装饰器，包括参数、梯度和内存使用情况。\n\n```python\nimport torch\nfrom torch import nn\nfrom zeta.utils.verbose_execution import verbose_execution\n\n@verbose_execution(log_params=True, log_gradients=True, log_memory=True)\nclass YourPyTorchModel(nn.Module):\n    def __init__(self):\n        super().__init__()\n        self.conv1 = nn.Conv2d(3, 64, 3)\n        self.relu = nn.ReLU()\n        self.flatten = nn.Flatten()\n        self.fc = nn.Linear(64 * 222 * 222, 10)\n    \n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.relu(x)\n        x = self.flatten(x)\n        x = self.fc(x)\n        return x\n\nmodel = YourPyTorchModel()\ninput_tensor = torch.randn(1, 3, 224, 224)\noutput = model(input_tensor)\n\n# 梯度信息需要反向传播\nloss = output.sum()\nloss.backward()\n```\n\n### Sigmoid 注意力机制\n\n一种用 sigmoid 替代 softmax 的注意力机制，可在保持性能的同时提供高达 18% 的速度提升。\n\n```python\nimport torch\nfrom zeta import SigmoidAttention\n\nbatch_size = 32\nseq_len = 128\ndim = 512\nheads = 8\n\nx = torch.rand(batch_size, seq_len, dim)\nmask = torch.ones(batch_size, seq_len, seq_len)\n\nsigmoid_attn = SigmoidAttention(dim, heads, seq_len)\noutput = sigmoid_attn(x, mask)\nprint(output.shape)  # torch.Size([32, 128, 512])\n```\n\n## 文档\n\n完整的文档可在 [zeta.apac.ai](https:\u002F\u002Fzeta.apac.ai\u002F) 上找到。\n\n## 快速示例\n\n您可以在 [examples 文件夹](examples\u002FREADME.md) 中尝试各种示例。\n\n## 运行测试\n\n要运行完整的测试套件：\n\n```bash\npython3 -m pip install -e '.[testing]'  # 安装测试所需的额外依赖\npython3 -m pytest tests\u002F                # 运行整个测试套件\n```\n\n更多详细信息，请参阅 CI 工作流配置。\n\n## 社区\n\n加入我们不断壮大的社区，获取实时支持、创意以及关于构建更优 AI 模型的讨论。\n\n| 平台    | 链接                                                                         | 描述                 |\n|-------------|------------------------------------------------------------------------------|-----------------------------|\n| 文档        | [zeta.apac.ai](https:\u002F\u002Fzeta.apac.ai)                                         | 官方文档      |\n| Discord     | [加入我们的 Discord](https:\u002F\u002Fdiscord.gg\u002FEamjgSaEQf)                            | 实时聊天与社区      
|\n| Twitter     | [@kyegomez](https:\u002F\u002Ftwitter.com\u002Fkyegomez)                                    | 关注以获取最新动态          |\n| LinkedIn    | [The Swarm Corporation](https:\u002F\u002Fwww.linkedin.com\u002Fcompany\u002Fthe-swarm-corporation) | 专业交流      |\n| YouTube     | [YouTube 频道](https:\u002F\u002Fwww.youtube.com\u002Fchannel\u002FUC9yXyitkbU_WSy7bd_41SqQ)  | 观看我们的视频            |\n\n## 贡献\n\nZeta 是一个开源项目，欢迎各位贡献！如果您想添加新功能、修复 bug 或改进基础设施，我们都十分期待您的参与。\n\n**开始贡献：**\n\n- 选择带有 `good first issue` 标签的问题作为起点\n- 阅读我们的 [贡献指南](CONTRIBUTING.md)\n- 查看我们的 [贡献看板](https:\u002F\u002Fgithub.com\u002Fusers\u002Fkyegomez\u002Fprojects\u002F1) 以了解路线图讨论\n\n**报告问题：**\n\n- [Bug 报告](https:\u002F\u002Fgithub.com\u002Fkyegomez\u002Fzeta\u002Fissues\u002Fnew\u002Fchoose)\n- [功能请求](https:\u002F\u002Fgithub.com\u002Fkyegomez\u002Fzeta\u002Fissues\u002Fnew\u002Fchoose)\n\n## 我们的贡献者\n\n感谢所有为我们构建这一优秀框架的贡献者 🙌\n\n\u003Ca href=\"https:\u002F\u002Fgithub.com\u002Fkyegomez\u002Fzeta\u002Fgraphs\u002Fcontributors\">\n  \u003Cimg src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkyegomez_zeta_readme_c5c45c36af60.png\" alt=\"贡献者\" \u002F>\n\u003C\u002Fa>\n\n---\n\n\n## 引用\n\n如果您在研究或项目中使用 Zeta，请引用如下：\n\n```bibtex\n@misc{zetascale,\n    title = {Zetascale Framework},\n    author = {Kye Gomez},\n    year = {2024},\n    howpublished = {\\url{https:\u002F\u002Fgithub.com\u002Fkyegomez\u002Fzeta}},\n}\n```\n\n## 许可证\n\nApache 2.0 许可证","# Zeta 快速上手指南\n\nZeta 是一个模块化的 PyTorch 框架，旨在通过提供可复用、高性能的构建块来简化 AI 模型的开发。你可以将其视为 AI 领域的“LEGO 积木”，快速组装出最先进的模型架构。\n\n## 环境准备\n\n在开始之前，请确保你的开发环境满足以下要求：\n\n*   **操作系统**: Linux, macOS, 或 Windows\n*   **Python 版本**: Python 3.8 及以上\n*   **核心依赖**: PyTorch (建议安装与你的 CUDA 版本匹配的最新版)\n*   **包管理工具**: pip\n\n## 安装步骤\n\n使用 pip 直接安装最新版本的 `zetascale` 包：\n\n```bash\npip3 install -U zetascale\n```\n\n> **提示**：国内开发者若遇到下载速度慢的问题，可使用清华源或阿里源加速安装：\n> ```bash\n> pip3 install -U zetascale -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n> ```\n\n## 基本使用\n\nZeta 的核心优势在于其模块化组件。以下是一个最基础的示例，展示如何使用 **Multi-Query Attention**（多查询注意力机制）模块。该机制能在保持模型质量的同时显著降低显存占用。\n\n### 示例：使用 Multi-Query Attention\n\n```python\nimport torch\nfrom zeta import MultiQueryAttention\n\n# 1. 初始化模型组件\n# dim: 嵌入维度，heads: 注意力头数\nmodel = MultiQueryAttention(\n    dim=512,\n    heads=8,\n)\n\n# 2. 准备输入数据 (Batch_Size, Sequence_Length, Dimension)\ntext = torch.randn(2, 4, 512)\n\n# 3. 执行前向传播\noutput, _, _ = model(text)\n\n# 4. 查看输出形状\nprint(output.shape)  # 输出: torch.Size([2, 4, 512])\n```\n\n### 更多常用组件\n\nZeta 提供了丰富的即插即用模块，以下是几个高频使用的场景：\n\n**1. SwiGLU 激活函数**\n```python\nimport torch\nfrom zeta.nn import SwiGLUStacked\n\nx = torch.randn(5, 10)\nswiglu = SwiGLUStacked(10, 20)\noutput = swiglu(x)\nprint(output.shape)  # torch.Size([5, 20])\n```\n\n**2. 量化线性层 (BitLinear)**\n基于 BitNet 技术，减少内存占用并保持性能。\n```python\nimport torch\nfrom torch import nn\nimport zeta.quant as qt\n\nclass MyModel(nn.Module):\n    def __init__(self):\n        super().__init__()\n        self.linear = qt.BitLinear(10, 20)\n\n    def forward(self, x):\n        return self.linear(x)\n\nmodel = MyModel()\ninput = torch.randn(128, 10)\noutput = model(input)\nprint(output.size())  # torch.Size([128, 20])\n```\n\n**3. 
完整架构示例：U-Net**\n直接调用完整的 U-Net 架构用于图像分割任务。\n```python\nimport torch\nfrom zeta.nn import Unet\n\nmodel = Unet(n_channels=1, n_classes=2)\nx = torch.randn(1, 1, 572, 572)\ny = model(x)\nprint(f\"Output shape: {y.shape}\")\n```\n\n通过以上示例，你可以快速将 Zeta 的高性能模块集成到自己的 PyTorch 项目中，无需重复造轮子。","某初创团队正在研发一款面向垂直领域的轻量级大语言模型，需要在有限算力下快速验证多种前沿架构（如混合专家 MoE 和高效注意力机制）的效果。\n\n### 没有 zeta 时\n- **重复造轮子耗时**：工程师需手动从零编写 Flash Attention、SwiGLU 激活函数等复杂算子，花费数周时间调试底层代码而非关注模型逻辑。\n- **性能优化困难**：自研模块缺乏内核级融合优化，导致显存占用高、训练速度慢，难以在单卡环境下跑通大参数量实验。\n- **架构试错成本高**：想要切换不同的注意力机制（如从多头注意力改为多查询注意力）或引入相对位置编码，需要大幅重构网络结构，极易引入 Bug。\n- **稳定性无保障**：自定义组件缺乏充分的单元测试覆盖，在长周期训练中常出现梯度爆炸或数值不稳定，排查问题耗费大量精力。\n\n### 使用 zeta 后\n- **积木式快速搭建**：直接调用 zeta 预置的模块化组件（如 `MultiQueryAttention`、`SwiGLUStacked`），像拼乐高一样在几小时内组装出 SOTA 模型架构。\n- **开箱即用的性能**：内置组件已针对 PyTorch 进行内核融合与量化优化（如 BitLinear），显著降低显存开销并提升训练吞吐量。\n- **灵活的低成本试错**：通过简单的参数配置即可无缝替换注意力机制或插入 MoE 路由模块，轻松对比不同架构对最终效果的影响。\n- **生产级可靠性**：依托 zeta 完善的测试覆盖和工业界验证记录，模型训练过程稳定可控，团队能将重心完全转移到业务数据调优上。\n\nzeta 让算法团队从繁琐的底层实现中解放出来，将模型迭代周期从“月”级缩短至“天”级，真正实现了高性能 AI 模型的敏捷开发。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fkyegomez_zeta_686837b9.png","kyegomez","Kye Gomez","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fkyegomez_0b95c3bb.jpg","Founder of swarms.ai","Swarms","Palo Alto",null,"KyeGomezB","https:\u002F\u002Fgithub.com\u002Fkyegomez\u002Fswarms","https:\u002F\u002Fgithub.com\u002Fkyegomez",[83,87],{"name":84,"color":85,"percentage":86},"Python","#3572A5",100,{"name":88,"color":89,"percentage":90},"Dockerfile","#384d54",0,580,55,"2026-04-11T03:50:37","Apache-2.0",1,"未说明","未说明 (基于 PyTorch，部分功能如 Flash Attention 可能需要 NVIDIA GPU)",{"notes":99,"python":96,"dependencies":100},"README 中未明确列出具体的操作系统、GPU 型号、显存大小、内存需求及 Python 版本要求。该工具是一个模块化的 PyTorch 框架，安装命令为 `pip3 install -U zetascale`。部分高级功能（如 Flash Attention、BitLinear 量化、Mamba 架构）通常依赖特定的硬件加速或额外的底层库支持，建议参考官方文档或源码获取更详细的运行环境配置。",[101,102],"torch","zetascale",[35,14],[105,106,107,108,109,110,111,112,113,114,115,116,117],"transformers","pytorch","attention-mechanism","attention-model","chatgpt","ffns","llms","openai","tensorflow","transformer-architecture","lucidrains","pytorch-implementation","pytorch-tutorial","2026-03-27T02:49:30.150509","2026-04-20T04:06:00.871825",[121,126,131,136,141,146],{"id":122,"question_zh":123,"answer_zh":124,"source_url":125},43873,"为什么在使用 Mamba 网络时反向传播计算非常慢？","这个问题通常与 'backscan' 操作的实现效率有关。维护者已确认该部分是性能瓶颈。建议检查是否使用了 CUDA 加速，并关注后续版本更新，因为维护者表示已针对此问题进行修复或优化。如果问题依旧，请确保输入张量和模型参数都在同一设备（如 GPU）上。","https:\u002F\u002Fgithub.com\u002Fkyegomez\u002Fzeta\u002Fissues\u002F220",{"id":127,"question_zh":128,"answer_zh":129,"source_url":130},43874,"遇到 'Expected all tensors to be on the same device' (cuda:0 和 cpu) 错误怎么办？","这是因为模型内部创建的张量默认在 CPU 上，而输入数据在 GPU 上。解决方法是修改源码 `zeta\u002Fnn\u002Fmodules\u002Fsimple_mamba.py` 第 202 行，将创建零张量的代码改为自动匹配模型所在设备：\n`x = torch.zeros((b, d_in, n)).to(next(self.parameters()).device)`\n或者在调用模块前确保所有输入和子模块都显式移动到同一设备。","https:\u002F\u002Fgithub.com\u002Fkyegomez\u002Fzeta\u002Fissues\u002F181",{"id":132,"question_zh":133,"answer_zh":134,"source_url":135},43875,"在 MacOS (MPS 后端) 上导入或使用 MambaBlock 时报错怎么办？","这通常是由于 PyTorch MPS 后端的默认数据类型与代码预期不一致导致的。首先尝试升级 zeta 库到最新版本：`pip install -U zetascale`。如果问题仍然存在，可能是因为 MPS 后端对某些算子支持不完善，建议检查张量数据类型是否为 Float 或 Long，并确保在初始化模型前正确处理了设备兼容性问题。","https:\u002F\u002Fgithub.com\u002Fkyegomez\u002Fzeta\u002Fissues\u002F127",{"id":137,"question_zh":138,"answer_zh":139,"source_url":140},43876,"运行测试时出现 'ModuleNotFoundError: No module named ...' 
错误如何解决？","这通常是因为本地代码库未同步或 pip 安装的版本过旧，导致测试文件引用了新版本才有的模块。解决方法是同步最新的代码仓库或升级 pip 包：\n1. 如果是源码安装，执行 `git pull` 同步最新代码。\n2. 如果是 pip 安装，执行 `pip install -U zeta` (或 `zetascale`) 升级到最新版本。","https:\u002F\u002Fgithub.com\u002Fkyegomez\u002Fzeta\u002Fissues\u002F83",{"id":142,"question_zh":143,"answer_zh":144,"source_url":145},43877,"Zeta 库强制锁定了 torchvision 版本，导致无法使用新版 PyTorch 怎么办？","这是一个已知的依赖限制问题。维护者已确认可以放宽该限制。如果遇到依赖冲突（例如需要 PyTorch 2.3.1 但被 torchvision 0.18.0 限制），建议暂时手动修改 `requirements.txt` 或使用 `pip install --no-deps` 安装后手动安装兼容版本的 torchvision，同时关注官方发布的更新版本以获取修复后的依赖配置。","https:\u002F\u002Fgithub.com\u002Fkyegomez\u002Fzeta\u002Fissues\u002F232",{"id":147,"question_zh":148,"answer_zh":149,"source_url":150},43878,"VisualExpert 或其他模块报设备不匹配错误 (cuda:0 vs cpu) 如何修复？","当在 `visual_expert` 等模块中遇到层归一化（LayerNorm）的设备不匹配错误时，原因是模块内部的权重参数未随输入数据移动到 GPU。除了检查输入数据 `.to(device)` 外，还需确保模型实例本身已正确移动到目标设备（例如 `model.to(device)`）。如果是库内部代码问题，参考类似 Issue #181 的修复方案，在源码中显式指定新创建张量的设备为 `next(self.parameters()).device`。","https:\u002F\u002Fgithub.com\u002Fkyegomez\u002Fzeta\u002Fissues\u002F197",[152,157,161,165,169,173],{"id":153,"version":154,"summary_zh":155,"released_at":156},351314,"2.3.7","# 更改日志报告\n[功能]-[模块]: [return_loss_text]: 添加 [return_loss_text] 函数，以提升损失计算的可读性\n[功能]-[模块]: [calc_z_loss]: 引入 [calc_z_loss] 函数，用于在模型训练中计算 Z 损失\n[功能]-[模块]: [max_neg_value]: 实现 [max_neg_value] 函数，用于处理计算中的负值\n[功能]-[模块]: [TextTokenEmbedding]: 部署 [TextTokenEmbedding]，以改进文本标记嵌入功能\n[功能]-[模块]: [dropout_seq]: 添加 [dropout_seq] 函数，用于神经网络层中的序列丢弃\n[功能]-[模块]: [transformer_generate]: 引入 [transformer_generate] 函数，用于高效的 Transformer 文本生成\n[功能]-[模块]: [vit_output_head]: 添加 [vit_output_head]，用于 Vision Transformer 模型输出的处理\n[功能]-[模块]: [patch_linear_flatten]: 实现 [patch_linear_flatten]，用于 ViT 中线性补丁展平的优化\n[功能]-[模块]: [ScalableImgSelfAttention]: 引入 [ScalableImgSelfAttention]，用于可扩展的图像自注意力机制\n\n\n## 简介\n\n本更改日志报告详细列出了 Zeta 神经网络模块的最新功能新增内容。每条记录都描述了该功能的目的、实现细节以及其对系统性能或功能的预期影响。我们的重点在于提升神经网络操作的鲁棒性、效率和可扩展性，特别是针对损失计算、标记嵌入、丢弃序列和注意力机制等方面的改进。\n\n## 条目\n\n### [功能]-[模块]: [return_loss_text]\n\n#### 目的\n\n引入 `return_loss_text` 函数旨在为神经网络训练过程中的损失计算提供一种更直观、更易读的方式。通过将损失值转换为文本描述，开发人员和研究人员可以更轻松地解释和传达训练迭代的效果。\n\n#### 实现细节\n\n该函数在 `return_loss_text` 模块中实现，接收数值型损失数据作为输入，并生成一段描述性字符串，总结损失的大小及对模型性能的潜在影响。函数利用预定义的损失范围描述符对损失值进行分类，从而一目了然地提供洞察信息。\n\n#### 预期影响\n\n此功能预计将增强模型开发中的调试和优化阶段，使调整更加迅速，并对模型行为有更直观的理解。通过提供人类可读的损失描述，它弥合了原始数据分析与实际应用洞察之间的差距。\n\n### [功能]-[模块]: [calc_z_loss]\n\n#### 目的\n\n引入 `calc_z_loss` 函数是为了计算 Z 损失，这是一种新颖的指标，旨在通过调整训练数据中的特定不平衡和偏差来优化模型性能。该函数对于处理异构数据集且标准损失函数无法有效应对的模型至关重要。","2024-04-06T02:57:58",{"id":158,"version":159,"summary_zh":78,"released_at":160},351315,"0.0.3","2023-07-10T17:40:05",{"id":162,"version":163,"summary_zh":78,"released_at":164},351316,"0.0.2","2023-07-10T04:19:48",{"id":166,"version":167,"summary_zh":78,"released_at":168},351317,"0.0.111","2023-07-10T03:06:20",{"id":170,"version":171,"summary_zh":78,"released_at":172},351318,"0.0.11","2023-07-10T03:00:03",{"id":174,"version":175,"summary_zh":78,"released_at":176},351319,"0.0.1","2023-07-10T02:52:02"]