[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-GoodStartLabs--AI_Diplomacy":3,"tool-GoodStartLabs--AI_Diplomacy":64},[4,17,27,35,43,56],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":16},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,3,"2026-04-05T11:01:52",[13,14,15],"开发框架","图像","Agent","ready",{"id":18,"name":19,"github_repo":20,"description_zh":21,"stars":22,"difficulty_score":23,"last_commit_at":24,"category_tags":25,"status":16},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",138956,2,"2026-04-05T11:33:21",[13,15,26],"语言模型",{"id":28,"name":29,"github_repo":30,"description_zh":31,"stars":32,"difficulty_score":23,"last_commit_at":33,"category_tags":34,"status":16},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 
绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",107662,"2026-04-03T11:11:01",[13,14,15],{"id":36,"name":37,"github_repo":38,"description_zh":39,"stars":40,"difficulty_score":23,"last_commit_at":41,"category_tags":42,"status":16},3704,"NextChat","ChatGPTNextWeb\u002FNextChat","NextChat 是一款轻量且极速的 AI 助手，旨在为用户提供流畅、跨平台的大模型交互体验。它完美解决了用户在多设备间切换时难以保持对话连续性，以及面对众多 AI 模型不知如何统一管理的痛点。无论是日常办公、学习辅助还是创意激发，NextChat 都能让用户随时随地通过网页、iOS、Android、Windows、MacOS 或 Linux 端无缝接入智能服务。\n\n这款工具非常适合普通用户、学生、职场人士以及需要私有化部署的企业团队使用。对于开发者而言，它也提供了便捷的自托管方案，支持一键部署到 Vercel 或 Zeabur 等平台。\n\nNextChat 的核心亮点在于其广泛的模型兼容性，原生支持 Claude、DeepSeek、GPT-4 及 Gemini Pro 等主流大模型，让用户在一个界面即可自由切换不同 AI 能力。此外，它还率先支持 MCP（Model Context Protocol）协议，增强了上下文处理能力。针对企业用户，NextChat 提供专业版解决方案，具备品牌定制、细粒度权限控制、内部知识库整合及安全审计等功能，满足公司对数据隐私和个性化管理的高标准要求。",87618,"2026-04-05T07:20:52",[13,26],{"id":44,"name":45,"github_repo":46,"description_zh":47,"stars":48,"difficulty_score":23,"last_commit_at":49,"category_tags":50,"status":16},2268,"ML-For-Beginners","microsoft\u002FML-For-Beginners","ML-For-Beginners 是由微软推出的一套系统化机器学习入门课程，旨在帮助零基础用户轻松掌握经典机器学习知识。这套课程将学习路径规划为 12 周，包含 26 节精炼课程和 52 道配套测验，内容涵盖从基础概念到实际应用的完整流程，有效解决了初学者面对庞大知识体系时无从下手、缺乏结构化指导的痛点。\n\n无论是希望转型的开发者、需要补充算法背景的研究人员，还是对人工智能充满好奇的普通爱好者，都能从中受益。课程不仅提供了清晰的理论讲解，还强调动手实践，让用户在循序渐进中建立扎实的技能基础。其独特的亮点在于强大的多语言支持，通过自动化机制提供了包括简体中文在内的 50 多种语言版本，极大地降低了全球不同背景用户的学习门槛。此外，项目采用开源协作模式，社区活跃且内容持续更新，确保学习者能获取前沿且准确的技术资讯。如果你正寻找一条清晰、友好且专业的机器学习入门之路，ML-For-Beginners 将是理想的起点。",84991,"2026-04-05T10:45:23",[14,51,52,53,15,54,26,13,55],"数据工具","视频","插件","其他","音频",{"id":57,"name":58,"github_repo":59,"description_zh":60,"stars":61,"difficulty_score":10,"last_commit_at":62,"category_tags":63,"status":16},3128,"ragflow","infiniflow\u002Fragflow","RAGFlow 
是一款领先的开源检索增强生成（RAG）引擎，旨在为大语言模型构建更精准、可靠的上下文层。它巧妙地将前沿的 RAG 技术与智能体（Agent）能力相结合，不仅支持从各类文档中高效提取知识，还能让模型基于这些知识进行逻辑推理和任务执行。\n\n在大模型应用中，幻觉问题和知识滞后是常见痛点。RAGFlow 通过深度解析复杂文档结构（如表格、图表及混合排版），显著提升了信息检索的准确度，从而有效减少模型“胡编乱造”的现象，确保回答既有据可依又具备时效性。其内置的智能体机制更进一步，使系统不仅能回答问题，还能自主规划步骤解决复杂问题。\n\n这款工具特别适合开发者、企业技术团队以及 AI 研究人员使用。无论是希望快速搭建私有知识库问答系统，还是致力于探索大模型在垂直领域落地的创新者，都能从中受益。RAGFlow 提供了可视化的工作流编排界面和灵活的 API 接口，既降低了非算法背景用户的上手门槛，也满足了专业开发者对系统深度定制的需求。作为基于 Apache 2.0 协议开源的项目，它正成为连接通用大模型与行业专有知识之间的重要桥梁。",77062,"2026-04-04T04:44:48",[15,14,13,26,54],{"id":65,"github_repo":66,"name":67,"description_en":68,"description_zh":69,"ai_summary_zh":69,"readme_en":70,"readme_zh":71,"quickstart_zh":72,"use_case_zh":73,"hero_image_url":74,"owner_login":75,"owner_name":76,"owner_avatar_url":77,"owner_bio":78,"owner_company":79,"owner_location":79,"owner_email":79,"owner_twitter":80,"owner_website":81,"owner_url":82,"languages":83,"stars":116,"forks":117,"last_commit_at":118,"license":119,"difficulty_score":23,"env_os":120,"env_gpu":121,"env_ram":120,"env_deps":122,"category_tags":125,"github_topics":126,"view_count":23,"oss_zip_url":79,"oss_zip_packed_at":79,"status":16,"created_at":131,"updated_at":132,"faqs":133,"releases":134},3420,"GoodStartLabs\u002FAI_Diplomacy","AI_Diplomacy","Frontier Models playing the board game Diplomacy.","AI_Diplomacy 是一个让大语言模型（LLM）自主游玩经典策略桌游《外交》（Diplomacy）的开源项目。在《外交》这款游戏中，玩家不仅需要通过军事移动争夺领土，更核心的是要进行复杂的人际谈判、结盟与背叛。传统 AI 往往难以处理这种高度依赖自然语言沟通和心理博弈的场景，而 AI_Diplomacy 正是为了解决这一难题而生。\n\n该项目为游戏中的每个势力赋予了独立的智能体，它们不仅能根据战局动态调整战略目标，还拥有独特的“记忆系统”。通过私人的日记记录、关系追踪以及年度总结机制，这些智能体能够像人类一样记住过往的承诺与背叛，从而制定长期的外交策略。此外，它支持多种主流大模型后端，并集成了路径规划算法来辅助军事决策，实现了从私下谈判到公开指令生成的全流程自动化。\n\nAI_Diplomacy 特别适合人工智能研究人员、游戏开发者以及对多智能体协作感兴趣的技术爱好者使用。其独特的技术亮点在于构建了双层记忆架构和状态感知能力，使得智能体不再是简单的指令执行者，而是具备个性、能进行深度推理和情感计算的虚拟外交官，为研究大模型在复杂博弈环境中的表现提供了极佳的实验平台。","# AI Diplomacy: LLM-Powered Strategic Gameplay\n\nCreated by Alex Duffy @Alx-Ai & Tyler Marques @Tylermarques\n\n## Overview\n\nThis repository extends the original 
[Diplomacy](https:\u002F\u002Fgithub.com\u002Fdiplomacy\u002Fdiplomacy) project with sophisticated AI agents powered by Large Language Models (LLMs). Each power in the game is controlled by an autonomous agent that maintains state, forms relationships, conducts negotiations, and makes strategic decisions.\n\n## Key Features\n\n### 🤖 Stateful AI Agents\n\nEach power is represented by a `DiplomacyAgent` with:\n\n- **Dynamic Goals**: Strategic objectives that evolve based on game events\n- **Relationship Tracking**: Maintains relationships (Enemy\u002FUnfriendly\u002FNeutral\u002FFriendly\u002FAlly) with other powers\n- **Memory System**: Dual-layer memory with structured diary entries and consolidation\n- **Personality**: Power-specific system prompts shape each agent's diplomatic style\n\n### 💬 Rich Negotiations\n\n- Multi-round message exchanges (private and global)\n- Relationship-aware communication strategies\n- Message history tracking and analysis\n- Detection of ignored messages and non-responsive powers\n\n### 🎯 Strategic Order Generation\n\n- BFS pathfinding for movement analysis\n- Context-aware order selection with nearest threats\u002Fopportunities\n- Fallback logic for robustness\n- Support for multiple LLM providers (OpenAI, Claude, Gemini, DeepSeek, OpenRouter)\n\n### 📊 Advanced Game Analysis\n\n- Custom phase summaries with success\u002Ffailure categorization\n- Betrayal detection through order\u002Fnegotiation comparison\n- Strategic planning phases for high-level directives\n- Comprehensive logging of all LLM interactions\n\n### 🧠 Memory Management\n\n- **Private Diary**: Structured, phase-prefixed entries for LLM context\n  - Negotiation summaries with relationship updates\n  - Order reasoning and strategic justifications\n  - Phase result analysis with betrayal detection\n- **Yearly Consolidation**: Automatic summarization of old entries to prevent context overflow\n- **Smart Context Building**: Only relevant history provided to LLMs\n\n## How AI 
Agents Work\n\nThe following diagram illustrates the complete information flow and decision-making process for each AI agent:\n\n```mermaid\ngraph TB\n    %% Game State Sources\n    subgraph \"Game State Information\"\n        GS[Game State\u003Cbr\u002F>- Unit Positions\u003Cbr\u002F>- Supply Centers\u003Cbr\u002F>- Power Status]\n        GH[Game History\u003Cbr\u002F>- Past Orders\u003Cbr\u002F>- Past Messages\u003Cbr\u002F>- Phase Results]\n        PS[Phase Summary\u003Cbr\u002F>- Successful Moves\u003Cbr\u002F>- Failed Moves\u003Cbr\u002F>- Board Changes]\n    end\n    \n    %% Agent Internal State\n    subgraph \"Agent State (DiplomacyAgent)\"\n        GOALS[Dynamic Goals\u003Cbr\u002F>- Expansion targets\u003Cbr\u002F>- Alliance priorities\u003Cbr\u002F>- Defense needs]\n        REL[Relationships\u003Cbr\u002F>Enemy ↔ Ally Scale]\n        \n        subgraph \"Memory System\"\n            DIARY[Private Diary\u003Cbr\u002F>Phase-prefixed entries]\n            \n            ND[Negotiation Diary\u003Cbr\u002F>- Message analysis\u003Cbr\u002F>- Trust assessment\u003Cbr\u002F>- Relationship changes]\n            OD[Order Diary\u003Cbr\u002F>- Strategic reasoning\u003Cbr\u002F>- Risk\u002Freward analysis]\n            PRD[Phase Result Diary\u003Cbr\u002F>- Outcome analysis\u003Cbr\u002F>- Betrayal detection\u003Cbr\u002F>- Success evaluation]\n            \n            CONS[Diary Consolidation\u003Cbr\u002F>Yearly summaries\u003Cbr\u002F>via Gemini Flash]\n        end\n        \n        JOURNAL[Private Journal\u003Cbr\u002F>Debug logs only]\n    end\n    \n    %% Context Building\n    subgraph \"Context Construction\"\n        POC[Possible Order Context\u003Cbr\u002F>- BFS pathfinding\u003Cbr\u002F>- Nearest enemies\u003Cbr\u002F>- Uncontrolled SCs\u003Cbr\u002F>- Adjacent territories]\n        \n        BCP[build_context_prompt\u003Cbr\u002F>Assembles all info]\n        \n        RECENT[Recent Context\u003Cbr\u002F>- Last 40 diary entries\u003Cbr\u002F>- Current 
relationships\u003Cbr\u002F>- Active goals]\n    end\n    \n    %% LLM Interactions\n    subgraph \"LLM Decision Points\"\n        INIT_LLM[Initialization\u003Cbr\u002F>Set initial goals\u003Cbr\u002F>& relationships]\n        \n        NEG_LLM[Negotiation\u003Cbr\u002F>Generate messages\u003Cbr\u002F>Update relationships]\n        \n        PLAN_LLM[Planning\u003Cbr\u002F>Strategic directives]\n        \n        ORD_LLM[Order Generation\u003Cbr\u002F>Choose moves]\n        \n        STATE_LLM[State Update\u003Cbr\u002F>Revise goals\u003Cbr\u002F>& relationships]\n    end\n    \n    %% Prompt Templates\n    subgraph \"Prompt Templates\"\n        PROMPTS[Power-specific prompts\u003Cbr\u002F>+ Instruction templates\u003Cbr\u002F>+ Context templates]\n    end\n    \n    %% Information Flow\n    GS --> BCP\n    GH --> BCP\n    PS --> STATE_LLM\n    \n    GOALS --> BCP\n    REL --> BCP\n    DIARY --> RECENT\n    RECENT --> BCP\n    \n    POC --> BCP\n    BCP --> NEG_LLM\n    BCP --> ORD_LLM\n    BCP --> PLAN_LLM\n    \n    PROMPTS --> INIT_LLM\n    PROMPTS --> NEG_LLM\n    PROMPTS --> PLAN_LLM\n    PROMPTS --> ORD_LLM\n    PROMPTS --> STATE_LLM\n    \n    %% Diary Updates\n    NEG_LLM --> ND\n    ORD_LLM --> OD\n    PS --> PRD\n    \n    ND --> DIARY\n    OD --> DIARY\n    PRD --> DIARY\n    \n    %% State Updates\n    INIT_LLM --> GOALS\n    INIT_LLM --> REL\n    NEG_LLM --> REL\n    STATE_LLM --> GOALS\n    STATE_LLM --> REL\n    \n    %% Consolidation\n    DIARY -->|Every 2 years| CONS\n    CONS -->|Summarized| DIARY\n    \n    %% Styling\n    classDef gameState fill:#e74c3c,stroke:#333,stroke-width:2px,color:#fff\n    classDef agentState fill:#3498db,stroke:#333,stroke-width:2px,color:#fff\n    classDef context fill:#2ecc71,stroke:#333,stroke-width:2px,color:#fff\n    classDef llm fill:#f39c12,stroke:#333,stroke-width:2px,color:#fff\n    classDef memory fill:#9b59b6,stroke:#333,stroke-width:2px,color:#fff\n    \n    class GS,GH,PS gameState\n    class 
GOALS,REL,JOURNAL agentState\n    class POC,BCP,RECENT context\n    class INIT_LLM,NEG_LLM,PLAN_LLM,ORD_LLM,STATE_LLM llm\n    class DIARY,ND,OD,PRD,CONS memory\n```\n\n### Key Components Explained\n\n1. **Information Sources**\n   - **Game State**: Current board position, unit locations, supply center ownership\n   - **Game History**: Complete record of past orders, messages, and results\n   - **Phase Summaries**: Categorized analysis of what succeeded\u002Ffailed each turn\n\n2. **Agent Memory Architecture**\n   - **Private Diary**: The main memory system, with structured entries for each phase\n   - **Diary Types**: Three specialized entry types capture different aspects of gameplay\n   - **Consolidation**: Automatic yearly summarization prevents context overflow\n   - **Journal**: Unstructured logs for debugging (not used by LLMs)\n\n3. **Context Building**\n   - **Strategic Analysis**: BFS pathfinding identifies threats and opportunities\n   - **Relationship Context**: Current diplomatic standings influence all decisions\n   - **Historical Context**: Recent diary entries provide continuity\n\n4. **LLM Decision Points**\n   - **Initialization**: Sets starting personality and objectives\n   - **Negotiations**: Generates contextual messages based on relationships\n   - **Planning**: Creates high-level strategic directives\n   - **Orders**: Selects specific moves with full strategic context\n   - **State Updates**: Adjusts goals and relationships based on outcomes\n\n### Implementation Details\n\n#### Core Files\n\n1. **`lm_game.py`** - Main game orchestrator\n   - Manages agent lifecycle and game phases\n   - Coordinates async LLM calls for maximum performance\n   - Handles error tracking and recovery\n   - Saves game state with phase summaries and agent relationships\n\n2. 
**`ai_diplomacy\u002Fagent.py`** - Stateful agent implementation\n   - `DiplomacyAgent` class with goals, relationships, and memory\n   - Robust JSON parsing for various LLM response formats\n   - Diary entry generation for each game event\n   - State update logic based on game outcomes\n\n3. **`ai_diplomacy\u002Fclients.py`** - LLM abstraction layer\n   - `BaseModelClient` interface for all LLM providers\n   - Implementations for OpenAI, Claude, Gemini, DeepSeek, OpenRouter\n   - Prompt construction and response parsing\n   - Retry logic and error handling\n\n4. **`ai_diplomacy\u002Fpossible_order_context.py`** - Strategic analysis\n   - BFS pathfinding on game map\n   - Nearest threat\u002Fopportunity identification\n   - Adjacent territory analysis\n   - Rich XML context generation for orders\n\n5. **`ai_diplomacy\u002Fprompt_constructor.py`** - Centralized prompt building\n   - Assembles game state, agent state, and strategic context\n   - Formats prompts for different LLM tasks\n   - Integrates with template system\n\n6. 
**`ai_diplomacy\u002Fgame_history.py`** - Phase-by-phase game tracking\n   - Stores messages, orders, and results\n   - Provides historical context for agents\n   - Tracks ignored messages for relationship analysis\n\n#### Prompt Templates\n\nThe `ai_diplomacy\u002Fprompts\u002F` directory contains customizable templates:\n\n- Power-specific system prompts (e.g., `france_system_prompt.txt`)\n- Task-specific instructions (`order_instructions.txt`, `conversation_instructions.txt`)\n- Diary generation prompts for different game events\n- State update and planning templates\n\n### Running AI Games\n\n```bash\n# Basic game with negotiations\npython lm_game.py --max_year 1910 --num_negotiation_rounds 3\n\n# With strategic planning phase\npython lm_game.py --max_year 1910 --planning_phase --num_negotiation_rounds 2\n\n# Custom model assignment (order: AUSTRIA, ENGLAND, FRANCE, GERMANY, ITALY, RUSSIA, TURKEY)\npython lm_game.py --models \"claude-3-5-sonnet-20241022,gpt-4o,claude-3-5-sonnet-20241022,gpt-4o,claude-3-5-sonnet-20241022,gpt-4o,claude-3-5-sonnet-20241022\"\n\n# Run until game completion or specific year\npython lm_game.py --num_negotiation_rounds 2 --planning_phase\n\n# Write all artefacts to a chosen directory (auto-resumes if it already exists)\npython lm_game.py --run_dir results\u002Fgame_run_001\n\n# Resume an interrupted game from a specific phase\npython lm_game.py --run_dir results\u002Fgame_run_001 --resume_from_phase S1902M\n\n# Critical-state analysis: resume from an existing run but save new results elsewhere\npython lm_game.py \\\n  --run_dir results\u002Fgame_run_001 \\\n  --critical_state_analysis_dir results\u002Fcritical_analysis_001 \\\n  --resume_from_phase F1903M\n\n# End the simulation after a particular phase regardless of remaining years\npython lm_game.py --run_dir results\u002Fgame_run_002 --end_at_phase F1905M\n\n# Set the global max_tokens generation limit\npython lm_game.py --run_dir results\u002Fgame_run_003 --max_tokens 8000\n\n# 
Per-model token limits (AU,EN,FR,GE,IT,RU,TR)\npython lm_game.py --run_dir results\u002Fgame_run_004 \\\n  --max_tokens_per_model \"8000,8000,16000,8000,8000,16000,8000\"\n\n# Use a custom prompts directory\npython lm_game.py --run_dir results\u002Fgame_run_005 --prompts_dir .\u002Fprompts\u002Fmy_variants\n```\n\n### Setting `--models` (quick guide)\n\n- Pass **one comma-separated list of up to seven model IDs** in this fixed order: AUSTRIA, ENGLAND, FRANCE, GERMANY, ITALY, RUSSIA, TURKEY.\n\n- **Model-ID syntax**\n\n  ```\n  \u003Cclient prefix:>model[@base_url][#api_key]\n  ```\n\n  - `prefix:` – specify the client (`openai`, `openai-requests`, `openai-responses`, `anthropic`, `gemini`, `deepseek`, `openrouter`, `together`).\n  - `@base_url` – hit a proxy \u002F alt endpoint.\n  - `#api_key` – inline key (overrides env vars).\n\n  ```bash\n  # gpt-4o on openrouter for all powers:\n  --models \"openrouter:gpt-4o\"\n  # custom URL+apikey for Austria only:\n  --models \"openai:llama-3.2-3b@http:\u002F\u002Flocalhost:8000#myapikey,openai:gpt-4o,openai:gpt-4o,openai:gpt-4o,openai:gpt-4o,openai:gpt-4o,openai:gpt-4o\"\n  ```\n\n### Running Batch Experiments with **`experiment_runner.py`**\n\n`experiment_runner.py` is a lightweight orchestrator: it spins up many `lm_game.py` runs in parallel, gathers their artefacts under one *experiment directory*, and then executes the analysis modules you specify.\nAll flags that belong to **`lm_game.py`** can be passed straight through; the runner validates them and forwards them unchanged to every game instance.\n\n---\n\n#### Examples\n\n```bash\n# Run 10 independent games (iterations) in parallel, using a custom prompts dir\n# and a single model (GPT-4o) for all seven powers.\npython3 experiment_runner.py \\\n    --experiment_dir \"results\u002Fexp001\" \\\n    --iterations 10 \\\n    --parallel 10 \\\n    --max_year 1905 \\\n    --num_negotiation_rounds 0 \\\n    --prompts_dir \"ai_diplomacy\u002Fprompts\" \\\n    --models 
\"gpt-4o,gpt-4o,gpt-4o,gpt-4o,gpt-4o,gpt-4o,gpt-4o\"\n\n\n# Critical-state analysis: resume every run from W1901A (taken from an existing\n# base run) and stop after S1902M.  Two analysis modules will be executed:\n#  • summary         → aggregated results & scores\n#  • critical_state  → before\u002Fafter snapshots around the critical phase\npython3 experiment_runner.py \\\n    --experiment_dir \"results\u002Fexp002\" \\\n    --iterations 10 \\\n    --parallel 10 \\\n    --resume_from_phase W1901A \\\n    --end_at_phase S1902M \\\n    --num_negotiation_rounds 0 \\\n    --critical_state_base_run \"results\u002Ftest1\" \\\n    --prompts_dir \"ai_diplomacy\u002Fprompts\" \\\n    --analysis_modules \"summary,critical_state\" \\\n    --models \"gpt-4o,gpt-4o,gpt-4o,gpt-4o,gpt-4o,gpt-4o,gpt-4o\"\n```\n\n*(Any other `lm_game.py` flags—`--planning_phase`, `--max_tokens`, etc.—can be added exactly where you’d use them on a single-game run.)*\n\n---\n\n#### Experiment-runner–specific arguments\n\n| Flag                              | Type \u002F Default             | Description                                                                                                                                                                              |\n| --------------------------------- | -------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `--experiment_dir` **(required)** | `Path`                     | Root folder for the experiment; sub-folders `runs\u002F` and `analysis\u002F` are managed automatically. Re-running with the same directory will **resume** existing runs and regenerate analysis. |\n| `--iterations`                    | `int`, default `1`         | How many individual games to launch for this experiment.                                                                                              
                                   |\n| `--parallel`                      | `int`, default `1`         | Max number of games to execute concurrently (uses a process pool).                                                                                                                       |\n| `--analysis_modules`              | `str`, default `\"summary\"` | Comma-separated list of analysis modules to run after all games finish. Modules are imported from `experiment_runner.analysis.\u003Cname>` and must provide `run(experiment_dir, ctx)`.       |\n| `--critical_state_base_run`       | `Path`, optional           | Path to an **existing** `run_dir` produced by a previous `lm_game` run. Each iteration resumes from that snapshot; new artefacts are written under the current `experiment_dir`.         |\n| `--seed_base`                     | `int`, default `42`        | Base random seed. Run *ɪ* receives seed = `seed_base + ɪ`, enabling reproducible batches.                                                                                                |\n\n*(All other command-line flags belong to `lm_game.py` and are forwarded unchanged.)*\n\n### Environment Setup\n\nCreate a `.env` file with your API keys:\n\n```\nOPENAI_API_KEY=your_key_here\nANTHROPIC_API_KEY=your_key_here\nGEMINI_API_KEY=your_key_here\nDEEPSEEK_API_KEY=your_key_here\nOPENROUTER_API_KEY=your_key_here\n```\n\n### Model Configuration\n\nModels can be assigned to powers in `ai_diplomacy\u002Futils.py`:\n\n```python\ndef assign_models_to_powers() -> Dict[str, str]:\n    return {\n        \"AUSTRIA\": \"o3\",\n        \"ENGLAND\": \"claude-sonnet-4-20250514\",\n        \"FRANCE\": \"gpt-4.1\",\n        \"GERMANY\": \"gemini-2.5-pro-preview-05-06\",\n        \"ITALY\": \"openrouter-meta-llama\u002Fllama-4-maverick\",\n        \"RUSSIA\": \"claude-opus-4-20250514\",\n        \"TURKEY\": \"openrouter-google\u002Fgemini-2.5-flash-preview-05-20\",\n    }\n```\n\nSupported models include:\n\n- OpenAI: `gpt-4o`, 
`gpt-4.1`, `o3`, `o4-mini`\n- Anthropic: `claude-3-5-sonnet-20241022`, `claude-opus-4-20250514`\n- Google: `gemini-2.0-flash`, `gemini-2.5-pro-preview`\n- OpenRouter: Various models including Llama, Qwen, DeepSeek\n\n### Game Output and Analysis\n\nGames are saved to the `results\u002F` directory with timestamps. Each game folder contains:\n\n- `lmvsgame.json` - Complete game data including phase summaries and agent relationships\n- `overview.jsonl` - Error statistics and model assignments\n- `game_manifesto.txt` - Strategic directives from planning phases\n- `general_game.log` - Detailed game execution logs\n- `llm_responses.csv` - Complete log of all LLM interactions\n\nThe game JSON includes special fields for AI analysis:\n\n- `phase_summaries` - Categorized move results for each phase\n- `agent_relationships` - Diplomatic standings at each phase\n- `final_agent_states` - End-game goals and relationships\n\n### Data Processing and RL Analysis Pipeline\n\nFor detailed analysis of LLM interactions and order success rates, a two-step pipeline is used:\n\n1. **Convert CSV to RL JSON**:\n    The `csv_to_rl_json.py` script processes `llm_responses.csv` files, typically found in game-specific subdirectories ending with \"FULL_GAME\" (e.g., `results\u002F20250524_..._FULL_GAME\u002F`). It converts this raw interaction data into a JSON format suitable for Reinforcement Learning (RL) analysis.\n\n    To process all relevant CSVs in batch:\n\n    ```bash\n    python csv_to_rl_json.py --scan_dir results\u002F\n    ```\n\n    This command scans the `results\u002F` directory for \"FULL_GAME\" subfolders, converts their `llm_responses.csv` files, and outputs all generated `*_rl.json` files into the `results\u002Fjson\u002F` directory.\n\n2. **Analyze RL JSON Files**:\n    The `analyze_rl_json.py` script then analyzes the JSON files generated in the previous step. 
It aggregates statistics on successful and failed convoy and support orders, categorized by model.\n\n    To run the analysis:\n\n    ```bash\n    python analyze_rl_json.py results\u002Fjson\u002F\n    ```\n\n    This command processes all `*_rl.json` files in the `results\u002Fjson\u002F` directory and generates two reports in the project's root directory:\n    - `analysis_summary.txt`: A clean summary of order statistics.\n    - `analysis_summary_debug.txt`: A detailed report including unique 'success' field values and other debug information.\n\nThis pipeline allows for a comprehensive understanding of LLM performance in generating valid and successful game orders.\n\n### Post-Game Analysis Tools\n\n#### Strategic Moment Analysis\n\nAnalyze games for key strategic moments including betrayals, collaborations, and brilliant strategies:\n\n```bash\n# Analyze a completed game\npython analyze_game_moments.py results\u002F20250522_210700_o3vclaudes_o3win\n\n# Limit analysis to specific phases\npython analyze_game_moments.py results\u002Fgame_folder --max-phases 20\n\n# Use a different analysis model\npython analyze_game_moments.py results\u002Fgame_folder --model claude-3-5-sonnet-20241022\n```\n\nThe analysis identifies:\n\n- **Betrayals**: When powers explicitly promise one action but take contradictory action\n- **Collaborations**: Successfully coordinated actions between powers\n- **Playing Both Sides**: Powers making conflicting promises to different parties\n- **Brilliant Strategies**: Exceptionally well-executed strategic maneuvers\n- **Strategic Blunders**: Major mistakes that significantly weaken a position\n\nAnalysis outputs include:\n\n- **Markdown Report** (`game_moments\u002F[game]_report_[timestamp].md`)\n  - AI-generated narrative of the entire game\n  - Summary statistics (betrayals, collaborations, etc.)\n  - Invalid move counts by model\n  - Lie analysis with intentional vs unintentional breakdown\n  - Top strategic moments with full context and 
diary entries\n- **JSON Data** (`game_moments\u002F[game]_data_[timestamp].json`)\n  - Complete structured data of all detected moments\n  - Metadata including scores, categories, and relationships\n  - Raw lie detection data for further analysis\n\nExample output snippet:\n\n```markdown\n## Power Models\n- **TURKEY**: o3\n- **ENGLAND**: claude-sonnet-4-20250514\n- **RUSSIA**: claude-opus-4-20250514\n\n## Invalid Moves by Model\n- **o3**: 91 invalid moves\n- **claude-sonnet-4**: 67 invalid moves\n\n## Lies Analysis\n### Lies by Model\n- **o3**: 195 total lies (71 intentional, 124 unintentional)\n- **claude-opus-4**: 96 total lies (0 intentional, 96 unintentional)\n```\n\n#### Diplomatic Lie Detection\n\nThe analysis system can detect lies by comparing:\n\n1. **Messages**: What powers promise to each other\n2. **Private Diaries**: What powers privately plan (from negotiation_diary entries)\n3. **Actual Orders**: What they actually do\n\nLies are classified as:\n\n- **Intentional**: Diary shows planned deception (e.g., \"mislead them\", \"while actually...\")\n- **Unintentional**: No evidence of planned deception (likely misunderstandings)\n\n#### Animation and Visualization\n\nVisualize games with the interactive 3D animation system:\n\n```bash\n# Start the animation server\ncd ai_animation\nnpm install\nnpm run dev\n\n# Open http:\u002F\u002Flocalhost:5173 in your browser\n# Load a game JSON file to see animated playback\n```\n\nFeatures:\n\n- 3D map with unit movements and battles\n- Phase-by-phase playback controls\n- Chat window showing diplomatic messages\n- Standings board tracking supply centers\n- Sound effects and visual flourishes\n\n### Game Statistics and Patterns\n\nAnalysis of hundreds of AI games reveals interesting patterns:\n\n#### Model Performance Characteristics\n\n- **Invalid Move Rates**: Some models (e.g., o3) generate more invalid moves but play aggressively\n- **Deception Patterns**: Models vary dramatically in honesty (0-100% intentional 
lie rates)\n- **Strategic Styles**: From defensive\u002Fhonest to aggressive\u002Fdeceptive playstyles\n\n#### Common Strategic Patterns\n\n- **Opening Gambits**: RT Juggernaut (Russia-Turkey), Western Triple, Lepanto\n- **Mid-game Dynamics**: Stab timing, alliance shifts, convoy operations\n- **Endgame Challenges**: Stalemate lines, forced draws, kingmaking\n\n### Future Explorations\n\n- **Adaptive Negotiations**: Dynamic round count based on conversation flow\n- **Coalition Detection**: Identify and track multi-power alliances\n- **Personality Evolution**: Agents that adapt their diplomatic style\n- **Tournament Mode**: Automated multi-game competitions with ELO ratings\n- **Human-AI Hybrid**: Allow human players to compete against AI agents\n- **Real-time Commentary**: Live narrative generation for spectators\n- **Deception Training**: Models specifically trained to detect or execute lies\n- **Meta-Strategy Learning**: Agents that learn from previous games\n\n---\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"500\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FGoodStartLabs_AI_Diplomacy_readme_d45cea57dde2.png\" alt=\"Diplomacy Map Overview\">\n\u003C\u002Fp>\n\n## Documentation\n\nThe complete documentation is available at [diplomacy.readthedocs.io](https:\u002F\u002Fdiplomacy.readthedocs.io\u002F).\n\n## Available Analysis Scripts\n\n### 1. Strategic Moment Analysis (`analyze_game_moments.py`)\n\nComprehensive analysis of game dynamics:\n\n```bash\npython analyze_game_moments.py results\u002Fgame_folder [options]\n\nOptions:\n  --model MODEL         Analysis model to use (default: gemini-2.5-flash)\n  --max-phases N        Analyze only first N phases\n  --max-concurrent N    Concurrent API calls (default: 5)\n  --report PATH         Output report path (auto-generated if not specified)\n  --json PATH           Output JSON path (auto-generated if not specified)\n```\n\n### 2. 
Focused Lie Detection (`analyze_lies_focused.py`)\n\nDetailed analysis of diplomatic deception:\n\n```bash\npython analyze_lies_focused.py results\u002Fgame_folder [--output report.md]\n```\n\n### 3. Game Results Statistics (`analyze_game_results.py`)\n\nAggregates win\u002Floss statistics across all completed games:\n\n```bash\npython analyze_game_results.py\n# Creates model_power_statistics.csv\n```\n\nAnalyzes all `*_FULL_GAME` folders to show how many times each model played as each power and won.\n\n### 4. Game Visualization (`ai_animation\u002F`)\n\nInteractive 3D visualization of games:\n\n```bash\ncd ai_animation\nnpm install\nnpm run dev\n# Open http:\u002F\u002Flocalhost:5173 and load a game JSON\n```\n\n## Getting Started\n\n### Installation\n\nThis project uses [uv](https:\u002F\u002Fgithub.com\u002Fastral-sh\u002Fuv) for Python dependency management.\n\n#### Setup Project Dependencies\n\n```bash\n# Clone the repository\ngit clone https:\u002F\u002Fgithub.com\u002Fyour-repo\u002FAI_Diplomacy.git\ncd AI_Diplomacy\n\n# Install dependencies and create virtual environment\nuv sync\n\n# Activate the virtual environment\nsource .venv\u002Fbin\u002Factivate  # On Unix\u002FmacOS\n# or\n.venv\\Scripts\\activate     # On Windows\n```\n\n### Running a game\n\nThe following script plays a game locally by submitting random valid orders until the game is completed.\n\n```python3\nimport random\nfrom diplomacy import Game\nfrom diplomacy.utils.export import to_saved_game_format\n\n# Creating a game\n# Alternatively, a map_name can be specified as an argument. e.g. 
Game(map_name='pure')\ngame = Game()\nwhile not game.is_game_done:\n\n    # Getting the list of possible orders for all locations\n    possible_orders = game.get_all_possible_orders()\n\n    # For each power, randomly sampling a valid order\n    for power_name, power in game.powers.items():\n        power_orders = [random.choice(possible_orders[loc]) for loc in game.get_orderable_locations(power_name)\n                        if possible_orders[loc]]\n        game.set_orders(power_name, power_orders)\n\n    # Messages can be sent locally with game.add_message\n    # e.g. game.add_message(Message(sender='FRANCE',\n    #                               recipient='ENGLAND',\n    #                               message='This is a message',\n    #                               phase=game.get_current_phase(),\n    #                               time_sent=int(time.time())))\n\n    # Processing the game to move to the next phase\n    game.process()\n\n# Exporting the game to disk to visualize (game is appended to file)\n# Alternatively, we can do >> file.write(json.dumps(to_saved_game_format(game)))\nto_saved_game_format(game, output_path='game.json')\n```\n\n## Web interface\n\nIt is also possible to install a web interface in React to play against bots and\u002For other humans and to visualize games.\n\nThe web interface can be installed with:\n\n```bash\n# Install NVM\ncurl -o- https:\u002F\u002Fraw.githubusercontent.com\u002Fnvm-sh\u002Fnvm\u002Fv0.34.0\u002Finstall.sh | bash\n\n# Clone repo\ngit clone https:\u002F\u002Fgithub.com\u002Fdiplomacy\u002Fdiplomacy.git\n\n# Install package locally\n# You may want to install it in a conda or virtualenv environment\ncd diplomacy\u002F\npip install -r requirements_dev.txt\n\n# Build node modules\ncd diplomacy\u002Fweb\nnpm install .\nnpm install . 
--only=dev\n\n# In a terminal window or tab - Launch React server\nnpm start\n\n# In another terminal window or tab - Launch diplomacy server\npython -m diplomacy.server.run\n```\n\nThe web interface will be accessible at \u003Chttp:\u002F\u002Flocalhost:3000>.\n\nTo login, users can use admin\u002Fpassword or username\u002Fpassword. Additional users can be created by logging in with a username that does not exist in the database.\n\n![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FGoodStartLabs_AI_Diplomacy_readme_c1820757b650.png)\n\n### Visualizing a game\n\nIt is possible to visualize a game by using the \"Load a game from disk\" menu on the top-right corner of the web interface.\n\n![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FGoodStartLabs_AI_Diplomacy_readme_6e3d78ce26fd.png)\n\n## Network Game\n\nIt is possible to join a game remotely over a network using websockets. The script below plays a game over a network.\n\nNote. The server must be started with `python -m diplomacy.server.run` for the script to work.\n\n```python3\nimport asyncio\nimport random\nfrom diplomacy.client.connection import connect\nfrom diplomacy.utils import exceptions\n\nPOWERS = ['AUSTRIA', 'ENGLAND', 'FRANCE', 'GERMANY', 'ITALY', 'RUSSIA', 'TURKEY']\n\nasync def create_game(game_id, hostname='localhost', port=8432):\n    \"\"\" Creates a game on the server \"\"\"\n    connection = await connect(hostname, port)\n    channel = await connection.authenticate('random_user', 'password')\n    await channel.create_game(game_id=game_id, rules={'REAL_TIME', 'NO_DEADLINE', 'POWER_CHOICE'})\n\nasync def play(game_id, power_name, hostname='localhost', port=8432):\n    \"\"\" Play as the specified power \"\"\"\n    connection = await connect(hostname, port)\n    channel = await connection.authenticate('user_' + power_name, 'password')\n\n    # Waiting for the game, then joining it\n    while not (await channel.list_games(game_id=game_id)):\n        await asyncio.sleep(1.)\n    
game = await channel.join_game(game_id=game_id, power_name=power_name)\n\n    # Playing game\n    while not game.is_game_done:\n        current_phase = game.get_current_phase()\n\n        # Submitting orders\n        if game.get_orderable_locations(power_name):\n            possible_orders = game.get_all_possible_orders()\n            orders = [random.choice(possible_orders[loc]) for loc in game.get_orderable_locations(power_name)\n                      if possible_orders[loc]]\n            print('[%s\u002F%s] - Submitted: %s' % (power_name, game.get_current_phase(), orders))\n            await game.set_orders(power_name=power_name, orders=orders, wait=False)\n\n        # Messages can be sent with game.send_message\n        # await game.send_game_message(message=game.new_power_message('FRANCE', 'This is the message'))\n\n        # Waiting for game to be processed\n        while current_phase == game.get_current_phase():\n            await asyncio.sleep(0.1)\n\n    # A local copy of the game can be saved with to_saved_game_format\n    # To download a copy of the game with messages from all powers, you need to export the game as an admin\n    # by logging in as 'admin' \u002F 'password'\n\nasync def launch(game_id):\n    \"\"\" Creates and plays a network game \"\"\"\n    await create_game(game_id)\n    await asyncio.gather(*[play(game_id, power_name) for power_name in POWERS])\n\nif __name__ == '__main__':\n    asyncio.run(launch(game_id=str(random.randint(1, 1000))))\n```\n\n## License\n\nCopyright (C) 2025 Good Start Labs\n\nSee the [LICENSE](LICENSE) file for additional details\n","# AI 外交：由大语言模型驱动的战略游戏\n\n由 Alex Duffy @Alx-Ai 和 Tyler Marques @Tylermarques 创建\n\n## 概述\n\n此仓库在原始 [Diplomacy](https:\u002F\u002Fgithub.com\u002Fdiplomacy\u002Fdiplomacy) 项目的基础上进行了扩展，引入了由大型语言模型（LLM）驱动的高级 AI 代理。游戏中每个大国均由一个自主代理控制，该代理能够维护自身状态、建立关系、开展谈判并作出战略决策。\n\n## 核心特性\n\n### 🤖 带状态的 AI 代理\n\n每个大国由一个 `DiplomacyAgent` 表示，具备以下功能：\n\n- **动态目标**：根据游戏事件不断调整的战略目标\n- 
**关系追踪**：与其他大国保持敌对、不友好、中立、友好或同盟等关系\n- **记忆系统**：双层记忆结构，包括结构化的日记条目和整合机制\n- **个性设定**：基于大国特性的系统提示塑造每个代理的外交风格\n\n### 💬 丰富的谈判机制\n\n- 多轮消息交互（私密与全局）\n- 关系感知的沟通策略\n- 消息历史追踪与分析\n- 能够检测被忽略的消息及无响应的大国\n\n### 🎯 战略指令生成\n\n- 使用广度优先搜索进行移动路径分析\n- 结合上下文选择最近的威胁或机会对应的指令\n- 提供后备逻辑以增强鲁棒性\n- 支持多种 LLM 服务提供商（OpenAI、Claude、Gemini、DeepSeek、OpenRouter）\n\n### 📊 高级游戏分析\n\n- 自定义回合总结，按成功\u002F失败分类\n- 通过比较指令与谈判内容检测背叛行为\n- 针对高层指令的战略规划阶段\n- 全面记录所有 LLM 的交互过程\n\n### 🧠 记忆管理\n\n- **私人日记**：按回合编号的结构化条目，用于提供给 LLM 的上下文\n  - 包含关系更新的谈判摘要\n  - 指令推理与战略理由\n  - 反应背叛行为的回合结果分析\n- **年度整合**：自动汇总旧条目，防止上下文过载\n- **智能上下文构建**：仅向 LLM 提供相关的历史记录\n\n## AI 代理的工作原理\n\n下图展示了每个 AI 代理的完整信息流与决策流程：\n\n```mermaid\ngraph TB\n    %% 游戏状态来源\n    subgraph \"游戏状态信息\"\n        GS[游戏状态\u003Cbr\u002F>- 单位位置\u003Cbr\u002F>- 补给中心\u003Cbr\u002F>- 各国状态]\n        GH[游戏历史\u003Cbr\u002F>- 历史指令\u003Cbr\u002F>- 历史消息\u003Cbr\u002F>- 回合结果]\n        PS[回合总结\u003Cbr\u002F>- 成功行动\u003Cbr\u002F>- 失败行动\u003Cbr\u002F>- 棋盘变化]\n    end\n    \n    %% 代理内部状态\n    subgraph \"代理状态 (DiplomacyAgent)\"\n        GOALS[动态目标\u003Cbr\u002F>- 扩张目标\u003Cbr\u002F>- 联盟优先级\u003Cbr\u002F>- 防御需求]\n        REL[关系\u003Cbr\u002F>从敌对到同盟的等级]\n        \n        subgraph \"记忆系统\"\n            DIARY[私人日记\u003Cbr\u002F>按回合编号的条目]\n            \n            ND[谈判日记\u003Cbr\u002F>- 消息分析\u003Cbr\u002F>- 信任评估\u003Cbr\u002F>- 关系变化]\n            OD[指令日记\u003Cbr\u002F>- 战略推理\u003Cbr\u002F>- 风险收益分析]\n            PRD[回合结果日记\u003Cbr\u002F>- 结果分析\u003Cbr\u002F>- 背叛检测\u003Cbr\u002F>- 成功评价]\n            \n            CONS[日记整合\u003Cbr\u002F>每年使用 Gemini Flash 生成摘要]\n        end\n        \n        JOURNAL[私人日志\u003Cbr\u002F>仅用于调试]\n    end\n    \n    %% 上下文构建\n    subgraph \"上下文构建\"\n        POC[Possible Order Context\u003Cbr\u002F>- BFS 路径搜索\u003Cbr\u002F>- 最近的敌人\u003Cbr\u002F>- 未控制的补给中心\u003Cbr\u002F>- 相邻领土]\n        \n        BCP[build_context_prompt\u003Cbr\u002F>整合所有信息]\n        \n        RECENT[近期上下文\u003Cbr\u002F>- 最近 40 条日记条目\u003Cbr\u002F>- 当前关系\u003Cbr\u002F>- 当前目标]\n    end\n    \n    %% LLM 
决策点\n    subgraph \"LLM 决策点\"\n        INIT_LLM[初始化\u003Cbr\u002F>设定初始目标与关系]\n        \n        NEG_LLM[谈判\u003Cbr\u002F>生成消息\u003Cbr\u002F>更新关系]\n        \n        PLAN_LLM[规划\u003Cbr\u002F>制定战略指令]\n        \n        ORD_LLM[指令生成\u003Cbr\u002F>选择具体行动]\n        \n        STATE_LLM[状态更新\u003Cbr\u002F>调整目标与关系]\n    end\n    \n    %% 提示模板\n    subgraph \"提示模板\"\n        PROMPTS[大国专属提示\u003Cbr\u002F>+ 指令模板\u003Cbr\u002F>+ 上下文模板]\n    end\n    \n    %% 信息流动\n    GS --> BCP\n    GH --> BCP\n    PS --> STATE_LLM\n    \n    GOALS --> BCP\n    REL --> BCP\n    DIARY --> RECENT\n    RECENT --> BCP\n    \n    POC --> BCP\n    BCP --> NEG_LLM\n    BCP --> ORD_LLM\n    BCP --> PLAN_LLM\n    \n    PROMPTS --> INIT_LLM\n    PROMPTS --> NEG_LLM\n    PROMPTS --> PLAN_LLM\n    PROMPTS --> ORD_LLM\n    PROMPTS --> STATE_LLM\n    \n    %% 日记更新\n    NEG_LLM --> ND\n    ORD_LLM --> OD\n    PS --> PRD\n    \n    ND --> DIARY\n    OD --> DIARY\n    PRD --> DIARY\n    \n    %% 状态更新\n    INIT_LLM --> GOALS\n    INIT_LLM --> REL\n    NEG_LLM --> REL\n    STATE_LLM --> GOALS\n    STATE_LLM --> REL\n    \n    %% 整合\n    DIARY -->|每两年一次| CONS\n    CONS -->|汇总后| DIARY\n    \n    %% 样式\n    classDef gameState fill:#e74c3c,stroke:#333,stroke-width:2px,color:#fff\n    classDef agentState fill:#3498db,stroke:#333,stroke-width:2px,color:#fff\n    classDef context fill:#2ecc71,stroke:#333,stroke-width:2px,color:#fff\n    classDef llm fill:#f39c12,stroke:#333,stroke-width:2px,color:#fff\n    classDef memory fill:#9b59b6,stroke:#333,stroke-width:2px,color:#fff\n    \n    class GS,GH,PS gameState\n    class GOALS,REL,JOURNAL agentState\n    class POC,BCP,RECENT context\n    class INIT_LLM,NEG_LLM,PLAN_LLM,ORD_LLM,STATE_LLM llm\n    class DIARY,ND,OD,PRD,CONS memory\n```\n\n### 关键组件详解\n\n1. **信息来源**\n   - **游戏状态**：当前棋盘布局、单位位置及补给中心归属\n   - **游戏历史**：过去的所有指令、消息及结果记录\n   - **回合总结**：对每回合成功与失败情况的分类分析\n\n2. 
**代理记忆架构**\n   - **私人日记**：主要的记忆系统，按回合进行结构化记录\n   - **日记类型**：三种专门的日记类型分别记录游戏的不同方面\n   - **整合机制**：每年自动汇总日记内容，防止上下文过载\n   - **日志**：非结构化的调试日志（不用于 LLM）\n\n3. **上下文构建**\n   - **战略分析**：利用广度优先搜索识别潜在的威胁与机遇\n   - **关系上下文**：当前的外交关系影响所有决策\n   - **历史上下文**：近期的日记条目为决策提供连续性\n\n4. **LLM 决策点**\n   - **初始化**：设定初始性格特征与目标\n   - **谈判**：基于关系生成符合情境的消息\n   - **规划**：制定高层次的战略指令\n   - **指令**：结合全面的战略上下文选择具体行动\n   - **状态更新**：根据游戏结果调整目标与关系\n\n### 实现细节\n\n#### 核心文件\n\n1. **`lm_game.py`** - 主游戏调度器\n   - 管理智能体的生命周期和游戏阶段\n   - 协调异步LLM调用以实现最大性能\n   - 处理错误跟踪与恢复\n   - 保存包含阶段摘要和智能体关系的游戏状态\n\n2. **`ai_diplomacy\u002Fagent.py`** - 带状态的智能体实现\n   - `DiplomacyAgent`类，包含目标、关系和记忆\n   - 强健的JSON解析功能，适用于多种LLM响应格式\n   - 每次游戏事件生成日记条目\n   - 基于游戏结果的状态更新逻辑\n\n3. **`ai_diplomacy\u002Fclients.py`** - LLM抽象层\n   - `BaseModelClient`接口，用于所有LLM提供商\n   - 提供OpenAI、Claude、Gemini、DeepSeek、OpenRouter等实现\n   - 构建提示并解析响应\n   - 具备重试逻辑和错误处理机制\n\n4. **`ai_diplomacy\u002Fpossible_order_context.py`** - 战略分析\n   - 在游戏地图上进行广度优先搜索路径规划\n   - 识别最近的威胁或机会\n   - 分析相邻领土\n   - 为指令生成丰富的XML上下文\n\n5. **`ai_diplomacy\u002Fprompt_constructor.py`** - 集中式提示构建\n   - 整合游戏状态、智能体状态和战略上下文\n   - 格式化不同LLM任务的提示\n   - 与模板系统集成\n\n6. 
**`ai_diplomacy\u002Fgame_history.py`** - 游戏各阶段追踪\n   - 存储消息、指令及结果\n   - 为智能体提供历史背景\n   - 跟踪被忽略的消息，用于关系分析\n\n#### 提示模板\n\n`ai_diplomacy\u002Fprompts\u002F`目录下包含可定制的模板：\n\n- 针对各强国的系统提示（如`france_system_prompt.txt`）\n- 针对特定任务的指令（`order_instructions.txt`、`conversation_instructions.txt`）\n- 针对不同游戏事件的日记生成提示\n- 状态更新与规划模板\n\n### 运行AI游戏\n\n```bash\n# 基本谈判游戏\npython lm_game.py --max_year 1910 --num_negotiation_rounds 3\n\n# 包含战略规划阶段\npython lm_game.py --max_year 1910 --planning_phase --num_negotiation_rounds 2\n\n# 自定义模型分配（顺序：奥地利、英格兰、法国、德国、意大利、俄罗斯、土耳其）\npython lm_game.py --models \"claude-3-5-sonnet-20241022,gpt-4o,claude-3-5-sonnet-20241022,gpt-4o,claude-3-5-sonnet-20241022,gpt-4o,claude-3-5-sonnet-20241022\"\n\n# 直到游戏结束或特定年份\npython lm_game.py --num_negotiation_rounds 2 --planning_phase\n\n# 将所有数据输出到指定目录（若目录已存在则自动续跑）\npython lm_game.py --run_dir results\u002Fgame_run_001\n\n# 从特定阶段恢复中断的游戏\npython lm_game.py --run_dir results\u002Fgame_run_001 --resume_from_phase S1902M\n\n# 关键状态分析：从现有运行中恢复，但将新结果保存到其他位置\npython lm_game.py \\\n  --run_dir results\u002Fgame_run_001 \\\n  --critical_state_analysis_dir results\u002Fcritical_analysis_001 \\\n  --resume_from_phase F1903M\n\n# 在特定阶段后结束模拟，无论剩余年份多少\npython lm_game.py --run_dir results\u002Fgame_run_002 --end_at_phase F1905M\n\n# 设置全局最大token生成限制\npython lm_game.py --run_dir results\u002Fgame_run_003 --max_tokens 8000\n\n# 按模型设置token限制（AU,EN,FR,GE,IT,RU,TR）\npython lm_game.py --run_dir results\u002Fgame_run_004 \\\n  --max_tokens_per_model \"8000,8000,16000,8000,8000,16000,8000\"\n\n# 使用自定义提示目录\npython lm_game.py --run_dir results\u002Fgame_run_005 --prompts_dir .\u002Fprompts\u002Fmy_variants\n```\n\n### 设置`--models`（快速指南）\n\n- 请按固定顺序传递**一个由最多七个模型ID组成的逗号分隔列表**：奥地利、英格兰、法国、德国、意大利、俄罗斯、土耳其。\n\n- **模型ID语法**\n\n  ```\n  \u003C客户端前缀>:模型[@base_url][#api_key]\n  ```\n\n  - `前缀:` – 指定客户端（`openai`、`openai-requests`、`openai-responses`、`anthropic`、`gemini`、`deepseek`、`openrouter`、`together`）。\n  - `@base_url` – 使用代理或备用端点。\n  - `#api_key` – 
内联密钥（覆盖环境变量）。\n\n  ```bash\n  # 所有势力都使用OpenRouter上的gpt-4o：\n  --models \"openrouter:gpt-4o\"\n  # 仅奥地利使用自定义URL+密钥：\n  --models \"openai:llama-3.2-3b@http:\u002F\u002Flocalhost:8000#myapikey,openai:gpt-4o,openai:gpt-4o,openai:gpt-4o,openai:gpt-4o,openai:gpt-4o,openai:gpt-4o\"\n  ```\n\n### 使用`experiment_runner.py`运行批量实验\n\n`experiment_runner.py`是一个轻量级调度器：它会并行启动多个`lm_game.py`实例，在同一个*实验目录*下收集它们的成果，并执行您指定的分析模块。所有属于`lm_game.py`的参数标志都可以直接传递；调度器会验证这些参数并原封不动地转发给每个游戏实例。\n\n---\n\n#### 示例\n\n```bash\n# 并行运行10个独立游戏（迭代），使用自定义提示目录，\n# 并且所有七股势力都采用同一模型（GPT-4o）。\npython3 experiment_runner.py \\\n    --experiment_dir \"results\u002Fexp001\" \\\n    --iterations 10 \\\n    --parallel 10 \\\n    --max_year 1905 \\\n    --num_negotiation_rounds 0 \\\n    --prompts_dir \"ai_diplomacy\u002Fprompts\" \\\n    --models \"gpt-4o,gpt-4o,gpt-4o,gpt-4o,gpt-4o,gpt-4o,gpt-4o\"\n\n\n# 关键状态分析：从W1901A阶段恢复每个运行（基于现有基础运行），并在S1902M阶段停止。将执行两个分析模块：\n#  • summary         → 汇总结果与评分\n\n#  • critical_state  → 关键阶段前后快照\npython3 experiment_runner.py \\\n    --experiment_dir \"results\u002Fexp002\" \\\n    --iterations 10 \\\n    --parallel 10 \\\n    --resume_from_phase W1901A \\\n    --end_at_phase S1902M \\\n    --num_negotiation_rounds 0 \\\n    --critical_state_base_run \"results\u002Ftest1\" \\\n    --prompts_dir \"ai_diplomacy\u002Fprompts\" \\\n    --analysis_modules \"summary,critical_state\" \\\n    --models \"gpt-4o,gpt-4o,gpt-4o,gpt-4o,gpt-4o,gpt-4o,gpt-4o\"\n```\n\n*(任何其他 `lm_game.py` 标志——如 `--planning_phase`、`--max_tokens` 等——都可以像在单局游戏中使用它们那样直接添加。)*\n\n---\n\n#### 实验运行器专用参数\n\n| 标志                              | 类型 \u002F 默认值             | 描述                                                                                                                                                                              |\n| --------------------------------- | -------------------------- | 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `--experiment_dir` **(必填)**     | `Path`                     | 实验的根目录；子文件夹 `runs\u002F` 和 `analysis\u002F` 会自动管理。使用同一目录重新运行将**恢复**现有运行并重新生成分析结果。 |\n| `--iterations`                    | `int`, 默认 `1`         | 本次实验要启动的独立游戏数量。                                                                                                                                                   |\n| `--parallel`                      | `int`, 默认 `1`         | 同时执行的最大游戏数量（使用进程池）。                                                                                                                                             |\n| `--analysis_modules`              | `str`, 默认 `\"summary\"` | 游戏全部结束后要运行的分析模块列表，用逗号分隔。模块从 `experiment_runner.analysis.\u003Cname>` 中导入，必须提供 `run(experiment_dir, ctx)` 方法。       |\n| `--critical_state_base_run`       | `Path`, 可选           | 指向先前 `lm_game` 运行产生的**现有** `run_dir` 的路径。每次迭代都从该快照恢复；新生成的产物会写入当前的 `experiment_dir` 下。         |\n| `--seed_base`                     | `int`, 默认 `42`        | 基础随机种子。第 *ɪ* 次运行的种子为 `seed_base + ɪ`，从而实现可重复的批次运行。                                                                                                |\n\n*(所有其他命令行标志均属于 `lm_game.py`，并将原样传递给它。)*\n\n### 环境设置\n\n创建一个包含 API 密钥的 `.env` 文件：\n\n```\nOPENAI_API_KEY=your_key_here\nANTHROPIC_API_KEY=your_key_here\nGEMINI_API_KEY=your_key_here\nDEEPSEEK_API_KEY=your_key_here\nOPENROUTER_API_KEY=your_key_here\n```\n\n### 模型配置\n\n可以在 `ai_diplomacy\u002Futils.py` 中为各势力分配模型：\n\n```python\ndef assign_models_to_powers() -> Dict[str, str]:\n    return {\n        \"AUSTRIA\": \"o3\",\n        \"ENGLAND\": \"claude-sonnet-4-20250514\",\n        \"FRANCE\": \"gpt-4.1\",\n        \"GERMANY\": \"gemini-2.5-pro-preview-05-06\",\n        \"ITALY\": \"openrouter-meta-llama\u002Fllama-4-maverick\",\n        \"RUSSIA\": 
\"claude-opus-4-20250514\",\n        \"TURKEY\": \"openrouter-google\u002Fgemini-2.5-flash-preview-05-20\",\n    }\n```\n\n支持的模型包括：\n\n- OpenAI: `gpt-4o`, `gpt-4.1`, `o3`, `o4-mini`\n- Anthropic: `claude-3-5-sonnet-20241022`, `claude-opus-4-20250514`\n- Google: `gemini-2.0-flash`, `gemini-2.5-pro-preview`\n- OpenRouter: 包括 Llama、Qwen、DeepSeek 等多种模型\n\n### 游戏输出与分析\n\n游戏会按时间戳保存到 `results\u002F` 目录下。每个游戏文件夹包含：\n\n- `lmvsgame.json` - 包含阶段摘要和代理关系的完整游戏数据\n- `overview.jsonl` - 错误统计和模型分配信息\n- `game_manifesto.txt` - 来自规划阶段的战略指令\n- `general_game.log` - 详细的游戏执行日志\n- `llm_responses.csv` - 所有 LLM 交互的完整日志\n\n游戏 JSON 中包含用于 AI 分析的特殊字段：\n\n- `phase_summaries` - 每个阶段的分类行动结果\n- `agent_relationships` - 每个阶段的外交态势\n- `final_agent_states` - 游戏结束时的目标和关系\n\n### 数据处理与 RL 分析流程\n\n为了详细分析 LLM 交互及订单成功率，采用两步流程：\n\n1. **将 CSV 转换为 RL JSON**：\n    `csv_to_rl_json.py` 脚本会处理通常位于以“FULL_GAME”结尾的游戏特定子目录中的 `llm_responses.csv` 文件（例如 `results\u002F20250524_..._FULL_GAME\u002F`）。它会将这些原始交互数据转换为适合强化学习（RL）分析的 JSON 格式。\n\n    要批量处理所有相关 CSV 文件：\n\n    ```bash\n    python csv_to_rl_json.py --scan_dir results\u002F\n    ```\n\n    该命令会扫描 `results\u002F` 目录下的“FULL_GAME”子文件夹，将其中的 `llm_responses.csv` 文件转换为 JSON，并将所有生成的 `*_rl.json` 文件输出到 `results\u002Fjson\u002F` 目录中。\n\n2. 
**分析 RL JSON 文件**：\n    `analyze_rl_json.py` 脚本会分析上一步生成的 JSON 文件。它会按模型汇总成功和失败的护航及支援订单统计数据。\n\n    要运行分析：\n\n    ```bash\n    python analyze_rl_json.py results\u002Fjson\u002F\n    ```\n\n    该命令会处理 `results\u002Fjson\u002F` 目录下的所有 `*_rl.json` 文件，并在项目根目录下生成两份报告：\n    - `analysis_summary.txt`: 订单统计的简洁摘要。\n    - `analysis_summary_debug.txt`: 包含唯一“success”字段值及其他调试信息的详细报告。\n\n此流程能够全面了解 LLM 在生成有效且成功的游戏订单方面的表现。\n\n### 赛后分析工具\n\n#### 战略时刻分析\n\n分析游戏中的关键战略时刻，包括背叛、合作以及精彩策略：\n\n```bash\n# 分析已完成的游戏\npython analyze_game_moments.py results\u002F20250522_210700_o3vclaudes_o3win\n\n# 限制分析范围至特定阶段\npython analyze_game_moments.py results\u002Fgame_folder --max-phases 20\n\n# 使用不同的分析模型\npython analyze_game_moments.py results\u002Fgame_folder --model claude-3-5-sonnet-20241022\n```\n\n分析会识别以下内容：\n\n- **背叛行为**：当各势力明确承诺某种行动，却采取了相矛盾的行动时\n- **合作行为**：各势力之间成功协调一致的行动\n- **两面派行为**：各势力对不同方做出相互矛盾的承诺\n- **精彩战略**：执行得极为出色的策略性操作\n- **战略失误**：严重削弱自身局势的重大错误\n\n分析输出包括：\n\n- **Markdown 报告**（`game_moments\u002F[game]_report_[timestamp].md`）\n  - AI 自动生成的整局游戏叙述\n  - 总结性统计数据（背叛、合作等）\n  - 各模型的无效走法次数\n  - 谎言分析，区分故意与无意谎言\n  - 最重要的战略时刻，附完整背景及日记条目\n- **JSON 数据**（`game_moments\u002F[game]_data_[timestamp].json`）\n  - 所有检测到的事件的完整结构化数据\n  - 包含得分、类别和关系等元数据\n  - 原始谎言检测数据，供进一步分析使用\n\n示例输出片段如下：\n\n```markdown\n## 各势力使用的模型\n- **土耳其**：o3\n- **英格兰**：claude-sonnet-4-20250514\n- **俄罗斯**：claude-opus-4-20250514\n\n## 各模型的无效走法数量\n- **o3**：91 次无效走法\n- **claude-sonnet-4**：67 次无效走法\n\n## 谎言分析\n### 各模型的谎言情况\n- **o3**：共 195 次谎言（71 次故意，124 次无意）\n- **claude-opus-4**：共 96 次谎言（0 次故意，96 次无意）\n```\n\n#### 外交谎言检测\n\n分析系统通过比较以下三方面来检测谎言：\n\n1. **消息内容**：各势力彼此之间的承诺\n2. **私人日记**：各势力私下计划的内容（来自谈判日记条目）\n3. 
**实际命令**：各势力真正执行的行动\n\n谎言被分为两类：\n\n- **故意谎言**：日记中显示出有计划的欺骗意图（例如“误导他们”、“实际上……”）\n- **无意谎言**：没有证据表明存在计划中的欺骗行为（很可能是误解）\n\n#### 动画与可视化\n\n使用交互式 3D 动画系统可视化游戏：\n\n```bash\n# 启动动画服务器\ncd ai_animation\nnpm install\nnpm run dev\n\n# 在浏览器中打开 http:\u002F\u002Flocalhost:5173\n# 加载一个游戏 JSON 文件即可观看动画回放\n```\n\n功能特点：\n\n- 3D 地图展示部队移动与战斗\n- 分阶段播放控制\n- 聊天窗口显示外交消息\n- 排行榜实时追踪补给中心数量\n- 配备音效与视觉特效\n\n### 游戏统计与模式\n\n对数百场 AI 对弈的分析揭示了一些有趣的规律：\n\n#### 各模型的表现特征\n\n- **无效走法率**：某些模型（如 o3）会产生较多无效走法，但打法较为激进\n- **欺骗模式**：各模型在诚实度方面差异巨大（故意谎言比例从 0% 到 100% 不等）\n- **战略风格**：从防守型\u002F诚实型到进攻型\u002F欺骗型不等\n\n#### 常见的战略模式\n\n- **开局战术**：RT 重锤（俄土组合）、西线三强联盟、勒班多阵型\n- **中盘动态**：背后捅刀时机、联盟转换、运输船队运作\n- **收官挑战**：僵持线、被迫和棋、关键玩家决定胜负\n\n### 未来探索方向\n\n- **自适应谈判**：根据对话流程动态调整回合数\n- **联盟检测**：识别并跟踪多方联盟\n- **个性演化**：让智能体能够调整其外交风格\n- **锦标赛模式**：自动进行多局比赛，并计算 ELO 等级分\n- **人机混合**：允许人类玩家与 AI 智能体同场竞技\n- **实时解说**：为观众生成现场解说文字\n- **欺骗训练**：专门训练模型以检测或实施谎言\n- **元策略学习**：让智能体能够从过往对局中学习经验\n\n---\n\n\u003Cp align=\"center\">\n  \u003Cimg width=\"500\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FGoodStartLabs_AI_Diplomacy_readme_d45cea57dde2.png\" alt=\"外交地图概览\">\n\u003C\u002Fp>\n\n## 文档说明\n\n完整的文档可在 [diplomacy.readthedocs.io](https:\u002F\u002Fdiplomacy.readthedocs.io\u002F) 查阅。\n\n## 可用的分析脚本\n\n### 1. 战略时刻分析（`analyze_game_moments.py`）\n\n全面分析游戏动态：\n\n```bash\npython analyze_game_moments.py results\u002Fgame_folder [选项]\n\n选项：\n  --model MODEL         使用的分析模型（默认：gemini-2.5-flash）\n  --max-phases N        只分析前 N 个阶段\n  --max-concurrent N    并发 API 调用数量（默认：5）\n  --report PATH         输出报告路径（未指定时自动生成）\n  --json PATH           输出 JSON 文件路径（未指定时自动生成）\n```\n\n### 2. 集中式谎言检测（`analyze_lies_focused.py`）\n\n详细分析外交欺骗行为：\n\n```bash\npython analyze_lies_focused.py results\u002Fgame_folder [--output report.md]\n```\n\n### 3. 游戏结果统计（`analyze_game_results.py`）\n\n汇总所有已完成游戏的胜负统计数据：\n\n```bash\npython analyze_game_results.py\n# 生成 model_power_statistics.csv\n```\n\n该脚本会分析所有 `*_FULL_GAME` 文件夹，统计每个模型作为各个势力出战并获胜的次数。\n\n### 4. 
游戏可视化（`ai_animation\u002F`）\n\n交互式 3D 游戏可视化工具：\n\n```bash\ncd ai_animation\nnpm install\nnpm run dev\n# 打开 http:\u002F\u002Flocalhost:5173 并加载一个游戏 JSON 文件\n```\n\n## 快速入门\n\n### 安装步骤\n\n该项目使用 [uv](https:\u002F\u002Fgithub.com\u002Fastral-sh\u002Fuv) 进行 Python 依赖管理。\n\n#### 设置项目依赖\n\n```bash\n# 克隆仓库\ngit clone https:\u002F\u002Fgithub.com\u002FGoodStartLabs\u002FAI_Diplomacy.git\ncd AI_Diplomacy\n\n# 安装依赖并创建虚拟环境\nuv sync\n\n# 激活虚拟环境\nsource .venv\u002Fbin\u002Factivate  # Unix\u002FmacOS 系统\n# 或\n.venv\\Scripts\\activate     # Windows 系统\n```\n\n### 运行一局游戏\n\n以下脚本会在本地随机提交有效指令，直到游戏结束：\n\n```python3\nimport random\nfrom diplomacy import Game\nfrom diplomacy.utils.export import to_saved_game_format\n\n# 创建一局游戏\n# 也可以通过参数指定地图名称，例如：Game(map_name='pure')\ngame = Game()\nwhile not game.is_game_done:\n\n    # 获取所有地点的可能指令列表\n    possible_orders = game.get_all_possible_orders()\n\n    # 随机为每个势力选取一条有效指令\n    for power_name, power in game.powers.items():\n        power_orders = [random.choice(possible_orders[loc]) for loc in game.get_orderable_locations(power_name)\n                        if possible_orders[loc]]\n        game.set_orders(power_name, power_orders)\n\n    # 可以通过 game.add_message 在本地发送消息\n    # 例如：game.add_message(Message(sender='FRANCE',\n    #                               recipient='ENGLAND',\n    #                               message='这是一条消息',\n    #                               phase=game.get_current_phase(),\n    #                               time_sent=int(time.time())))\n\n    # 处理游戏进入下一阶段\n    game.process()\n\n# 将游戏导出到磁盘以便可视化（游戏会被追加到文件中）\n# 或者，我们也可以这样做 >> file.write(json.dumps(to_saved_game_format(game)))\nto_saved_game_format(game, output_path='game.json')\n```\n\n## 网页界面\n\n还可以使用 React 安装一个网页界面，以便与机器人和\u002F或其他人对战，并可视化游戏过程。\n\n可以通过以下步骤安装网页界面：\n\n```bash\n# 安装 NVM\ncurl -o- https:\u002F\u002Fraw.githubusercontent.com\u002Fnvm-sh\u002Fnvm\u002Fv0.34.0\u002Finstall.sh | bash\n\n# 克隆仓库\ngit clone 
https:\u002F\u002Fgithub.com\u002Fdiplomacy\u002Fdiplomacy.git\n\n# 在本地安装依赖\n# 建议在 conda 或 virtualenv 环境中安装\ncd diplomacy\u002F\npip install -r requirements_dev.txt\n\n# 构建 Node 模块\ncd diplomacy\u002Fweb\nnpm install .\nnpm install . --only=dev\n\n# 在一个终端窗口或标签页中启动 React 服务器\nnpm start\n\n# 在另一个终端窗口或标签页中启动 Diplomacy 服务器\npython -m diplomacy.server.run\n```\n\n网页界面将可通过 \u003Chttp:\u002F\u002Flocalhost:3000> 访问。\n\n用户可以使用 admin\u002Fpassword 或 username\u002Fpassword 登录。如果输入的用户名在数据库中不存在，系统会自动创建新用户。\n\n![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FGoodStartLabs_AI_Diplomacy_readme_c1820757b650.png)\n\n### 可视化游戏\n\n可以通过网页界面右上角的“从磁盘加载游戏”菜单来可视化游戏。\n\n![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FGoodStartLabs_AI_Diplomacy_readme_6e3d78ce26fd.png)\n\n## 网络对战\n\n可以通过 WebSocket 远程加入网络对战。下面的脚本演示了如何进行网络对战。\n\n注意：为了使脚本正常运行，必须使用 `python -m diplomacy.server.run` 启动服务器。\n\n```python3\nimport asyncio\nimport random\nfrom diplomacy.client.connection import connect\nfrom diplomacy.utils import exceptions\n\nPOWERS = ['AUSTRIA', 'ENGLAND', 'FRANCE', 'GERMANY', 'ITALY', 'RUSSIA', 'TURKEY']\n\nasync def create_game(game_id, hostname='localhost', port=8432):\n    \"\"\" 在服务器上创建一场游戏 \"\"\"\n    connection = await connect(hostname, port)\n    channel = await connection.authenticate('random_user', 'password')\n    await channel.create_game(game_id=game_id, rules={'REAL_TIME', 'NO_DEADLINE', 'POWER_CHOICE'})\n\nasync def play(game_id, power_name, hostname='localhost', port=8432):\n    \"\"\" 以指定势力身份进行游戏 \"\"\"\n    connection = await connect(hostname, port)\n    channel = await connection.authenticate('user_' + power_name, 'password')\n\n    # 等待游戏开始并加入\n    while not (await channel.list_games(game_id=game_id)):\n        await asyncio.sleep(1.)\n    game = await channel.join_game(game_id=game_id, power_name=power_name)\n\n    # 开始游戏\n    while not game.is_game_done:\n        current_phase = game.get_current_phase()\n\n        # 提交指令\n        if 
game.get_orderable_locations(power_name):\n            possible_orders = game.get_all_possible_orders()\n            orders = [random.choice(possible_orders[loc]) for loc in game.get_orderable_locations(power_name)\n                      if possible_orders[loc]]\n            print('[%s\u002F%s] - 提交了: %s' % (power_name, game.get_current_phase(), orders))\n            await game.set_orders(power_name=power_name, orders=orders, wait=False)\n\n        # 可以通过 game.send_message 发送消息\n        # await game.send_game_message(message=game.new_power_message('FRANCE', '这是消息'))\n\n        # 等待游戏处理完成\n        while current_phase == game.get_current_phase():\n            await asyncio.sleep(0.1)\n\n    # 可以使用 to_saved_game_format 保存游戏的本地副本\n    # 若要下载包含所有势力消息的游戏副本，需要以管理员身份导出游戏，即使用 'admin' \u002F 'password' 登录\n\nasync def launch(game_id):\n    \"\"\" 创建并进行一场网络对战 \"\"\"\n    await create_game(game_id)\n    await asyncio.gather(*[play(game_id, power_name) for power_name in POWERS])\n\nif __name__ == '__main__':\n    asyncio.run(launch(game_id=str(random.randint(1, 1000))))\n```\n\n## 许可证\n\n版权所有 © 2025 Good Start Labs\n\n更多详情请参阅 [LICENSE](LICENSE) 文件。","# AI_Diplomacy 快速上手指南\n\nAI_Diplomacy 是一个基于大语言模型（LLM）的策略游戏 AI 框架，扩展了经典的《外交》（Diplomacy）游戏。它能让每个国家由具备记忆、关系追踪和战略决策能力的自主 AI 代理控制。\n\n## 环境准备\n\n### 系统要求\n- **Python**: 3.10 或更高版本\n- **操作系统**: Linux, macOS, 或 Windows (推荐 Linux\u002FmacOS 以获得最佳异步性能)\n- **API 密钥**: 至少需要一个支持的 LLM 提供商密钥 (OpenAI, Anthropic\u002FClaude, Google\u002FGemini, DeepSeek, OpenRouter 等)\n\n### 前置依赖\n确保已安装 `git` 和 `pip`。本项目主要依赖 Python 标准库及常见的 AI 客户端库（安装步骤中会自动处理）。\n\n## 安装步骤\n\n1. **克隆仓库**\n   ```bash\n   git clone https:\u002F\u002Fgithub.com\u002Fdiplomacy\u002Fdiplomacy.git\n   cd diplomacy\n   # 注意：AI_Diplomacy 是作为扩展模块存在的，请确保你位于包含 ai_diplomacy 目录的项目根目录下\n   ```\n   *(注：如果该项目是独立仓库，请直接克隆对应的 AI_Diplomacy 仓库地址)*\n\n2. **创建虚拟环境（推荐）**\n   ```bash\n   python -m venv venv\n   source venv\u002Fbin\u002Factivate  # Windows 用户请使用: venv\\Scripts\\activate\n   ```\n\n3. 
**安装依赖**\n   ```bash\n   pip install -r requirements.txt\n   ```\n   *如果没有 `requirements.txt`，请根据项目实际依赖安装核心库，通常包括：*\n   ```bash\n   pip install openai anthropic google-generativeai httpx asyncio\n   ```\n\n4. **配置 API 密钥**\n   在终端导出你的 LLM 提供商密钥（以 OpenAI 和 Anthropic 为例）：\n   ```bash\n   export OPENAI_API_KEY=\"your-openai-key\"\n   export ANTHROPIC_API_KEY=\"your-anthropic-key\"\n   # 其他提供商同理，或在运行命令中通过 #api_key 语法直接指定\n   ```\n\n## 基本使用\n\n### 1. 运行一场基础游戏\n启动一个由 AI 控制的完整游戏，设置最大年份为 1910 年，每回合进行 3 轮谈判：\n\n```bash\npython lm_game.py --max_year 1910 --num_negotiation_rounds 3\n```\n\n### 2. 指定不同的 AI 模型\n你可以为七个国家（奥地利、英国、法国、德国、意大利、俄罗斯、土耳其）分配不同的模型。以下示例交替使用 Claude 3.5 Sonnet 和 GPT-4o：\n\n```bash\npython lm_game.py --max_year 1910 --models \"claude-3-5-sonnet-20241022,gpt-4o,claude-3-5-sonnet-20241022,gpt-4o,claude-3-5-sonnet-20241022,gpt-4o,claude-3-5-sonnet-20241022\"\n```\n\n**模型指定语法说明：**\n格式为 `\u003C客户端前缀>:模型名[@基础 URL][#API 密钥]`。\n- **单模型通用**：所有国家使用 OpenRouter 上的 GPT-4o\n  ```bash\n  python lm_game.py --models \"openrouter:gpt-4o\"\n  ```\n- **自定义地址\u002F密钥**：仅为奥地利指定本地部署的模型，其余使用默认环境变量\n  ```bash\n  python lm_game.py --models \"openai:llama-3.2-3b@http:\u002F\u002Flocalhost:8000#myapikey,openai:gpt-4o,openai:gpt-4o,openai:gpt-4o,openai:gpt-4o,openai:gpt-4o,openai:gpt-4o\"\n  ```\n\n### 3. 保存结果与断点续跑\n将游戏日志和状态保存到指定目录，并支持从中断的阶段恢复：\n\n```bash\n# 首次运行并保存结果\npython lm_game.py --run_dir results\u002Fgame_run_001 --num_negotiation_rounds 2\n\n# 从特定阶段（如 1902 年春季移动阶段）恢复运行\npython lm_game.py --run_dir results\u002Fgame_run_001 --resume_from_phase S1902M\n```\n\n### 4. 
批量实验（进阶）\n使用 `experiment_runner.py` 并行运行多场游戏以进行对比分析：\n\n```bash\npython3 experiment_runner.py \\\n    --experiment_dir \"results\u002Fexp001\" \\\n    --iterations 10 \\\n    --parallel 10 \\\n    --max_year 1905 \\\n    --models \"gpt-4o\"\n```\n\n### 核心功能提示\n- **记忆系统**：AI 会自动维护“私人日记”，记录谈判、订单推理和背叛检测，并每年进行总结以防上下文溢出。\n- **战略规划**：添加 `--planning_phase` 参数可启用高层战略指令生成阶段，使 AI 决策更具长远性。\n- **日志分析**：所有 LLM 交互、关系变化和订单生成逻辑均会被详细记录在 `run_dir` 指定的文件夹中。","某高校博弈论研究团队正在利用《外交》（Diplomacy）棋盘游戏模拟多智能体间的复杂谈判与背叛行为，以测试大语言模型在动态战略环境下的决策能力。\n\n### 没有 AI_Diplomacy 时\n- **状态维护困难**：研究人员需手动记录每个势力的目标变化和与其他玩家的关系（敌对\u002F盟友），极易出现数据遗漏或逻辑矛盾。\n- **谈判深度不足**：传统脚本只能基于固定规则回复，无法进行多轮次、带有情感色彩和策略欺骗的深度私下协商。\n- **背叛检测滞后**：难以实时对比“口头承诺”与“实际指令”，往往要在游戏结束后通过人工复盘才能发现谁背叛了谁。\n- **上下文记忆受限**：随着游戏年份增加，历史对话和战局细节超出模型窗口限制，导致 AI 遗忘关键前情，决策变得短视且不合理。\n\n### 使用 AI_Diplomacy 后\n- **自主状态管理**：AI_Diplomacy 为每个势力配备独立的智能体，自动追踪动态目标并实时更新关系图谱，确保战略逻辑始终连贯。\n- **拟人化深度博弈**：智能体能根据性格设定发起多轮私密谈判，精准识别被忽略的信息，甚至主动实施或识破复杂的欺诈策略。\n- **实时背信分析**：系统自动比对谈判记录与最终下达的军事指令，即时标记背叛行为并生成分析报告，让研究者能直观看到信任崩塌的瞬间。\n- **智能记忆压缩**：通过双层记忆系统（私人日记 + 年度总结），自动提炼关键历史事件，既防止上下文溢出，又确保 AI 拥有长期的战略视野。\n\nAI_Diplomacy 将原本枯燥的规则执行转化为充满心理博弈的动态实验场，让研究者能以前所未有的粒度观察大模型在复杂社交环境中的进化过程。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002FGoodStartLabs_AI_Diplomacy_c1820757.png","GoodStartLabs","Good Start Labs","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002FGoodStartLabs_6ac9dee8.png","",null,"goodstartlabs","https:\u002F\u002Fgoodstartlabs.com","https:\u002F\u002Fgithub.com\u002FGoodStartLabs",[84,88,92,96,100,104,108,112],{"name":85,"color":86,"percentage":87},"Python","#3572A5",47,{"name":89,"color":90,"percentage":91},"Jupyter 
Notebook","#DA5B0B",39.8,{"name":93,"color":94,"percentage":95},"JavaScript","#f1e05a",7.2,{"name":97,"color":98,"percentage":99},"TypeScript","#3178c6",5.1,{"name":101,"color":102,"percentage":103},"CSS","#663399",0.5,{"name":105,"color":106,"percentage":107},"Shell","#89e051",0.2,{"name":109,"color":110,"percentage":111},"HTML","#e34c26",0.1,{"name":113,"color":114,"percentage":115},"Dockerfile","#384d54",0,641,88,"2026-04-03T12:07:05","NOASSERTION","未说明","非必需（基于云端 LLM API，如 OpenAI、Claude、Gemini 等，无需本地 GPU 推理）",{"notes":123,"python":120,"dependencies":124},"该工具主要依赖外部大语言模型 API（支持 OpenAI、Claude、Gemini、DeepSeek、OpenRouter 等），无需本地部署大型模型或特定 CUDA 环境。运行前需配置相应 API 密钥。核心逻辑包含异步调用和 BFS 路径分析，建议具备基础 Python 运行环境。",[120],[54,13,15,26,14],[127,128,129,130],"ai","ai-benchmark","benchmark","llm","2026-03-27T02:49:30.150509","2026-04-06T05:17:23.281480",[],[]]