[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-pgalko--BambooAI":3,"tool-pgalko--BambooAI":61},[4,18,26,36,44,52],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":17},4358,"openclaw","openclaw\u002Fopenclaw","OpenClaw 是一款专为个人打造的本地化 AI 助手，旨在让你在自己的设备上拥有完全可控的智能伙伴。它打破了传统 AI 助手局限于特定网页或应用的束缚，能够直接接入你日常使用的各类通讯渠道，包括微信、WhatsApp、Telegram、Discord、iMessage 等数十种平台。无论你在哪个聊天软件中发送消息，OpenClaw 都能即时响应，甚至支持在 macOS、iOS 和 Android 设备上进行语音交互，并提供实时的画布渲染功能供你操控。\n\n这款工具主要解决了用户对数据隐私、响应速度以及“始终在线”体验的需求。通过将 AI 部署在本地，用户无需依赖云端服务即可享受快速、私密的智能辅助，真正实现了“你的数据，你做主”。其独特的技术亮点在于强大的网关架构，将控制平面与核心助手分离，确保跨平台通信的流畅性与扩展性。\n\nOpenClaw 非常适合希望构建个性化工作流的技术爱好者、开发者，以及注重隐私保护且不愿被单一生态绑定的普通用户。只要具备基础的终端操作能力（支持 macOS、Linux 及 Windows WSL2），即可通过简单的命令行引导完成部署。如果你渴望拥有一个懂你",349277,3,"2026-04-06T06:32:30",[13,14,15,16],"Agent","开发框架","图像","数据工具","ready",{"id":19,"name":20,"github_repo":21,"description_zh":22,"stars":23,"difficulty_score":10,"last_commit_at":24,"category_tags":25,"status":17},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,"2026-04-05T11:01:52",[14,15,13],{"id":27,"name":28,"github_repo":29,"description_zh":30,"stars":31,"difficulty_score":32,"last_commit_at":33,"category_tags":34,"status":17},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 
代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",141543,2,"2026-04-06T11:32:54",[14,13,35],"语言模型",{"id":37,"name":38,"github_repo":39,"description_zh":40,"stars":41,"difficulty_score":32,"last_commit_at":42,"category_tags":43,"status":17},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",107888,"2026-04-06T11:32:50",[14,15,13],{"id":45,"name":46,"github_repo":47,"description_zh":48,"stars":49,"difficulty_score":10,"last_commit_at":50,"category_tags":51,"status":17},4487,"LLMs-from-scratch","rasbt\u002FLLMs-from-scratch","LLMs-from-scratch 是一个基于 PyTorch 的开源教育项目，旨在引导用户从零开始一步步构建一个类似 ChatGPT 的大型语言模型（LLM）。它不仅是同名技术著作的官方代码库，更提供了一套完整的实践方案，涵盖模型开发、预训练及微调的全过程。\n\n该项目主要解决了大模型领域“黑盒化”的学习痛点。许多开发者虽能调用现成模型，却难以深入理解其内部架构与训练机制。通过亲手编写每一行核心代码，用户能够透彻掌握 Transformer 架构、注意力机制等关键原理，从而真正理解大模型是如何“思考”的。此外，项目还包含了加载大型预训练权重进行微调的代码，帮助用户将理论知识延伸至实际应用。\n\nLLMs-from-scratch 特别适合希望深入底层原理的 AI 开发者、研究人员以及计算机专业的学生。对于不满足于仅使用 
API，而是渴望探究模型构建细节的技术人员而言，这是极佳的学习资源。其独特的技术亮点在于“循序渐进”的教学设计：将复杂的系统工程拆解为清晰的步骤，配合详细的图表与示例，让构建一个虽小但功能完备的大模型变得触手可及。无论你是想夯实理论基础，还是为未来研发更大规模的模型做准备",90106,"2026-04-06T11:19:32",[35,15,13,14],{"id":53,"name":54,"github_repo":55,"description_zh":56,"stars":57,"difficulty_score":10,"last_commit_at":58,"category_tags":59,"status":17},4292,"Deep-Live-Cam","hacksider\u002FDeep-Live-Cam","Deep-Live-Cam 是一款专注于实时换脸与视频生成的开源工具，用户仅需一张静态照片，即可通过“一键操作”实现摄像头画面的即时变脸或制作深度伪造视频。它有效解决了传统换脸技术流程繁琐、对硬件配置要求极高以及难以实时预览的痛点，让高质量的数字内容创作变得触手可及。\n\n这款工具不仅适合开发者和技术研究人员探索算法边界，更因其极简的操作逻辑（仅需三步：选脸、选摄像头、启动），广泛适用于普通用户、内容创作者、设计师及直播主播。无论是为了动画角色定制、服装展示模特替换，还是制作趣味短视频和直播互动，Deep-Live-Cam 都能提供流畅的支持。\n\n其核心技术亮点在于强大的实时处理能力，支持口型遮罩（Mouth Mask）以保留使用者原始的嘴部动作，确保表情自然精准；同时具备“人脸映射”功能，可同时对画面中的多个主体应用不同面孔。此外，项目内置了严格的内容安全过滤机制，自动拦截涉及裸露、暴力等不当素材，并倡导用户在获得授权及明确标注的前提下合规使用，体现了技术发展与伦理责任的平衡。",88924,"2026-04-06T03:28:53",[14,15,13,60],"视频",{"id":62,"github_repo":63,"name":64,"description_en":65,"description_zh":66,"ai_summary_zh":66,"readme_en":67,"readme_zh":68,"quickstart_zh":69,"use_case_zh":70,"hero_image_url":71,"owner_login":72,"owner_name":72,"owner_avatar_url":73,"owner_bio":74,"owner_company":75,"owner_location":76,"owner_email":75,"owner_twitter":72,"owner_website":75,"owner_url":77,"languages":78,"stars":99,"forks":100,"last_commit_at":101,"license":102,"difficulty_score":32,"env_os":103,"env_gpu":103,"env_ram":103,"env_deps":104,"category_tags":113,"github_topics":116,"view_count":32,"oss_zip_url":75,"oss_zip_packed_at":75,"status":17,"created_at":132,"updated_at":133,"faqs":134,"releases":160},4566,"pgalko\u002FBambooAI","BambooAI","A Python library powered by Language Models (LLMs) for conversational data discovery and analysis.","BambooAI 是一款基于大语言模型（LLM）的开源 Python 库，旨在让数据分析变得像日常对话一样简单。它允许用户直接使用自然语言提问，自动完成从数据理解、代码生成、执行分析到可视化呈现的全过程，无需用户具备深厚的编程背景。\n\n这一工具有效解决了传统数据分析中代码门槛高、探索效率低的问题。无论是处理本地 CSV 文件，还是通过 API 获取外部实时数据，BambooAI 都能灵活应对。它特别适合数据分析师、业务人员以及希望快速验证想法的研究者使用，同时也为开发者提供了强大的自动化辅助能力。\n\nBambooAI 
的技术亮点在于其智能的“自愈”机制，当生成的代码出错时能自动修正并重新运行；同时支持规划代理（Planning Agent）以拆解复杂任务，并结合向量数据库构建长期记忆，从而在多轮对话中保持上下文连贯。此外，它还集成了网络搜索功能，可补充外部知识以增强分析深度。通过简洁的 Web 界面或 Jupyter Notebook，BambooAI 让数据洞察变得更加直观和高效。","# BambooAI\n\n\u003Cimg width=\"100\" alt=\"BambooAI Logo\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fpgalko_BambooAI_readme_c700377a3027.png\" \u002F>\n\nhttps:\u002F\u002Fbambooai.org\n\nBambooAI is an open-source library that enables natural language-based data analysis using Large Language Models (LLMs). It works with both local datasets and can fetch data from external sources and APIs.\n\n## Table of Contents\n- [Overview](#overview)\n- [Features](#features)\n- [Demo Videos](#demo-videos)\n- [Installation](#installation)\n- [Quick Start](#quick-start)\n- [How It Works](#how-it-works)\n- [Configuration](#configuration)\n  - [Parameters](#parameters)\n  - [Agent and Model Configuration](#agent-and-model-configuration)\n- [Auxiliary Datasets](#auxiliary-datasets)\n- [Dataframe Ontology (Semantic Memory)](#dataframe-ontology-semantic-memory)\n- [Vector DB (Episodic Memory)](#vector-db-episodic-memory)\n- [Usage Examples](#usage-examples)\n- [Web Application Setup](#web-application-setup)\n  - [Using Docker (Recommended)](#option-1-using-docker-recommended)\n  - [Using pip package](#option-2-using-pip-package)\n  - [Using complete repository](#option-3-using-complete-repository) \n- [Model Support](#model-support)\n- [Environment Variables](#environment-variables)\n- [Logging](#logging)\n- [Performance Comparison](#performance-comparison)\n- [Contributing](#contributing)\n\n## Overview\n\nBambooAI is an experimental tool that makes data analysis more accessible by allowing users to interact with their data through natural language conversations. 
It's designed to:\n\n- Process natural language queries about datasets\n- Generate and execute Python code for analysis and visualization\n- Help users derive insights without extensive coding knowledge\n- Augment capabilities of data analysts at all levels\n- Streamline data analysis workflows\n\n## Features\n\n- Natural language interface for data analysis\n- Web UI and Jupyter notebook support\n- Support for local and external datasets\n- Integration with internet searches and external APIs\n- User feedback during stream\n- Optional planning agent for complex tasks\n- Integration of custom ontologies\n- Code generation for data analysis and visualization\n- Self healing\u002Ferror correction\n- Custom code edits and code execution\n- Knowledge base integration via vector database\n- Workflows saving and follow ups\n- In-context and multimodal queries\n\n## Demo Videos\n\n### Machine Learning Example (Jupyter Notebook)\nA demonstration of creating a machine learning model to predict Titanic passenger survival:\n\nhttps:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Fassets\u002F59ef810c-80d8-4ef1-8edf-82ba64178b85\n\n### Sports Data Analysis (Web UI)\nExample of various sports data analysis queries:\n\nhttps:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Fassets\u002F7b9c9cd6-56e3-46ee-a6c6-c32324a0c5ef\n\n## Installation\n\n```bash\npip install bambooai\n```\n\nOr alternatively clone the repo and install the requirements\n\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Fpgalko\u002FBambooAI.git\npip install -r requirements.txt\n```\n\n## Quick Start\n\nTry it out on a basic example in Google Colab: [![Open In Colab](https:\u002F\u002Fcolab.research.google.com\u002Fassets\u002Fcolab-badge.svg)](https:\u002F\u002Fcolab.research.google.com\u002Fdrive\u002F1grKtqKD4u8cVGMoVv__umci4F7IU14vU?usp=sharing)\n\n### Basic Example\n\n1. Install BambooAI:\n   ```bash\n   pip install bambooai\n   ```\n\n3. 
Configure environment:\n   ```bash\n   cp .env.example .env\n   # Edit .env with your settings\n   ```\n\n4. Configure agents\u002Fmodels\n   ```bash\n   cp LLM_CONFIG_sample.json LLM_CONFIG.json\n   # Edit LLM_CONFIG.json with your desired combination of agents, models and parameters\n   ```\n\n5. Run\n    ```python\n    import pandas as pd\n    from bambooai import BambooAI\n\n    import plotly.io as pio\n    pio.renderers.default = 'jupyterlab'\n\n    df = pd.read_csv('titanic.csv')\n    bamboo = BambooAI(df=df, planning=True, vector_db=False, search_tool=True)\n    bamboo.pd_agent_converse()\n    ```\n\n## How It Works\n\nThe BambooAI operates through six key steps:\n\n1. **Initiation**\n   - Launches with a user question or prompt for one\n   - Continues in a conversation loop until exit\n\n2. **Task Routing**\n   - Classifies questions using LLM\n   - Routes to appropriate handler (text response or code generation)\n\n3. **User Feedback**\n   - If the instruction is vague or unclear the model will pause and ask user for feedback\n   - If the model encounters any ambiguity during the solving process, it will pause and ask for direction offering a few options\n\n4. **Dynamic Prompt Build**\n   - Evaluates data requirements\n   - Asks for feedback or uses tools if more context is needed\n   - Formulates analysis plan\n   - Performs semantic search for similar questions\n   - Generates code using selected LLM\n\n5. **Debugging and Execution**\n   - Executes generated code\n   - Handles errors with LLM-based correction\n   - Retries until successful or limit reached\n\n6. 
**Results and Knowledge Base**\n   - Ranks answers for quality\n   - Stores high-quality solutions in vector database\n   - Presents formatted results or visualizations\n\n### Flow Chart\n![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fpgalko_BambooAI_readme_019eb5f0e8cf.png)\n\n## Configuration\n\n### Parameters\n\nBambooAI accepts the following initialization parameters:\n\n```python\nbamboo = BambooAI(\n    df=None,                    # DataFrame to analyze\n    auxiliary_datasets=None,    # List of paths to auxiliary datasets\n    max_conversations=4,        # Number of conversation pairs to keep in memory\n    search_tool=False,          # Enable internet search capability\n    planning=False,             # Enable planning agent for complex tasks\n    webui=False,                # Run as web application\n    vector_db=False,            # Enable vector database for knowledge storage\n    df_ontology=False,          # Use custom dataframe ontology\n    exploratory=True,           # Enable expert selection for query handling\n    custom_prompt_file=None     # Enable the use of custom\u002Fmodified prompt templates\n)\n```\n\n#### Detailed Parameter Descriptions:\n\n- `df` (pd.DataFrame, optional)\n  - Input dataframe for analysis\n  - If not provided, BambooAI will attempt to source data from the internet or auxiliary datasets\n\n- `auxiliary_datasets` (list, default=None)\n  - List of paths to auxiliary datasets\n  - These will be incorporated into the solution as needed, and pulled when the code executes\n  - These are to complement the main dataframe\n\n- `max_conversations` (int, default=4)\n  - Number of user-assistant conversation pairs to maintain in context\n  - Affects context window and token usage\n\n- `search_tool` (bool, default=False)\n  - Enables internet search capabilities\n  - Requires appropriate API keys when enabled\n\n- `planning` (bool, default=False)\n  - Enables the Planning agent for complex tasks\n  - Breaks down tasks into 
manageable steps\n  - Improves solution quality for complex queries\n\n- `webui` (bool, default=False)\n  - Runs BambooAI as a web application\n  - Uses Flask API for web interface\n\n- `vector_db` (bool, default=False)\n  - Enables vector database for knowledge storage and semantic search\n  - Stores high-quality solutions for future reference\n  - Requires Pinecone API key\n  - Supports two embeddings models `text-embedding-3-small`(OpenAI) and `all-MiniLM-L6-v2`(HF)\n\n- `df_ontology` (str, default=None)\n  - Uses custom dataframe ontology for improved understanding\n  - Requires OWL ontology as a `.ttl` file. The parameter takes the path to the TTL file.\n  - Significantly improves solution quality\n\n- `exploratory` (bool, default=True)\n  - Enables expert selection for query handling\n  - Chooses between Research Specialist and Data Analyst roles\n\n- `custom_prompt_file` (str, default=None)\n  - Enables users to provide custom prompt templates\n  - Requires path to the YAML file containing the templates\n\n### Agent and Model Configuration\n\nBambooAI uses multi-agent system where different specialized agents handle specific aspects of the data analysis process. Each agent can be configured to use different LLM models and parameters based on their specific requirements.\n\n#### Configuration Structure\n\nThe LLM configuration is stored in `LLM_CONFIG.json`. 
Here's the complete configuration structure:\n\n```json\n{\n  \"agent_configs\": [\n    {\"agent\": \"Expert Selector\", \"details\": {\"model\": \"gpt-4.1\", \"provider\":\"openai\",\"max_tokens\": 2000, \"temperature\": 0}},\n    {\"agent\": \"Analyst Selector\", \"details\": {\"model\": \"claude-3-7-sonnet-20250219\", \"provider\":\"anthropic\",\"max_tokens\": 2000, \"temperature\": 0}},\n    {\"agent\": \"Theorist\", \"details\": {\"model\": \"gemini-2.5-pro-preview-03-25\", \"provider\":\"gemini\",\"max_tokens\": 4000, \"temperature\": 0}},\n    {\"agent\": \"Dataframe Inspector\", \"details\": {\"model\": \"gemini-2.0-flash\", \"provider\":\"gemini\",\"max_tokens\": 8000, \"temperature\": 0}},\n    {\"agent\": \"Planner\", \"details\": {\"model\": \"gemini-2.5-pro-preview-03-25\", \"provider\":\"gemini\",\"max_tokens\": 8000, \"temperature\": 0}},\n    {\"agent\": \"Code Generator\", \"details\": {\"model\": \"claude-3-5-sonnet-20241022\", \"provider\":\"anthropic\",\"max_tokens\": 8000, \"temperature\": 0}},\n    {\"agent\": \"Error Corrector\", \"details\": {\"model\": \"claude-3-5-sonnet-20241022\", \"provider\":\"anthropic\",\"max_tokens\": 8000, \"temperature\": 0}},\n    {\"agent\": \"Reviewer\", \"details\": {\"model\": \"gemini-2.5-pro-preview-03-25\", \"provider\":\"gemini\",\"max_tokens\": 8000, \"temperature\": 0}},\n    {\"agent\": \"Solution Summarizer\", \"details\": {\"model\": \"gemini-2.5-flash-preview-04-17\", \"provider\":\"gemini\",\"max_tokens\": 4000, \"temperature\": 0}},\n    {\"agent\": \"Google Search Executor\", \"details\": {\"model\": \"gemini-2.5-flash-preview-04-17\", \"provider\":\"gemini\",\"max_tokens\": 4000, \"temperature\": 0}},\n    {\"agent\": \"Google Search Summarizer\", \"details\": {\"model\": \"gemini-2.5-flash-preview-04-17\", \"provider\":\"gemini\",\"max_tokens\": 4000, \"temperature\": 0}}\n  ],\n  \"model_properties\": {\n    \"gpt-4o\": {\"capability\":\"base\",\"multimodal\":\"true\", 
\"templ_formating\":\"text\", \"prompt_tokens\": 0.0025, \"completion_tokens\": 0.010},\n    \"gpt-4.1\": {\"capability\":\"base\",\"multimodal\":\"true\", \"templ_formating\":\"text\", \"prompt_tokens\": 0.002, \"completion_tokens\": 0.008},\n    \"gpt-4o-mini\": {\"capability\":\"base\", \"multimodal\":\"true\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.00015, \"completion_tokens\": 0.0006},\n    \"gpt-4.1-mini\": {\"capability\":\"base\", \"multimodal\":\"true\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.0004, \"completion_tokens\": 0.0016},\n    \"o1-mini\": {\"capability\":\"reasoning\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.003, \"completion_tokens\": 0.012},\n    \"o3-mini\": {\"capability\":\"reasoning\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.0011, \"completion_tokens\": 0.0044},\n    \"o1\": {\"capability\":\"reasoning\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.015, \"completion_tokens\": 0.06},\n    \"gemini-2.0-flash\": {\"capability\":\"base\", \"multimodal\":\"true\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.0001, \"completion_tokens\": 0.0004},\n    \"gemini-2.5-flash-preview-04-17\": {\"capability\":\"reasoning\", \"multimodal\":\"true\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.00015, \"completion_tokens\": 0.0035},\n    \"gemini-2.0-flash-thinking-exp-01-21\": {\"capability\":\"reasoning\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.0, \"completion_tokens\": 0.0},\n    \"gemini-2.5-pro-exp-03-25\": {\"capability\":\"reasoning\", \"multimodal\":\"true\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.0, \"completion_tokens\": 0.0},\n    \"gemini-2.5-pro-preview-03-25\": {\"capability\":\"reasoning\", \"multimodal\":\"true\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.00125, \"completion_tokens\": 0.01},\n    \"claude-3-5-haiku-20241022\": {\"capability\":\"base\", 
\"multimodal\":\"true\",\"templ_formating\":\"xml\", \"prompt_tokens\": 0.0008, \"completion_tokens\": 0.004},\n    \"claude-3-5-sonnet-20241022\": {\"capability\":\"base\", \"multimodal\":\"true\",\"templ_formating\":\"xml\", \"prompt_tokens\": 0.003, \"completion_tokens\": 0.015},\n    \"claude-3-7-sonnet-20250219\": {\"capability\":\"base\", \"multimodal\":\"true\",\"templ_formating\":\"xml\", \"prompt_tokens\": 0.003, \"completion_tokens\": 0.015},\n    \"open-mixtral-8x7b\": {\"capability\":\"base\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.0007, \"completion_tokens\": 0.0007},\n    \"mistral-small-latest\": {\"capability\":\"base\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.001, \"completion_tokens\": 0.003},\n    \"codestral-latest\": {\"capability\":\"base\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.001, \"completion_tokens\": 0.003},\n    \"open-mixtral-8x22b\": {\"capability\":\"base\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.002, \"completion_tokens\": 0.006},\n    \"mistral-large-2407\": {\"capability\":\"base\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.003, \"completion_tokens\": 0.009},\n    \"deepseek-chat\": {\"capability\":\"base\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.00014, \"completion_tokens\": 0.00028},\n    \"deepseek-reasoner\": {\"capability\":\"reasoning\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.00055, \"completion_tokens\": 0.00219},\n    \"\u002Fmnt\u002Fc\u002FUsers\u002Fpgalk\u002Fvllm\u002Fmodels\u002FDeepSeek-R1-Distill-Qwen-14B\": {\"capability\":\"reasoning\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.00, \"completion_tokens\": 0.00},\n    \"deepseek-r1-distill-llama-70b\": {\"capability\":\"reasoning\", 
\"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.00, \"completion_tokens\": 0.00},\n    \"deepseek-r1:32b\": {\"capability\":\"reasoning\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.00, \"completion_tokens\": 0.00},\n    \"deepseek-ai\u002Fdeepseek-r1\": {\"capability\":\"reasoning\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.00, \"completion_tokens\": 0.00},\n    \"MiniMax-M2.7\": {\"capability\":\"base\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.001, \"completion_tokens\": 0.005},\n    \"MiniMax-M2.7-highspeed\": {\"capability\":\"base\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.001, \"completion_tokens\": 0.005}\n  }\n}\n```\nThe `LLM_CONFIG.json` configuration file needs to be located in the BambooAI working dir, eg. `\u002FUsers\u002Fpalogalko\u002FAI_Experiments\u002FBamboo_AI\u002Fweb_app\u002FLLM_CONFIG.json`, and all API keys for the specified models need to be present in the `.env` also located in the working dir.\nThe above combination of agents\u002Fmodels is the most performant according to our tests as of 22 Apr 2025 using sports and performance datasets. I would strongly encourage you to experiment with these settings to see what combination best suits your particular use case.\n\n#### Agent Roles\n\n- **Expert Selector**: Determines the best expert type for handling the query\n- **Analyst Selector**: Selects specific analysis approach\n- **Theorist**: Provides theoretical background and methodology\n- **Dataframe Inspector**: Analyzes and understands data structure. 
(Requires ontology file)\n- **Planner**: Creates step-by-step analysis plans\n- **Code Generator**: Writes Python code for analysis\n- **Error Corrector**: Debugs and fixes code issues\n- **Reviewer**: Evaluates solution quality, and adjusts the plans accordingly\n- **Solution Summarizer**: Creates concise result summaries\n- **Google Search Executor**: Optimizes and executes search queries\n- **Google Search Summarizer**: Synthesizes search results\n\n#### Configuration Fields\n\n- `agent_configs`: Agents configuration\n  - `agent`: The type of agent\n  - `details`:\n    - `model`: Model identifier\n    - `provider`: Service provider (openai, anthropic, gemini, etc.)\n    - `max_tokens`: Maximum tokens for completion\n    - `temperature`: Creativity parameter (0-1)\n\n- `model_properties`: Model properties\n  - `capability`: Base or Reasoning model\n  - `multimodal`: Multimodal or text only\n  - `templ_formating`: Prompt formatting. XML or Text\n  - `prompt_tokens`: Cost of input (1K)\n  - `completion_tokens`: Cost of output (1K)\n\nIf you assign a model for an agent in `agent_configs` make sure that the model is defined in `model_properties`.\n\n#### Example Alternative Configurations\n\n1. **Using Ollama:**\n```json\n{\n  \"agent\": \"Planner\",\n  \"details\": {\n    \"model\": \"llama3:70b\",\n    \"provider\": \"ollama\",\n    \"max_tokens\": 2000,\n    \"temperature\": 0\n  }\n}\n```\n\n2. **Using VLLM:**\n```json\n{\n  \"agent\": \"Code Generator\",\n  \"details\": {\n    \"model\": \"\u002Fpath\u002Fto\u002Fmodel\u002FDeepSeek-R1-Distill-14B\",\n    \"provider\": \"vllm\",\n    \"max_tokens\": 2000,\n    \"temperature\": 0\n  }\n}\n```\n\n3. 
**Using MiniMax:**\n```json\n{\n  \"agent\": \"Code Generator\",\n  \"details\": {\n    \"model\": \"MiniMax-M2.7\",\n    \"provider\": \"minimax\",\n    \"max_tokens\": 8000,\n    \"temperature\": 0.1\n  }\n}\n```\n\n## Auxiliary Datasets\n\nBambooAI supports working with multiple datasets simultaneously, allowing for more comprehensive and contextual analysis. \nThe auxiliary datasets feature enables you to reference and incorporate additional data sources alongside your primary dataset.\n\nWhen you ask questions that might benefit from auxiliary data, BambooAI will:\n\n1. Analyze which datasets contain relevant information\n2. Load only the necessary datasets\n3. Join or cross-reference the data as needed\n4. Generate and execute code that properly handles the multi-dataset operations\n\n### How to Use\n\n```python\nfrom bambooai import BambooAI\nimport pandas as pd\n\n# Load primary dataset\nmain_df = pd.read_csv('main_data.csv')\n\n# Specify paths to auxiliary datasets\nauxiliary_paths = [\n    'path\u002Fto\u002Fsupporting_data1.csv',\n    'path\u002Fto\u002Fsupporting_data2.parquet',\n    'path\u002Fto\u002Freference_data.csv'\n]\n\n# Initialize BambooAI with auxiliary datasets\nbamboo = BambooAI(\n    df=main_df,\n    auxiliary_datasets=auxiliary_paths,\n)\n```\n\n## Dataframe Ontology (Semantic Memory)\n\nBambooAI supports custom ontologies to ground the agents within the specific domain of interest.\n\n[Ontology Integration Wiki](https:\u002F\u002Fgithub.com\u002Fpgalko\u002FBambooAI\u002Fwiki\u002FDataframe-Ontology-Integration)\n\n[Medium Blog Post](https:\u002F\u002Fmedium.com\u002F@palogalko\u002Fknowledge-graph-driven-data-analysis-making-ai-speak-your-domains-language-b2200fd60413)\n\n### How to Use\n\n```python\nfrom bambooai import BambooAI\nimport pandas as pd\n\n# Initialize with ontology file path\nbamboo = BambooAI(\n    df=your_dataframe,\n    df_ontology=\"path\u002Fto\u002Fontology.ttl\"\n)\n```\n\n### What It Does\n\nThe ontology file 
defines your data structure using RDF\u002FOWL notation, including:\n- Object properties (relationships)\n- Data properties (attributes)\n- Classes (data types)\n- Individuals (specific instances)\n\nThis helps BambooAI understand complex data relationships and generate more accurate code.\n\n## Vector DB (Episodic Memory)\n\nBambooAI supports integration with a vector database. The main purpose is to allow storage and retrieval of successful analyses, allowing the system to evolve and learn over time.\n\n[Medium Blog Post](https:\u002F\u002Fmedium.com\u002F@palogalko\u002Flong-term-memory-in-ai-powered-sports-science-data-analysis-335777e06ac6)\n\n### How to Use\n\n```python\nfrom bambooai import BambooAI\nimport pandas as pd\n\n# Initialize with vector database enabled\nbamboo = BambooAI(\n    df=your_dataframe,\n    vector_db=True\n)\n```\nSupports both Pinecone and Qdrant vector databases. Configure your choice using environment variables:\n\n**For Pinecone:**\nRequires an account with [Pinecone (free)](https:\u002F\u002Fapp.pinecone.io\u002F), and the API key stored in the `.env`:\n```\nVECTOR_DB_TYPE=pinecone\nPINECONE_API_KEY=\u003CYOUR API KEY HERE>\nPINECONE_CLOUD=aws\nPINECONE_REGION=us-east-1\n```\n\n**For Qdrant:**\nCan use either a local Qdrant instance or Qdrant Cloud. Configure in `.env`:\n```\nVECTOR_DB_TYPE=qdrant\nQDRANT_URL=http:\u002F\u002Flocalhost:6333  # For local Qdrant\nQDRANT_API_KEY=\u003CYOUR API KEY HERE>  # Optional for local, required for cloud\n```\n\n### What It Does\n\nUpon successful analysis completion, the user can rank and store the solution. 
\n- The intent of the highly ranked solutions (>6) will be vectorised using the selected model, and stored in the configured vector database (Pinecone or Qdrant) together with the solution metadata\n- Metadata:\n  - Data Model\n  - Plan\n  - Code\n  - Rank\n- When a new task arrives, the system will query the vector index and retrieve the closest match that is above the similarity threshold (0.8)\n- The saved solutions will serve as a reference for subsequent similar tasks guiding the relevant agents through the solving process. \n\n## Usage Examples\n\n### Interactive Mode (Jupyter Notebook or CLI)\n```python\nimport pandas as pd\nfrom bambooai import BambooAI\n\nimport plotly.io as pio\npio.renderers.default = 'jupyterlab'\n\ndf = pd.read_csv('training_activity_data.csv')\naux_data = [\n    'path\u002Fto\u002Fwellness_data.csv',\n    'path\u002Fto\u002Fnutrition_data.parquet',\n]\n\nbamboo = BambooAI(df=df, auxiliary_datasets=aux_data, search_tool=True, planning=True)\nbamboo.pd_agent_converse()\n```\n\n### Single Query Mode (Jupyter Notebook or CLI)\n```python\nbamboo.pd_agent_converse(\"Calculate 30, 50, 75 and 90 percentiles of the heart rate column\")\n```\n\n## Web Application Setup\n\nWeb UI screenshot (Interactive Workflow Map):\n\n\u003Cimg width=\"2056\" alt=\"Workflow Map Feature\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fpgalko_BambooAI_readme_9f9d3b725d20.png\" \u002F>\n\n\n### Option 1: Using Docker (Recommended)\n\nBambooAI can be easily deployed using Docker, which provides a consistent environment regardless of your operating system or local setup.\n\nFor detailed Docker setup and usage instructions, please refer to our [Docker Setup Wiki](https:\u002F\u002Fgithub.com\u002Fpgalko\u002FBambooAI\u002Fwiki\u002FDocker-Installation-Guide-for-BambooAI).\n\nThe Docker approach offers several advantages:\n- No need to manage Python dependencies locally\n- Consistent environment across different machines\n- Easy configuration through volume mounting\n- Support for both 
repository-based and standalone deployments\n- **Sandboxed code execution** for enhanced security\n\nPrerequisites:\n- [Docker](https:\u002F\u002Fdocs.docker.com\u002Fget-docker\u002F) installed on your system\n- [Docker Compose](https:\u002F\u002Fdocs.docker.com\u002Fcompose\u002Finstall\u002F) installed on your system\n\n### Option 2: Using pip package\n\n1. Install BambooAI:\n   ```bash\n   pip install bambooai\n   ```\n\n2. Download web_app folder from repository\n\n3. Configure environment:\n   ```bash\n   cp .env.example \u003Cpath_to_web_app>\u002F.env\n   # Edit .env with your settings\n   ```\n\n4. Configure LLM agents, models and parameters\n   ```bash\n   cp LLM_CONFIG_sample.json \u003Cpath_to_web_app>\u002FLLM_CONFIG.json\n   ```\n\n   - Edit `web_app\u002FLLM_CONFIG.json` in the web_app directory\n   - Configure each agent with desired model:\n   ```json\n   {\n      \"agent_configs\": [\n        {\n          \"agent\": \"Code Generator\",\n          \"details\": {\n            \"model\": \"your-preferred-model\",\n            \"provider\": \"provider-name\",\n            \"max_tokens\": 4000,\n            \"temperature\": 0\n          }\n        }\n      ]\n   }\n   ```\n   - If no configuration is provided the execution will fail and an error message will be displayed.\n\n5. Run application:\n   ```bash\n   cd \u003Cpath_to_web_app>\n   python app.py\n   ```\n\n### Option 3: Using complete repository\n\n1. Clone repository:\n   ```bash\n   git clone https:\u002F\u002Fgithub.com\u002Fpgalko\u002FBambooAI.git\n   cd BambooAI\n   ```\n\n2. Install dependencies:\n   ```bash\n   pip install -r requirements.txt\n   ```\n\n3. Configure environment:\n   ```bash\n   cp .env.example web_app\u002F.env\n   # Edit .env with your settings\n   ```\n\n4. 
Configure LLM agents, models and parameters\n   ```bash\n   cp LLM_CONFIG_sample.json web_app\u002FLLM_CONFIG.json\n   ```\n\n   - Edit `web_app\u002FLLM_CONFIG.json` in the web_app directory\n   - Configure each agent with desired model:\n   ```json\n   {\n      \"agent_configs\": [\n        {\n          \"agent\": \"Code Generator\",\n          \"details\": {\n            \"model\": \"your-preferred-model\",\n            \"provider\": \"provider-name\",\n            \"max_tokens\": 4000,\n            \"temperature\": 0\n          }\n        }\n      ]\n   }\n   ```\n   - If no configuration is provided the execution will fail and an error message will be displayed.\n5. Run application:\n   ```bash\n   cd web_app\n   python app.py\n   ```\n\nAccess web interface at http:\u002F\u002Flocalhost:5000 (5001 if using Docker)\n\n## Model Support\n\n### API-based Models\n- OpenAI\n- Google (Gemini)\n- Anthropic\n- Groq\n- Mistral\n- DeepSeek\n- OpenRouter\n- MiniMax\n\n### Local Models\n- Ollama (all models)\n- VLLM (all models)\n- Various local models\n\n## Environment Variables\n\nRequired variables in `.env`:\n\n#### Model API Keys. Specify the API keys for the models you want to use, and have access to.\n- `\u003CVENDOR_NAME>_API_KEY`: API keys for selected providers\n- `GEMINI_API_KEY`: This needs to be set if you want to use the native Gemini web search tool (Grounding). 
You can alternatively use Selenium, however it is much slower and not as tightly integrated.\n\n#### Search and VectorDB API Keys(Optional)\n- `PINECONE_API_KEY`: Optional for vector database\n- `SERPER_API_KEY`: Required for Selenium search\n\n#### Remote API Endpoints(Optional)\n- `REMOTE_OLLAMA`: Optional URL for remote Ollama server\n- `REMOTE_VLLM`: Optional URL for remote VLLM server\n\n#### Application Configuration\n- `FLASK_SECRET`: This is used to sign the session cookie for WebApp\n- `WEB_SEARCH_MODE`: 'google_ai' to use Gemini native search tool, or 'selenium' to use selenium web driver\n- `SELENIUM_WEBDRIVER_PATH`: Path to your Selenium WebDriver. This is required if you are using the 'selenium' web search mode.\n- `EXECUTION_MODE`: 'local' to run the code executor locally, or 'api' to run the code executor on a remote server or container.\n- `EXECUTOR_API_BASE_URL`: URL of the remote code executor API. This is required if you are using the 'api' execution mode, e.g. http:\u002F\u002F192.168.1.201:5000\n\n## Logging\n\nThe log for each Run\u002FThread is stored in `logs\u002Fbambooai_run_log.json`. The file gets overwritten when a new Thread starts.\nConsolidated logs are stored in `logs\u002Fbambooai_consolidated_log.json` with 5MB size limit and 3-file rotation. Logged information includes:\n\n- Chain ID\n- LLM call details (agent, timestamp, model, prompt, response)\n- Token usage and costs\n- Performance metrics\n- Summary statistics per model\n\n## Performance Comparison\n\n\nFor detailed evaluation report, see: [Objective Assessment Report](https:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Ffiles\u002F20273296\u002FObjective.Assessment.of.AI.Tools.for.Sports.Data.Analytics_.BambooAI.vs.Generic.LLMs.pdf)\n\n\n\n## Contributing\nContributions are welcome via pull requests. 
Focus on maintaining code readability and conciseness.\n\nThis project is indexed with DeepWiki by Cognition Labs, providing developers with:\n- AI-generated comprehensive documentation\n- Interactive code exploration\n- Context-aware development guidance\n- Visualization of project workflows\n\nAccess the project's full interactive documentation:\n[DeepWiki pgalko\u002FBambooAI](https:\u002F\u002Fdeepwiki.com\u002Fpgalko\u002FBambooAI)\n\n## Notes\n\n- Supports multiple model providers and local execution\n- Exercise caution with code execution\n- Monitor token usage\n- Development is ongoing\n\n## Contact\n\npalo@bambooai.io\n\n## Todo\n\n- Future improvements planned\n","# BambooAI\n\n\u003Cimg width=\"100\" alt=\"BambooAI Logo\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fpgalko_BambooAI_readme_c700377a3027.png\" \u002F>\n\nhttps:\u002F\u002Fbambooai.org\n\nBambooAI 是一个开源库，利用大型语言模型（LLMs）实现基于自然语言的数据分析。它既可以处理本地数据集，也能从外部来源和 API 获取数据。\n\n## 目录\n- [概述](#overview)\n- [功能](#features)\n- [演示视频](#demo-videos)\n- [安装](#installation)\n- [快速入门](#quick-start)\n- [工作原理](#how-it-works)\n- [配置](#configuration)\n  - [参数](#parameters)\n  - [代理与模型配置](#agent-and-model-configuration)\n- [辅助数据集](#auxiliary-datasets)\n- [DataFrame 本体论（语义记忆）](#dataframe-ontology-semantic-memory)\n- [向量数据库（情景记忆）](#vector-db-episodic-memory)\n- [使用示例](#usage-examples)\n- [Web 应用程序设置](#web-application-setup)\n  - [使用 Docker（推荐）](#option-1-using-docker-recommended)\n  - [使用 pip 包](#option-2-using-pip-package)\n  - [使用完整仓库](#option-3-using-complete-repository) \n- [模型支持](#model-support)\n- [环境变量](#environment-variables)\n- [日志记录](#logging)\n- [性能对比](#performance-comparison)\n- [贡献](#contributing)\n\n## 概述\n\nBambooAI 是一款实验性工具，通过允许用户以自然语言对话的方式与数据交互，使数据分析更加易于访问。它的设计目标是：\n\n- 处理关于数据集的自然语言查询\n- 生成并执行用于分析和可视化的 Python 代码\n- 帮助用户在无需大量编程知识的情况下获得洞察\n- 增强各层次数据分析师的能力\n- 简化数据分析流程\n\n## 功能\n\n- 数据分析的自然语言界面\n- 支持 Web UI 和 Jupyter Notebook\n- 支持本地和外部数据集\n- 集成互联网搜索和外部 API\n- 流式过程中提供用户反馈\n- 可选的复杂任务规划代理\n- 
集成自定义本体论\n- 为数据分析和可视化生成代码\n- 自我修复\u002F错误纠正\n- 自定义代码编辑和代码执行\n- 通过向量数据库集成知识库\n- 工作流保存和后续跟进\n- 上下文相关及多模态查询\n\n## 演示视频\n\n### 机器学习示例（Jupyter Notebook）\n演示如何创建一个预测泰坦尼克号乘客生存情况的机器学习模型：\n\nhttps:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Fassets\u002F59ef810c-80d8-4ef1-8edf-82ba64178b85\n\n### 体育数据分析（Web UI）\n各种体育数据分析查询示例：\n\nhttps:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Fassets\u002F7b9c9cd6-56e3-46ee-a6c6-c32324a0c5ef\n\n## 安装\n\n```bash\npip install bambooai\n```\n\n或者克隆仓库并安装依赖项：\n\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Fpgalko\u002FBambooAI.git\npip install -r requirements.txt\n```\n\n## 快速入门\n\n在 Google Colab 中尝试一个基本示例：[![Open In Colab](https:\u002F\u002Fcolab.research.google.com\u002Fassets\u002Fcolab-badge.svg)](https:\u002F\u002Fcolab.research.google.com\u002Fdrive\u002F1grKtqKD4u8cVGMoVv__umci4F7IU14vU?usp=sharing)\n\n### 基本示例\n\n1. 安装 BambooAI：\n   ```bash\n   pip install bambooai\n   ```\n\n3. 配置环境：\n   ```bash\n   cp .env.example .env\n   # 编辑 .env 文件以适应您的设置\n   ```\n\n4. 配置代理\u002F模型：\n   ```bash\n   cp LLM_CONFIG_sample.json LLM_CONFIG.json\n   # 根据您希望使用的代理、模型和参数组合编辑 LLM_CONFIG.json 文件\n   ```\n\n5. 运行：\n    ```python\n    import pandas as pd\n    from bambooai import BambooAI\n\n    import plotly.io as pio\n    pio.renderers.default = 'jupyterlab'\n\n    df = pd.read_csv('titanic.csv')\n    bamboo = BambooAI(df=df, planning=True, vector_db=False, search_tool=True)\n    bamboo.pd_agent_converse()\n    ```\n\n## 工作原理\n\nBambooAI 的运行过程分为六个关键步骤：\n\n1. **初始化**\n   - 以用户问题或提示开始\n   - 在退出之前持续进行对话循环\n\n2. **任务路由**\n   - 使用 LLM 对问题进行分类\n   - 路由到适当的处理器（文本响应或代码生成）\n\n3. **用户反馈**\n   - 如果指令模糊或不明确，模型会暂停并请求用户反馈\n   - 如果在求解过程中遇到任何歧义，模型会暂停并提供几个选项供选择\n\n4. **动态提示构建**\n   - 评估数据需求\n   - 请求反馈或使用工具以获取更多上下文\n   - 制定分析计划\n   - 执行语义搜索以查找类似问题\n   - 使用选定的 LLM 生成代码\n\n5. **调试与执行**\n   - 执行生成的代码\n   - 使用 LLM 进行错误纠正\n   - 重试直至成功或达到限制\n\n6. 
**结果与知识库**\n   - 对答案进行质量排序\n   - 将高质量解决方案存储在向量数据库中\n   - 展示格式化后的结果或可视化内容\n\n### 流程图\n![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fpgalko_BambooAI_readme_019eb5f0e8cf.png)\n\n## 配置\n\n### 参数\n\nBambooAI 接受以下初始化参数：\n\n```python\nbamboo = BambooAI(\n    df=None,                    # 用于分析的 DataFrame\n    auxiliary_datasets=None,    # 辅助数据集路径列表\n    max_conversations=4,        # 保留在内存中的对话对数量\n    search_tool=False,          # 启用互联网搜索功能\n    planning=False,             # 为复杂任务启用规划代理\n    webui=False,                # 作为 Web 应用程序运行\n    vector_db=False,            # 启用向量数据库以存储知识\n    df_ontology=False,          # 使用自定义 DataFrame 本体\n    exploratory=True,           # 启用专家选择来处理查询\n    custom_prompt_file=None     # 允许使用自定义或修改后的提示模板\n)\n```\n\n#### 参数详细说明：\n\n- `df` (pd.DataFrame, 可选)\n  - 用于分析的输入 DataFrame\n  - 如果未提供，BambooAI 将尝试从互联网或辅助数据集中获取数据\n\n- `auxiliary_datasets` (list, 默认 None)\n  - 辅助数据集的路径列表\n  - 这些数据集将在需要时被纳入解决方案，并在代码执行时加载\n  - 它们用于补充主 DataFrame\n\n- `max_conversations` (int, 默认 4)\n  - 在上下文中保持的用户与助手对话对数量\n  - 影响上下文窗口大小和 token 使用量\n\n- `search_tool` (bool, 默认 False)\n  - 启用互联网搜索功能\n  - 启用时需要提供相应的 API 密钥\n\n- `planning` (bool, 默认 False)\n  - 为复杂任务启用规划代理\n  - 将任务分解为可管理的步骤\n  - 提高复杂查询的解决方案质量\n\n- `webui` (bool, 默认 False)\n  - 将 BambooAI 作为 Web 应用程序运行\n  - 使用 Flask API 提供 Web 界面\n\n- `vector_db` (bool, 默认 False)\n  - 启用向量数据库以存储知识和进行语义搜索\n  - 存储高质量的解决方案供未来参考\n  - 需要 Pinecone 的 API 密钥\n  - 支持两种嵌入模型：`text-embedding-3-small`（OpenAI）和 `all-MiniLM-L6-v2`（HF）\n\n- `df_ontology` (str, 默认 None)\n  - 使用自定义 DataFrame 本体以提升理解能力\n  - 需要 OWL 本体文件，格式为 `.ttl`。该参数接受 TTL 文件的路径。\n  - 能显著提高解决方案的质量\n\n- `exploratory` (bool, 默认 True)\n  - 启用专家选择机制来处理查询\n  - 在研究专家和数据分析员角色之间进行选择\n\n- `custom_prompt_file` (str, 默认 None)\n  - 允许用户提供自定义的提示模板\n  - 需要提供包含模板的 YAML 文件路径\n\n### 代理与模型配置\n\nBambooAI 采用多代理系统，不同的专业代理负责数据分析过程中的特定环节。每个代理可以根据其具体需求配置不同的大语言模型及参数。\n\n#### 配置结构\n\nLLM 配置存储在 `LLM_CONFIG.json` 文件中。以下是完整的配置结构：\n\n```json\n{\n  \"agent_configs\": [\n    {\"agent\": \"专家选择器\", 
\"details\": {\"model\": \"gpt-4.1\", \"provider\":\"openai\",\"max_tokens\": 2000, \"temperature\": 0}},\n    {\"agent\": \"分析师选择器\", \"details\": {\"model\": \"claude-3-7-sonnet-20250219\", \"provider\":\"anthropic\",\"max_tokens\": 2000, \"temperature\": 0}},\n    {\"agent\": \"理论家\", \"details\": {\"model\": \"gemini-2.5-pro-preview-03-25\", \"provider\":\"gemini\",\"max_tokens\": 4000, \"temperature\": 0}},\n    {\"agent\": \"数据框检查器\", \"details\": {\"model\": \"gemini-2.0-flash\", \"provider\":\"gemini\",\"max_tokens\": 8000, \"temperature\": 0}},\n    {\"agent\": \"规划者\", \"details\": {\"model\": \"gemini-2.5-pro-preview-03-25\", \"provider\":\"gemini\",\"max_tokens\": 8000, \"temperature\": 0}},\n    {\"agent\": \"代码生成器\", \"details\": {\"model\": \"claude-3-5-sonnet-20241022\", \"provider\":\"anthropic\",\"max_tokens\": 8000, \"temperature\": 0}},\n    {\"agent\": \"错误修正器\", \"details\": {\"model\": \"claude-3-5-sonnet-20241022\", \"provider\":\"anthropic\",\"max_tokens\": 8000, \"temperature\": 0}},\n    {\"agent\": \"评审员\", \"details\": {\"model\": \"gemini-2.5-pro-preview-03-25\", \"provider\":\"gemini\",\"max_tokens\": 8000, \"temperature\": 0}},\n    {\"agent\": \"解决方案总结器\", \"details\": {\"model\": \"gemini-2.5-flash-preview-04-17\", \"provider\":\"gemini\",\"max_tokens\": 4000, \"temperature\": 0}},\n    {\"agent\": \"谷歌搜索执行器\", \"details\": {\"model\": \"gemini-2.5-flash-preview-04-17\", \"provider\":\"gemini\",\"max_tokens\": 4000, \"temperature\": 0}},\n    {\"agent\": \"谷歌搜索总结器\", \"details\": {\"model\": \"gemini-2.5-flash-preview-04-17\", \"provider\":\"gemini\",\"max_tokens\": 4000, \"temperature\": 0}}\n  ],\n  \"model_properties\": {\n    \"gpt-4o\": {\"capability\":\"base\",\"multimodal\":\"true\", \"templ_formating\":\"text\", \"prompt_tokens\": 0.0025, \"completion_tokens\": 0.010},\n    \"gpt-4.1\": {\"capability\":\"base\",\"multimodal\":\"true\", \"templ_formating\":\"text\", \"prompt_tokens\": 0.002, \"completion_tokens\": 0.008},\n  
  \"gpt-4o-mini\": {\"capability\":\"base\", \"multimodal\":\"true\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.00015, \"completion_tokens\": 0.0006},\n    \"gpt-4.1-mini\": {\"capability\":\"base\", \"multimodal\":\"true\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.0004, \"completion_tokens\": 0.0016},\n    \"o1-mini\": {\"capability\":\"reasoning\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.003, \"completion_tokens\": 0.012},\n    \"o3-mini\": {\"capability\":\"reasoning\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.0011, \"completion_tokens\": 0.0044},\n    \"o1\": {\"capability\":\"reasoning\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.015, \"completion_tokens\": 0.06},\n    \"gemini-2.0-flash\": {\"capability\":\"base\", \"multimodal\":\"true\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.0001, \"completion_tokens\": 0.0004},\n    \"gemini-2.5-flash-preview-04-17\": {\"capability\":\"reasoning\", \"multimodal\":\"true\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.00015, \"completion_tokens\": 0.0035},\n    \"gemini-2.0-flash-thinking-exp-01-21\": {\"capability\":\"reasoning\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.0, \"completion_tokens\": 0.0},\n    \"gemini-2.5-pro-exp-03-25\": {\"capability\":\"reasoning\", \"multimodal\":\"true\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.0, \"completion_tokens\": 0.0},\n    \"gemini-2.5-pro-preview-03-25\": {\"capability\":\"reasoning\", \"multimodal\":\"true\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.00125, \"completion_tokens\": 0.01},\n    \"claude-3-5-haiku-20241022\": {\"capability\":\"base\", \"multimodal\":\"true\",\"templ_formating\":\"xml\", \"prompt_tokens\": 0.0008, \"completion_tokens\": 0.004},\n    \"claude-3-5-sonnet-20241022\": {\"capability\":\"base\", \"multimodal\":\"true\",\"templ_formating\":\"xml\", \"prompt_tokens\": 
0.003, \"completion_tokens\": 0.015},\n    \"claude-3-7-sonnet-20250219\": {\"capability\":\"base\", \"multimodal\":\"true\",\"templ_formating\":\"xml\", \"prompt_tokens\": 0.003, \"completion_tokens\": 0.015},\n    \"open-mixtral-8x7b\": {\"capability\":\"base\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.0007, \"completion_tokens\": 0.0007},\n    \"mistral-small-latest\": {\"capability\":\"base\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.001, \"completion_tokens\": 0.003},\n    \"codestral-latest\": {\"capability\":\"base\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.001, \"completion_tokens\": 0.003},\n    \"open-mixtral-8x22b\": {\"capability\":\"base\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.002, \"completion_tokens\": 0.006},\n    \"mistral-large-2407\": {\"capability\":\"base\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.003, \"completion_tokens\": 0.009},\n    \"deepseek-chat\": {\"capability\":\"base\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.00014, \"completion_tokens\": 0.00028},\n    \"deepseek-reasoner\": {\"capability\":\"reasoning\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.00055, \"completion_tokens\": 0.00219},\n    \"\u002Fmnt\u002Fc\u002FUsers\u002Fpgalk\u002Fvllm\u002Fmodels\u002FDeepSeek-R1-Distill-Qwen-14B\": {\"capability\":\"reasoning\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.00, \"completion_tokens\": 0.00},\n    \"deepseek-r1-distill-llama-70b\": {\"capability\":\"reasoning\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.00, \"completion_tokens\": 0.00},\n    \"deepseek-r1:32b\": {\"capability\":\"reasoning\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.00, \"completion_tokens\": 0.00},\n    
\"deepseek-ai\u002Fdeepseek-r1\": {\"capability\":\"reasoning\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.00, \"completion_tokens\": 0.00},\n    \"MiniMax-M2.7\": {\"capability\":\"base\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.001, \"completion_tokens\": 0.005},\n    \"MiniMax-M2.7-highspeed\": {\"capability\":\"base\", \"multimodal\":\"false\",\"templ_formating\":\"text\", \"prompt_tokens\": 0.001, \"completion_tokens\": 0.005}\n  }\n}\n```\n`LLM_CONFIG.json` 配置文件需要位于 BambooAI 工作目录中，例如 `\u002FUsers\u002Fpalogalko\u002FAI_Experiments\u002FBamboo_AI\u002Fweb_app\u002FLLM_CONFIG.json`，且所有指定模型的 API 密钥都需要存在于同样位于工作目录中的 `.env` 文件中。\n根据我们在 2025 年 4 月 22 日使用体育和绩效数据集进行的测试结果，上述代理\u002F模型组合是性能最佳的。我强烈建议您尝试不同的设置，以找到最适合您特定用例的组合。\n\n#### 代理角色\n\n- **专家选择器**：确定处理查询的最佳专家类型\n- **分析师选择器**：选择具体的分析方法\n- **理论家**：提供理论背景和方法论\n- **数据框检查器**：分析并理解数据结构。（需要本体文件）\n- **规划者**：创建逐步的分析计划\n- **代码生成器**：编写用于分析的Python代码\n- **错误纠正器**：调试并修复代码问题\n- **评审员**：评估解决方案的质量，并相应地调整计划\n- **解决方案总结器**：创建简洁的结果摘要\n- **谷歌搜索执行器**：优化并执行搜索查询\n- **谷歌搜索总结器**：综合搜索结果\n\n#### 配置字段\n\n- `agent_configs`：智能体配置\n  - `agent`：智能体类型\n  - `details`：\n    - `model`：模型标识符\n    - `provider`：服务提供商（openai、anthropic、gemini等）\n    - `max_tokens`：完成时的最大令牌数\n    - `temperature`：创造力参数（0-1）\n\n- `model_properties`：模型属性\n  - `capability`：基础模型或推理模型\n  - `multimodal`：多模态或仅文本\n  - `templ_formating`：提示格式化。XML或文本\n  - `prompt_tokens`：输入成本（1K）\n  - `completion_tokens`：输出成本（1K）\n\n如果在`agent_configs`中为某个智能体指定了模型，请确保该模型已在`model_properties`中定义。\n\n#### 示例替代配置\n\n1. **使用Ollama：**\n```json\n{\n  \"agent\": \"Planner\",\n  \"details\": {\n    \"model\": \"llama3:70b\",\n    \"provider\": \"ollama\",\n    \"max_tokens\": 2000,\n    \"temperature\": 0\n  }\n}\n```\n\n2. 
**使用VLLM：**\n```json\n{\n  \"agent\": \"Code Generator\",\n  \"details\": {\n    \"model\": \"\u002Fpath\u002Fto\u002Fmodel\u002FDeepSeek-R1-Distill-14B\",\n    \"provider\": \"vllm\",\n    \"max_tokens\": 2000,\n    \"temperature\": 0\n  }\n}\n```\n\n3. **使用MiniMax：**\n```json\n{\n  \"agent\": \"Code Generator\",\n  \"details\": {\n    \"model\": \"MiniMax-M2.7\",\n    \"provider\": \"minimax\",\n    \"max_tokens\": 8000,\n    \"temperature\": 0.1\n  }\n}\n```\n\n\n\n## 辅助数据集\n\nBambooAI支持同时处理多个数据集，从而实现更全面和更具上下文的分析。辅助数据集功能使您能够在主要数据集之外引用和整合其他数据源。\n\n当您提出可能受益于辅助数据的问题时，BambooAI会：\n\n1. 分析哪些数据集包含相关信息\n2. 只加载必要的数据集\n3. 根据需要进行数据连接或交叉引用\n4. 生成并执行能够正确处理多数据集操作的代码\n\n### 使用方法\n\n```python\nfrom bambooai import BambooAI\nimport pandas as pd\n\n# 加载主数据集\nmain_df = pd.read_csv('main_data.csv')\n\n# 指定辅助数据集路径\nauxiliary_paths = [\n    'path\u002Fto\u002Fsupporting_data1.csv',\n    'path\u002Fto\u002Fsupporting_data2.parquet',\n    'path\u002Fto\u002Freference_data.csv'\n]\n\n# 初始化带有辅助数据集的BambooAI\nbamboo = BambooAI(\n    df=main_df,\n    auxiliary_datasets=auxiliary_paths,\n)\n```\n\n## 数据框本体（语义记忆）\n\nBambooAI支持自定义本体，以将智能体置于感兴趣的特定领域中。\n\n[本体集成维基](https:\u002F\u002Fgithub.com\u002Fpgalko\u002FBambooAI\u002Fwiki\u002FDataframe-Ontology-Integration)\n\n[Medium博客文章](https:\u002F\u002Fmedium.com\u002F@palogalko\u002Fknowledge-graph-driven-data-analysis-making-ai-speak-your-domains-language-b2200fd60413)\n\n### 使用方法\n\n```python\nfrom bambooai import BambooAI\nimport pandas as pd\n\n# 使用本体文件路径初始化\nbamboo = BambooAI(\n    df=your_dataframe,\n    df_ontology=\"path\u002Fto\u002Fontology.ttl\"\n)\n```\n\n### 它的作用\n\n本体文件使用RDF\u002FOWL表示法定义您的数据结构，包括：\n- 对象属性（关系）\n- 数据属性（属性）\n- 类（数据类型）\n- 个体（具体实例）\n\n这有助于BambooAI理解复杂的数据关系，并生成更准确的代码。\n\n## 向量数据库（情景记忆）\n\nBambooAI支持与向量数据库的集成。其主要目的是允许存储和检索成功的分析结果，从而使系统能够随着时间的推移不断进化和学习。\n\n[Medium博客文章](https:\u002F\u002Fmedium.com\u002F@palogalko\u002Flong-term-memory-in-ai-powered-sports-science-data-analysis-335777e06ac6)\n\n### 使用方法\n\n```python\nfrom 
bambooai import BambooAI\nimport pandas as pd\n\n# 使用本体文件路径初始化\nbamboo = BambooAI(\n    df=your_dataframe,\n    vector_db=True\n)\n```\n支持Pinecone和Qdrant两种向量数据库。通过环境变量配置您选择的数据库：\n\n**对于Pinecone：**\n需要一个[Pinecone账户（免费）](https:\u002F\u002Fapp.pinecone.io\u002F)，并将API密钥存储在`.env`文件中：\n```\nVECTOR_DB_TYPE=pinecone\nPINECONE_API_KEY=\u003CYOUR API KEY HERE>\nPINECONE_CLOUD=aws\nPINECONE_REGION=us-east-1\n```\n\n**对于Qdrant：**\n可以使用本地Qdrant实例或Qdrant云服务。在`.env`文件中配置：\n```\nVECTOR_DB_TYPE=qdrant\nQDRANT_URL=http:\u002F\u002Flocalhost:6333  # 用于本地Qdrant\nQDRANT_API_KEY=\u003CYOUR API KEY HERE>  # 本地可选，云端必填\n```\n\n### 它的作用\n\n成功完成分析后，用户可以对解决方案进行排名并存储。 \n- 排名较高的解决方案（>6）将使用所选模型进行向量化，并连同解决方案元数据一起存储到配置的向量数据库中（Pinecone或Qdrant）。\n- 元数据包括：\n  - 数据模型\n  - 计划\n  - 编写的代码\n  - 排名\n- 当有新任务到来时，系统会查询向量索引，检索出相似度高于阈值（0.8）的最接近匹配项。\n- 存储的解决方案将作为后续类似任务的参考，指导相关智能体完成求解过程。\n\n## 使用示例\n\n### 交互模式（Jupyter Notebook或CLI）\n```python\nimport pandas as pd\nfrom bambooai import BambooAI\n\nimport plotly.io as pio\npio.renderers.default = 'jupyterlab'\n\ndf = pd.read_csv('training_activity_data.csv')\naux_data = [\n    'path\u002Fto\u002Fwellness_data.csv',\n    'path\u002Fto\u002Fnutrition_data.parquet',\n]\n\nbamboo = BambooAI(df=df, search_tool=True, planning=True)\nbamboo.pd_agent_converse()\n```\n\n### 单次查询模式（Jupyter Notebook或CLI）\n```python\nbamboo.pd_agent_converse(\"计算心率列的30%、50%、75%和90%分位数\")\n```\n\n## Web应用程序设置\n\nWeb UI截图（交互式工作流图）：\n\n\u003Cimg width=\"2056\" alt=\"Workflow Map Feature\" src=\"https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fpgalko_BambooAI_readme_9f9d3b725d20.png\" \u002F>\n\n### 选项 1：使用 Docker（推荐）\n\nBambooAI 可以通过 Docker 轻松部署，Docker 提供了一致的运行环境，不受您的操作系统或本地配置的影响。\n\n有关详细的 Docker 设置和使用说明，请参阅我们的 [Docker 设置 Wiki](https:\u002F\u002Fgithub.com\u002Fpgalko\u002FBambooAI\u002Fwiki\u002FDocker-Installation-Guide-for-BambooAI)。\n\n使用 Docker 的方法具有以下优势：\n- 无需在本地管理 Python 依赖项\n- 在不同机器上保持一致的运行环境\n- 通过挂载卷轻松进行配置\n- 支持基于仓库和独立部署\n- **沙箱式代码执行**，提升安全性\n\n先决条件：\n- 已在您的系统上安装 
[Docker](https:\u002F\u002Fdocs.docker.com\u002Fget-docker\u002F)\n- 已在您的系统上安装 [Docker Compose](https:\u002F\u002Fdocs.docker.com\u002Fcompose\u002Finstall\u002F)\n\n### 选项 2：使用 pip 包\n\n1. 安装 BambooAI：\n   ```bash\n   pip install bambooai\n   ```\n\n2. 从仓库下载 web_app 文件夹\n\n3. 配置环境：\n   ```bash\n   cp .env.example \u003Cweb_app路径>\u002F.env\n   # 编辑 .env 文件以设置您的参数\n   ```\n\n4. 配置 LLM 代理、模型和参数：\n   ```bash\n   cp LLM_CONFIG_sample.json \u003Cweb_app路径>\u002FLLM_CONFIG.json\n   ```\n\n   - 编辑 web_app 目录下的 `web_app\u002FLLM_CONFIG.json`\n   - 为每个代理配置所需的模型：\n   ```json\n   {\n      \"agent_configs\": [\n        {\n          \"agent\": \"代码生成器\",\n          \"details\": {\n            \"model\": \"您首选的模型\",\n            \"provider\": \"提供商名称\",\n            \"max_tokens\": 4000,\n            \"temperature\": 0\n          }\n        }\n      ]\n   }\n   ```\n   - 如果未提供配置，程序将无法运行，并会显示错误信息。\n\n5. 运行应用程序：\n   ```bash\n   cd \u003Cweb_app路径>\n   python app.py\n   ```\n\n### 选项 3：使用完整仓库\n\n1. 克隆仓库：\n   ```bash\n   git clone https:\u002F\u002Fgithub.com\u002Fpgalko\u002FBambooAI.git\n   cd BambooAI\n   ```\n\n2. 安装依赖项：\n   ```bash\n   pip install -r requirements.txt\n   ```\n\n3. 配置环境：\n   ```bash\n   cp .env.example web_app\u002F.env\n   # 编辑 .env 文件以设置您的参数\n   ```\n\n4. 配置 LLM 代理、模型和参数：\n   ```bash\n   cp LLM_CONFIG_sample.json web_app\u002FLLM_CONFIG.json\n   ```\n\n   - 编辑 web_app 目录下的 `web_app\u002FLLM_CONFIG.json`\n   - 为每个代理配置所需的模型：\n   ```json\n   {\n      \"agent_configs\": [\n        {\n          \"agent\": \"代码生成器\",\n          \"details\": {\n            \"model\": \"您首选的模型\",\n            \"provider\": \"提供商名称\",\n            \"max_tokens\": 4000,\n            \"temperature\": 0\n          }\n        }\n      ]\n   }\n   ```\n   - 如果未提供配置，程序将无法运行，并会显示错误信息。\n\n5. 
运行应用程序：\n   ```bash\n   cd web_app\n   python app.py\n   ```\n\n访问 Web 界面：http:\u002F\u002Flocalhost:5000（如果使用 Docker，则为 5001）。\n\n## 模型支持\n\n### 基于 API 的模型\n- OpenAI\n- Google（Gemini）\n- Anthropic\n- Groq\n- Mistral\n- DeepSeek\n- OpenRouter\n- MiniMax\n\n### 本地模型\n- Ollama（所有模型）\n- VLLM（所有模型）\n- 各种本地模型\n\n## 环境变量\n\n`.env` 文件中需要的变量：\n\n#### 模型 API 密钥。请指定您想要使用且拥有访问权限的模型的 API 密钥。\n- `\u003C厂商名称>_API_KEY`：选定提供商的 API 密钥\n- `GEMINI_API_KEY`：如果您想使用原生 Gemini 网络搜索工具（Grounding），则需要设置此密钥。您也可以选择使用 Selenium，但其速度较慢且集成度较低。\n\n#### 搜索和向量数据库 API 密钥（可选）\n- `PINECONE_API_KEY`：用于向量数据库的可选密钥\n- `SERPER_API_KEY`：用于 Selenium 搜索的必需密钥。\n\n#### 远程 API 端点（可选）\n- `REMOTE_OLLAMA`：远程 Ollama 服务器的可选 URL\n- `REMOTE_VLLM`：远程 VLLM 服务器的可选 URL。\n\n#### 应用程序配置\n- `FLASK_SECRET`：用于对 WebApp 的会话 Cookie 进行签名\n- `WEB_SEARCH_MODE`：'google_ai' 表示使用 Gemini 原生搜索工具，'selenium' 表示使用 Selenium Web 驱动程序\n- `SELENIUM_WEBDRIVER_PATH`：Selenium Web 驱动程序的路径。如果您使用 'selenium' 搜索模式，则必须设置此路径。\n- `EXECUTION_MODE`：'local' 表示在本地运行代码执行器，'api' 表示在远程服务器或容器上运行代码执行器。\n- `EXECUTOR_API_BASE_URL`：远程代码执行器 API 的 URL。如果您使用 'api' 执行模式，则需要设置此参数，例如 http:\u002F\u002F192.168.1.201:5000。\n\n## 日志记录\n\n每次运行\u002F线程的日志都存储在 `logs\u002Fbambooai_run_log.json` 中。当新线程开始时，该文件会被覆盖。\n\n汇总日志存储在 `logs\u002Fbambooai_consolidated_log.json` 中，文件大小限制为 5MB，最多保留 3 个文件。日志信息包括：\n\n- 链 ID\n- LLM 调用详情（代理、时间戳、模型、提示、响应）\n- 令牌使用情况和费用\n- 性能指标\n- 每个模型的统计摘要\n\n## 性能对比\n\n\n有关详细评估报告，请参阅：[客观评估报告](https:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Ffiles\u002F20273296\u002FObjective.Assessment.of.AI.Tools.for.Sports.Data.Analytics_.BambooAI.vs.Generic.LLMs.pdf)\n\n\n\n## 贡献\n欢迎通过拉取请求贡献代码。请注重代码的可读性和简洁性。\n\n该项目已被 Cognition Labs 的 DeepWiki 索引，为开发者提供：\n- AI 生成的全面文档\n- 交互式代码探索\n- 上下文感知开发指导\n- 项目工作流可视化\n\n访问项目的完整交互式文档：\n[DeepWiki pgalko\u002FBambooAI](https:\u002F\u002Fdeepwiki.com\u002Fpgalko\u002FBambooAI)\n\n## 注意事项\n\n- 支持多种模型提供商和本地执行\n- 代码执行需谨慎\n- 请监控令牌使用情况\n- 开发仍在进行中\n\n## 联系方式\n\npalo@bambooai.io\n\n## 待办事项\n\n- 计划进行未来改进","# BambooAI 快速上手指南\n\nBambooAI 
是一个开源库，旨在通过大语言模型（LLM）实现基于自然语言的数据分析。它支持本地数据集，也能从外部源和 API 获取数据，帮助用户无需深厚编码功底即可洞察数据价值。\n\n## 环境准备\n\n在开始之前，请确保您的开发环境满足以下要求：\n\n*   **操作系统**：Linux, macOS 或 Windows\n*   **Python 版本**：推荐 Python 3.9 及以上版本\n*   **前置依赖**：\n    *   `pandas`：用于数据处理\n    *   `plotly`：用于数据可视化（可选，但推荐）\n*   **API 密钥**：\n    *   需配置至少一个大模型提供商的 API Key（如 OpenAI, Anthropic, Google Gemini 等）。\n    *   若启用联网搜索或向量数据库功能，需额外配置相应服务的 API Key。\n\n## 安装步骤\n\n您可以选择通过 pip 直接安装，或克隆源码进行安装。\n\n### 方式一：使用 pip 安装（推荐）\n\n```bash\npip install bambooai\n```\n\n> **提示**：国内用户若下载缓慢，可使用清华或阿里镜像源加速：\n> ```bash\n> pip install bambooai -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n> ```\n\n### 方式二：源码安装\n\n```bash\ngit clone https:\u002F\u002Fgithub.com\u002Fpgalko\u002FBambooAI.git\ncd BambooAI\npip install -r requirements.txt\n```\n\n## 基本使用\n\n以下是使用 BambooAI 进行数据分析的最简流程。\n\n### 1. 配置文件与环境变量\n\n在使用前，需要复制示例配置文件并填入您的 API 密钥。\n\n**配置环境变量 (.env)：**\n```bash\ncp .env.example .env\n# 请使用文本编辑器打开 .env 文件，填入您的 LLM API KEY 等设置\n```\n\n**配置模型与代理 (LLM_CONFIG.json)：**\n```bash\ncp LLM_CONFIG_sample.json LLM_CONFIG.json\n# 编辑 LLM_CONFIG.json，根据您的需求选择具体的模型（如 gpt-4, claude-3-5-sonnet 等）和参数\n```\n\n### 2. 代码示例\n\n创建一个 Python 脚本（例如 `demo.py`），放入您的数据文件（如 `titanic.csv`），然后运行以下代码：\n\n```python\nimport pandas as pd\nfrom bambooai import BambooAI\n\n# 设置 Plotly 渲染器（如果在 Jupyter Lab 中使用）\nimport plotly.io as pio\npio.renderers.default = 'jupyterlab'\n\n# 加载数据\ndf = pd.read_csv('titanic.csv')\n\n# 初始化 BambooAI\n# planning=True: 启用规划代理处理复杂任务\n# search_tool=True: 启用联网搜索能力\n# vector_db=False: 暂不启用向量数据库\nbamboo = BambooAI(df=df, planning=True, vector_db=False, search_tool=True)\n\n# 开始自然语言对话交互\nbamboo.pd_agent_converse()\n```\n\n运行后，终端将进入交互模式，您可以直接输入自然语言问题（例如：“预测泰坦尼克号乘客的生存率”或“绘制男女乘客生存比例图”），BambooAI 将自动生成代码、执行分析并返回结果或图表。\n\n### 3. 
在线体验\n\n如果您希望先在不配置本地环境的情况下尝试，可以使用 Google Colab 快速体验：\n\n[![Open In Colab](https:\u002F\u002Fcolab.research.google.com\u002Fassets\u002Fcolab-badge.svg)](https:\u002F\u002Fcolab.research.google.com\u002Fdrive\u002F1grKtqKD4u8cVGMoVv__umci4F7IU14vU?usp=sharing)","某电商公司的数据分析师需要在周五下班前，快速从包含百万行订单记录的本地 CSV 文件中找出“上季度复购率下降的原因”，并生成可视化报告向管理层汇报。\n\n### 没有 BambooAI 时\n- **代码编写耗时**：分析师需手动编写大量 Pandas 代码进行数据清洗、分组聚合及关联分析，耗时数小时且容易出错。\n- **技能门槛限制**：若遇到复杂的统计建模或异常值处理，非资深分析师往往需要暂停工作去查阅文档或求助同事。\n- **迭代效率低下**：当管理层提出“换个维度看”或“排除特定地区”等新问题时，必须重新修改代码并重新运行整个脚本。\n- **可视化繁琐**：将分析结果转化为直观的图表需要额外编写 Matplotlib 或 Plotly 代码，调整样式进一步拉长了交付时间。\n\n### 使用 BambooAI 后\n- **自然语言交互**：分析师直接用中文提问“分析上季度复购率下降原因”，BambooAI 自动理解意图并生成执行代码。\n- **智能代码生成与自愈**：BambooAI 自动生成包含数据清洗、复杂聚合及建模的 Python 代码，若运行报错还能自我修正，无需人工干预。\n- **敏捷多轮探索**：面对“排除华东地区再算一次”的新指令，只需在对话中补充条件，BambooAI 即刻基于上下文更新分析结果。\n- **一键可视化输出**：BambooAI 直接根据数据特征推荐并绘制专业的交互式图表（如 Plotly），瞬间完成从数据到洞察的闭环。\n\nBambooAI 将原本需要数小时的数据挖掘与编码工作压缩至分钟级，让业务人员能专注于决策而非代码调试。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fpgalko_BambooAI_7617ec6a.png","pgalko","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fpgalko_d80ba724.jpg","If knowledge is power, knowing what we don’t know is wisdom",null,"Melbourne, Australia","https:\u002F\u002Fgithub.com\u002Fpgalko",[79,83,87,91,95],{"name":80,"color":81,"percentage":82},"Python","#3572A5",55.5,{"name":84,"color":85,"percentage":86},"JavaScript","#f1e05a",26.7,{"name":88,"color":89,"percentage":90},"CSS","#663399",13.6,{"name":92,"color":93,"percentage":94},"HTML","#e34c26",4.1,{"name":96,"color":97,"percentage":98},"Dockerfile","#384d54",0.1,772,82,"2026-04-01T17:12:42","MIT","未说明",{"notes":105,"python":103,"dependencies":106},"该工具主要依赖外部大模型 API（如 OpenAI, Anthropic, Google Gemini）而非本地运行大型模型，因此对本地 GPU 和显存无特殊强制要求。若启用向量数据库功能需配置 Pinecone API Key；若启用网络搜索需相应 API 权限。支持通过 Docker 部署或 pip 安装，也可在 Jupyter\u002FColab 
环境中运行。",[107,108,109,110,111,112],"pandas","flask","pinecone-client","openai","anthropic","google-generativeai",[13,35,15,114,115,16,14],"插件","其他",[117,118,107,119,120,121,122,123,124,125,126,127,128,129,111,130,131],"llm","openai-api","python","ai","data-analysis","data-science","ai-agents","vector-database","pinecone","gemini","groq","mistral","ollama","vllm","docker","2026-03-27T02:49:30.150509","2026-04-07T02:34:36.749027",[135,140,145,150,155],{"id":136,"question_zh":137,"answer_zh":138,"source_url":139},20777,"如何运行 Web UI 界面？","请升级到版本 `0.4.00` 或更高，该版本已包含 Web 应用程序。具体的设置说明请参阅项目根目录下的 `README.md` 文件。","https:\u002F\u002Fgithub.com\u002Fpgalko\u002FBambooAI\u002Fissues\u002F17",{"id":141,"question_zh":142,"answer_zh":143,"source_url":144},20778,"如何处理包含多个工作表（sheets）的 Excel 文件？","建议在单次执行中指示 AI 合并文件或处理数据并保存为新文件，然后在后续执行中使用这个合并后的文件。这样可以减少 Token 消耗。如果在循环（loop）模式下使用，只需在循环开始时指令一次（例如：1. 合并文件；2. 加载数据到 DataFrame；3. 处理数据），AI 会在后续提示中记住上下文。注意：输入 \"exit\" 结束循环后内存会被清除，重新开始时需要再次指令。","https:\u002F\u002Fgithub.com\u002Fpgalko\u002FBambooAI\u002Fissues\u002F1",{"id":146,"question_zh":147,"answer_zh":148,"source_url":149},20779,"导入 `bambooai` 时为什么报错提示必须设置 `OPENAI_API_KEY`？","这是由于新版 `openai` 库的初始化要求。该问题已在最新版本中修复。如果您遇到此问题，请确保将 `bambooai` 库更新到最新版本。","https:\u002F\u002Fgithub.com\u002Fpgalko\u002FBambooAI\u002Fissues\u002F11",{"id":151,"question_zh":152,"answer_zh":153,"source_url":154},20780,"是否支持 Qdrant 向量数据库以实现完全本地化部署？","是的，目前社区贡献者已添加了对 Qdrant 的支持，现在可以使用 Qdrant 作为向量存储来实现完全本地化的用例，不再局限于 Pinecone。","https:\u002F\u002Fgithub.com\u002Fpgalko\u002FBambooAI\u002Fissues\u002F49",{"id":156,"question_zh":157,"answer_zh":158,"source_url":159},20781,"导入时出现 `ImportError: lxml.html.clean module is now a separate project` 错误如何解决？","这是因为 `lxml` 的依赖项发生了变化。请在导入 `bambooai` 之前，先运行以下命令安装缺失的组件：\n`pip install lxml[html_clean]`\n或者单独安装：\n`pip install 
lxml_html_clean`","https:\u002F\u002Fgithub.com\u002Fpgalko\u002FBambooAI\u002Fissues\u002F19",[161,166,171,176,181,186,191,196,201,206,211,216,221,226,231,236,241,246,251,256],{"id":162,"version":163,"summary_zh":164,"released_at":165},126723,"v0.4.26","此项新增对 Qdrant 向量存储的支持，使应用程序能够在完全本地环境中运行。\n\n感谢 @Anush008 ","2025-10-31T04:33:49",{"id":167,"version":168,"summary_zh":169,"released_at":170},126724,"v0.4.25","重构了 groq_models.py 模块，现已支持工具调用。\n- 新增对 Groq API 中 “openai\u002Fgpt-oss-120b” 和 “moonshotai\u002Fkimi-k2-instruct”的支持。\n- 新增对 “gpt-5” 和 “gpt-5-mini”的支持（OpenAI 的验证要求\u002F流程真让人头疼！）","2025-08-10T03:16:56",{"id":172,"version":173,"summary_zh":174,"released_at":175},126725,"v0.4.24","用于 SweatStack 窗口选择的日期选择器（https:\u002F\u002Fgithub.com\u002Fpgalko\u002FBambooAI\u002Fpull\u002F47）\n* 将 Docker Compose 中的默认端口改为 5001，因为 macOS 已经使用了 5000 端口。\n\n* 为 SweatStack 的日期选择添加日期选择器，并改进按钮加载提示信息。\n\n* 为 SweatStack 数据选择重新添加圈速指标。\n\n@AartGoossens","2025-07-07T06:37:54",{"id":177,"version":178,"summary_zh":179,"released_at":180},126726,"v0.4.22","添加提示增强功能，以更好地理解数据集  \n- 除了 `df.head` 外，还增加了数据集摘要\u002F描述。这对于复合数据集（例如 SweatStack 的多运动员数据集）是必需的。  \n- 重新构建本体，以匹配 SweatStack 和 Garmin 数据集。  \n- 更好地管理分析师选择器的上下文窗口。  \n- 进行了一些样式改进。\n\n\u003Cimg width=\"568\" alt=\"image\" src=\"https:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Fassets\u002Fae281ec2-7f78-4ab0-baf1-6e0142b958e4\" \u002F>\n\n@AartGoossens \n","2025-06-25T06:06:48",{"id":182,"version":183,"summary_zh":184,"released_at":185},126727,"v0.4.21","新增导出为PDF功能  \n- 当分析完成后，用户现在可以将摘要和可视化图表导出为PDF格式  \n- 更新了LLM_CONFIG.json文件，加入新的Gemini Pro和Flash模型","2025-06-18T04:05:58",{"id":187,"version":188,"summary_zh":189,"released_at":190},126728,"v0.4.20","新增了生成和探索想法的功能  \n- 新增一项功能，允许用户生成和探索想法。当分析完成后，用户可以请求对原始任务进行变体，以探索和执行这些变体版本。用户可以在1到5的范围内设置变体的程度。  \n- 更新了工作流图，以指示与上述功能相关的分支情况。  \n- 主题切换选项已移至设置菜单下。  \n- 修复了若干 bug。  \n\u003Cimg width=\"395\" alt=\"image\" 
src=\"https:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Fassets\u002Fbd691a6b-9903-48c1-b9cb-124130f6f8f4\" \u002F>","2025-06-16T13:01:47",{"id":192,"version":193,"summary_zh":194,"released_at":195},126729,"v0.4.19","消息管理的重大重构，以及一些错误修复  \n感谢 @rnYulianto ","2025-06-14T09:13:56",{"id":197,"version":198,"summary_zh":199,"released_at":200},126730,"0.4.18","与 SweatStack 集成\n\nSweatStack 是一款面向运动数据应用的后端即服务（BaaS）。更多关于 SweatStack 的信息，请访问 [sweatstack.no](https:\u002F\u002Fsweatstack.no\u002F)。\n\n感谢 @AartGoossens\n\n\u003Cimg width=\"366\" alt=\"image\" src=\"https:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Fassets\u002Fed38e8ed-ae51-435a-90ed-768c74294c41\" \u002F>\n\n\n","2025-06-13T02:27:06",{"id":202,"version":203,"summary_zh":204,"released_at":205},126731,"v0.4.17","- 修复了若干 bug\n- 修复了自定义代码执行问题\n- 改进了文本高亮的处理方式\n- 优化了 OutputManager 类，并清理了 QA 模块中的系统消息\n\n\u003Cimg width=\"994\" alt=\"image\" src=\"https:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Fassets\u002F9afa42dd-c953-40c2-875b-5065b3316dfc\" \u002F>\n","2025-06-10T08:39:59",{"id":207,"version":208,"summary_zh":209,"released_at":210},126732,"v0.4.15","新增手动情景记忆搜索功能  \n- 该功能允许用户手动检索存储在情景记忆中的过往交互记录。  \n- 使用此功能需先启用 vector_db，并完成索引初始化。  \n- 返回 top_k=10 条结果，且仅保留得分大于 0.2 的记录。","2025-06-07T08:44:18",{"id":212,"version":213,"summary_zh":214,"released_at":215},126733,"v0.4.13","Fixed non-stopping spinner, added the new Gemini Pro model\r\n- Adds the new Gemini Pro 6-5 model\r\n- Added thinking budget\r\n- Fixes non-stopping tool call spinner\r\n- Fixes analyst selector instructions when web search is required","2025-06-06T08:09:15",{"id":217,"version":218,"summary_zh":219,"released_at":220},126734,"v0.4.12","More styling.... God I hate this, I now have a lot more respect for the frontend developers and designers. 
I am pretty sure I have broken something...","2025-06-03T12:51:08",{"id":222,"version":223,"summary_zh":224,"released_at":225},126735,"v0.4.11","Some styling improvements with backward compatibility","2025-06-02T12:12:15",{"id":227,"version":228,"summary_zh":229,"released_at":230},126736,"v0.4.09","- Format Gemini thoughts\r\n- Custom code exec bug fix","2025-06-01T10:11:15",{"id":232,"version":233,"summary_zh":234,"released_at":235},126737,"v0.4.08","### Major rebuild of QA Retrieval\r\n- This is a first step towards episodic memory functionality\r\n- Adds a seamless integration of the vector storage for the selected tasks\r\n- Links with the local favourite solutions storage.\r\n- When the solution is stored in the favourites, the `intent_breakdown` is vectorised and stored in Pinecone vector db together with the relevant metadata\r\n- When the new task lands the vector_db is searched for similar tasks from the past. If the match is found the metadata is retrieved and offered as examples to guide the solving of the new task.\r\n- Prompts have been adjusted to allow \"smarter\" integration of the retrieved similar past solutions","2025-06-01T03:04:18",{"id":237,"version":238,"summary_zh":239,"released_at":240},126738,"v0.4.07","Added feature to generate datasets and files\r\n- BambooAI can now capture any datasets that resulted from the execution of the code, and consolidates them in the \"datasets\u002Fgenerated\" folder\r\n- User can view or download the generated datasets using Web App or directly from the directory\r\n- The feature works for both local and remote code execution\r\n- Fixed a substantial bug where `default_prompts.yaml` was not being included in the package\r\n- Version change to 0.4.07","2025-05-28T02:19:30",{"id":242,"version":243,"summary_zh":244,"released_at":245},126739,"v0.4.06","Add ability to use custom prompt templates, and support for the new Anthropic Claude 4 models.\r\n- Changed prompt template format to yaml\r\n- Adds a new class 
`PromptManager` to handle prompt templates assignment\r\n- Adds a new parameter `custom_prompt_file` to allow users to submit path to their custom prompt templates yaml file, that will overwrite the default prompt templates\r\n- All prompt related handling now consolidated within messages package\r\n- Added support for the new Anthropic Claude 4 (Sonnet, Opus) models","2025-05-24T03:32:24",{"id":247,"version":248,"summary_zh":249,"released_at":250},126740,"v0.4.05","### Enable use of auxiliary datasets and generic ontologies\r\n\r\n- Adds functionality to use auxiliary datasets in .csv or .parquet formats\r\n    - Added a parameter to provide paths to aux dataset\r\n    - Up to 3 aux datasets can be included at the time\r\n    - No need to load into memory upon instantiation. The datasets are puled and merged\u002Fjoined during the code execution if they are relevant to the analysis task.\r\n    - Supported if running inside docker container, or using remote code executor API\r\n\r\n- Adds a new UI handling for Ontologies where they are now attached the same way as datasets\r\n\r\n- The framework now offers a support for generic type of ontologies, describing schema and content of one or more custom datasets\r\n\r\n- Removed feature to attach aux datasets via function calls\r\n\r\n- Removed feature to attach datasets from the Cloud storage services. Will need to update the logic, and will re-add in a future release\r\n\r\n- Some bug fixes\r\n\r\n\u003Cimg width=\"1017\" alt=\"Screenshot 2025-05-17 at 2 22 33 pm\" src=\"https:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Fassets\u002Fe963525b-2f20-478c-a24e-6ed1d3d8f2a2\" \u002F>\r\n","2025-05-17T04:26:49",{"id":252,"version":253,"summary_zh":254,"released_at":255},126741,"0.4.04","Added visual workflow navigation feature\r\n- Added a new menu item \"Workflow Map (CMD+M)\" to visually inspect and navigate the branches of the current workflow. 
This has several benefits including ease of navigation, better context window management, better response accuracy, better management of checkpoints etc.\r\n- The feature is based on interactive mermaid diagrams tracking the flow and branching of the enquiry\r\n\r\n\u003Cimg width=\"2056\" alt=\"Workflow Map Feature\" src=\"https:\u002F\u002Fgithub.com\u002Fuser-attachments\u002Fassets\u002F931d8a49-cf06-43df-b753-b4c26aa7c973\" \u002F>","2025-05-11T04:56:56",{"id":257,"version":258,"summary_zh":259,"released_at":260},126742,"v0.4.03","- Improvements, bug fixes \r\n- Added support for the new Gemini 2.5 Pro model","2025-05-09T08:32:30"]