[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"similar-mrdbourke--cs329s-ml-deployment-tutorial":3,"tool-mrdbourke--cs329s-ml-deployment-tutorial":64},[4,17,27,35,43,56],{"id":5,"name":6,"github_repo":7,"description_zh":8,"stars":9,"difficulty_score":10,"last_commit_at":11,"category_tags":12,"status":16},3808,"stable-diffusion-webui","AUTOMATIC1111\u002Fstable-diffusion-webui","stable-diffusion-webui 是一个基于 Gradio 构建的网页版操作界面，旨在让用户能够轻松地在本地运行和使用强大的 Stable Diffusion 图像生成模型。它解决了原始模型依赖命令行、操作门槛高且功能分散的痛点，将复杂的 AI 绘图流程整合进一个直观易用的图形化平台。\n\n无论是希望快速上手的普通创作者、需要精细控制画面细节的设计师，还是想要深入探索模型潜力的开发者与研究人员，都能从中获益。其核心亮点在于极高的功能丰富度：不仅支持文生图、图生图、局部重绘（Inpainting）和外绘（Outpainting）等基础模式，还独创了注意力机制调整、提示词矩阵、负向提示词以及“高清修复”等高级功能。此外，它内置了 GFPGAN 和 CodeFormer 等人脸修复工具，支持多种神经网络放大算法，并允许用户通过插件系统无限扩展能力。即使是显存有限的设备，stable-diffusion-webui 也提供了相应的优化选项，让高质量的 AI 艺术创作变得触手可及。",162132,3,"2026-04-05T11:01:52",[13,14,15],"开发框架","图像","Agent","ready",{"id":18,"name":19,"github_repo":20,"description_zh":21,"stars":22,"difficulty_score":23,"last_commit_at":24,"category_tags":25,"status":16},1381,"everything-claude-code","affaan-m\u002Feverything-claude-code","everything-claude-code 是一套专为 AI 编程助手（如 Claude Code、Codex、Cursor 等）打造的高性能优化系统。它不仅仅是一组配置文件，而是一个经过长期实战打磨的完整框架，旨在解决 AI 代理在实际开发中面临的效率低下、记忆丢失、安全隐患及缺乏持续学习能力等核心痛点。\n\n通过引入技能模块化、直觉增强、记忆持久化机制以及内置的安全扫描功能，everything-claude-code 能显著提升 AI 在复杂任务中的表现，帮助开发者构建更稳定、更智能的生产级 AI 代理。其独特的“研究优先”开发理念和针对 Token 消耗的优化策略，使得模型响应更快、成本更低，同时有效防御潜在的攻击向量。\n\n这套工具特别适合软件开发者、AI 研究人员以及希望深度定制 AI 工作流的技术团队使用。无论您是在构建大型代码库，还是需要 AI 协助进行安全审计与自动化测试，everything-claude-code 都能提供强大的底层支持。作为一个曾荣获 Anthropic 黑客大奖的开源项目，它融合了多语言支持与丰富的实战钩子（hooks），让 AI 真正成长为懂上",138956,2,"2026-04-05T11:33:21",[13,15,26],"语言模型",{"id":28,"name":29,"github_repo":30,"description_zh":31,"stars":32,"difficulty_score":23,"last_commit_at":33,"category_tags":34,"status":16},2271,"ComfyUI","Comfy-Org\u002FComfyUI","ComfyUI 是一款功能强大且高度模块化的视觉 AI 引擎，专为设计和执行复杂的 Stable Diffusion 图像生成流程而打造。它摒弃了传统的代码编写模式，采用直观的节点式流程图界面，让用户通过连接不同的功能模块即可构建个性化的生成管线。\n\n这一设计巧妙解决了高级 
AI 绘图工作流配置复杂、灵活性不足的痛点。用户无需具备编程背景，也能自由组合模型、调整参数并实时预览效果，轻松实现从基础文生图到多步骤高清修复等各类复杂任务。ComfyUI 拥有极佳的兼容性，不仅支持 Windows、macOS 和 Linux 全平台，还广泛适配 NVIDIA、AMD、Intel 及苹果 Silicon 等多种硬件架构，并率先支持 SDXL、Flux、SD3 等前沿模型。\n\n无论是希望深入探索算法潜力的研究人员和开发者，还是追求极致创作自由度的设计师与资深 AI 绘画爱好者，ComfyUI 都能提供强大的支持。其独特的模块化架构允许社区不断扩展新功能，使其成为当前最灵活、生态最丰富的开源扩散模型工具之一，帮助用户将创意高效转化为现实。",107662,"2026-04-03T11:11:01",[13,14,15],{"id":36,"name":37,"github_repo":38,"description_zh":39,"stars":40,"difficulty_score":23,"last_commit_at":41,"category_tags":42,"status":16},3704,"NextChat","ChatGPTNextWeb\u002FNextChat","NextChat 是一款轻量且极速的 AI 助手，旨在为用户提供流畅、跨平台的大模型交互体验。它完美解决了用户在多设备间切换时难以保持对话连续性，以及面对众多 AI 模型不知如何统一管理的痛点。无论是日常办公、学习辅助还是创意激发，NextChat 都能让用户随时随地通过网页、iOS、Android、Windows、MacOS 或 Linux 端无缝接入智能服务。\n\n这款工具非常适合普通用户、学生、职场人士以及需要私有化部署的企业团队使用。对于开发者而言，它也提供了便捷的自托管方案，支持一键部署到 Vercel 或 Zeabur 等平台。\n\nNextChat 的核心亮点在于其广泛的模型兼容性，原生支持 Claude、DeepSeek、GPT-4 及 Gemini Pro 等主流大模型，让用户在一个界面即可自由切换不同 AI 能力。此外，它还率先支持 MCP（Model Context Protocol）协议，增强了上下文处理能力。针对企业用户，NextChat 提供专业版解决方案，具备品牌定制、细粒度权限控制、内部知识库整合及安全审计等功能，满足公司对数据隐私和个性化管理的高标准要求。",87618,"2026-04-05T07:20:52",[13,26],{"id":44,"name":45,"github_repo":46,"description_zh":47,"stars":48,"difficulty_score":23,"last_commit_at":49,"category_tags":50,"status":16},2268,"ML-For-Beginners","microsoft\u002FML-For-Beginners","ML-For-Beginners 是由微软推出的一套系统化机器学习入门课程，旨在帮助零基础用户轻松掌握经典机器学习知识。这套课程将学习路径规划为 12 周，包含 26 节精炼课程和 52 道配套测验，内容涵盖从基础概念到实际应用的完整流程，有效解决了初学者面对庞大知识体系时无从下手、缺乏结构化指导的痛点。\n\n无论是希望转型的开发者、需要补充算法背景的研究人员，还是对人工智能充满好奇的普通爱好者，都能从中受益。课程不仅提供了清晰的理论讲解，还强调动手实践，让用户在循序渐进中建立扎实的技能基础。其独特的亮点在于强大的多语言支持，通过自动化机制提供了包括简体中文在内的 50 多种语言版本，极大地降低了全球不同背景用户的学习门槛。此外，项目采用开源协作模式，社区活跃且内容持续更新，确保学习者能获取前沿且准确的技术资讯。如果你正寻找一条清晰、友好且专业的机器学习入门之路，ML-For-Beginners 将是理想的起点。",84991,"2026-04-05T10:45:23",[14,51,52,53,15,54,26,13,55],"数据工具","视频","插件","其他","音频",{"id":57,"name":58,"github_repo":59,"description_zh":60,"stars":61,"difficulty_score":10,"last_commit_at":62,"category_tags":63,"status":16},3128,"ragflow","infiniflow\u002Fragflow","RAGFlow 
是一款领先的开源检索增强生成（RAG）引擎，旨在为大语言模型构建更精准、可靠的上下文层。它巧妙地将前沿的 RAG 技术与智能体（Agent）能力相结合，不仅支持从各类文档中高效提取知识，还能让模型基于这些知识进行逻辑推理和任务执行。\n\n在大模型应用中，幻觉问题和知识滞后是常见痛点。RAGFlow 通过深度解析复杂文档结构（如表格、图表及混合排版），显著提升了信息检索的准确度，从而有效减少模型“胡编乱造”的现象，确保回答既有据可依又具备时效性。其内置的智能体机制更进一步，使系统不仅能回答问题，还能自主规划步骤解决复杂问题。\n\n这款工具特别适合开发者、企业技术团队以及 AI 研究人员使用。无论是希望快速搭建私有知识库问答系统，还是致力于探索大模型在垂直领域落地的创新者，都能从中受益。RAGFlow 提供了可视化的工作流编排界面和灵活的 API 接口，既降低了非算法背景用户的上手门槛，也满足了专业开发者对系统深度定制的需求。作为基于 Apache 2.0 协议开源的项目，它正成为连接通用大模型与行业专有知识之间的重要桥梁。",77062,"2026-04-04T04:44:48",[15,14,13,26,54],{"id":65,"github_repo":66,"name":67,"description_en":68,"description_zh":69,"ai_summary_zh":70,"readme_en":71,"readme_zh":72,"quickstart_zh":73,"use_case_zh":74,"hero_image_url":75,"owner_login":76,"owner_name":77,"owner_avatar_url":78,"owner_bio":79,"owner_company":80,"owner_location":80,"owner_email":80,"owner_twitter":76,"owner_website":81,"owner_url":82,"languages":83,"stars":99,"forks":100,"last_commit_at":101,"license":102,"difficulty_score":103,"env_os":104,"env_gpu":104,"env_ram":104,"env_deps":105,"category_tags":113,"github_topics":114,"view_count":23,"oss_zip_url":80,"oss_zip_packed_at":80,"status":16,"created_at":123,"updated_at":124,"faqs":125,"releases":126},2976,"mrdbourke\u002Fcs329s-ml-deployment-tutorial","cs329s-ml-deployment-tutorial","Code and files to go along with CS329s machine learning model deployment tutorial.","cs329s-ml-deployment-tutorial 是一套源自斯坦福大学 CS329s 课程的实战指南，旨在帮助开发者将训练好的机器学习模型从本地环境部署到云端，并构建可交互的 Web 应用。它主要解决了机器学习项目中“最后一公里”的难题：即如何将 Jupyter Notebook 中的实验代码转化为稳定、可扩展且能被公众访问的生产级服务。\n\n这套教程特别适合具有一定 Python 基础的数据科学家、机器学习工程师以及希望提升工程化能力的研究人员。通过跟随指引，用户将利用 Google Cloud Platform（GCP）作为基础设施，学习如何容器化模型、使用 AI Platform 托管推理服务，并最终通过 Streamlit 框架快速搭建名为\"Food Vision\"的食物图像分类应用，将其发布至互联网。\n\n其技术亮点在于提供了一条完整的端到端链路：从本地虚拟环境配置、Docker 容器封装，到云端模型托管与应用部署的全流程代码与文件支持。教程不仅包含详细的步骤说明和视频演示，还特别强调了云资源管理与成本控制，提醒用户在实践结束后及时关闭服务以避免产生额外费用。对于想要跨","cs329s-ml-deployment-tutorial 是一套源自斯坦福大学 CS329s 课程的实战指南，旨在帮助开发者将训练好的机器学习模型从本地环境部署到云端，并构建可交互的 Web 
应用。它主要解决了机器学习项目中“最后一公里”的难题：即如何将 Jupyter Notebook 中的实验代码转化为稳定、可扩展且能被公众访问的生产级服务。\n\n这套教程特别适合具有一定 Python 基础的数据科学家、机器学习工程师以及希望提升工程化能力的研究人员。通过跟随指引，用户将利用 Google Cloud Platform（GCP）作为基础设施，学习如何容器化模型、使用 AI Platform 托管推理服务，并最终通过 Streamlit 框架快速搭建名为\"Food Vision\"的食物图像分类应用，将其发布至互联网。\n\n其技术亮点在于提供了一条完整的端到端链路：从本地虚拟环境配置、Docker 容器封装，到云端模型托管与应用部署的全流程代码与文件支持。教程不仅包含详细的步骤说明和视频演示，还特别强调了云资源管理与成本控制，提醒用户在实践结束后及时关闭服务以避免产生额外费用。对于想要跨越理论与生产落地鸿沟的学习者而言，这是一个极佳的入门实操项目。","# CS329s Machine Learning Model Deployment Tutorial\n\n**Warning:** Following the steps of what's in here may cost you money (Google Cloud is a paid service), be sure to shut down any Google Cloud service you no longer need to use to avoid charges.\n\n**Thank you to:** [Mark Douthwaite's incredible ML + software engineering blog](https:\u002F\u002Fmark.douthwaite.io\u002F), [Lj Miranda's amazing post on software engineering tools for data scientists](https:\u002F\u002Fljvmiranda921.github.io\u002Fnotebook\u002F2020\u002F11\u002F15\u002Fdata-science-swe\u002F), [Chip Huyen](https:\u002F\u002Fhuyenchip.com\u002F) and Ashik Shafi's gracious feedback on the raw materials of this tutorial.\n\n## What is in here?\n\nCode and files to go along with [CS329s machine learning model deployment tutorial](https:\u002F\u002Fstanford-cs329s.github.io\u002Fsyllabus.html).\n\n* Watch the [video tutorial on YouTube](https:\u002F\u002Fyoutu.be\u002Ffw6NMQrYc6w)\n* See the [slides](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Fblob\u002Fmain\u002FCS329s-deploying-ml-models-tutorial.pdf)\n* Get the [model training code](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Fblob\u002Fmain\u002Fmodel_training.ipynb)\n\n## What do I need to get started?\n\n* A [Google Cloud account](https:\u002F\u002Fcloud.google.com\u002Fgcp) and a [Google Cloud Project](https:\u002F\u002Fcloud.google.com\u002Fresource-manager\u002Fdocs\u002Fcreating-managing-projects)\n* [Google Cloud SDK 
installed](https:\u002F\u002Fcloud.google.com\u002Fsdk\u002Fdocs\u002Finstall) (gcloud CLI utitly)\n* Trained [machine learning model(s)](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Fblob\u002Fmain\u002Fmodel_training.ipynb), our app uses an image classification model trained on a number of different classes of food from [Food101 dataset](https:\u002F\u002Fwww.kaggle.com\u002Fdansbecker\u002Ffood-101)\n* [Docker installed](https:\u002F\u002Fdocs.docker.com\u002Fget-docker\u002F)\n\n**Warning (again):** Using Google Cloud services costs money. If you don't have credits (you get $300USD when you first sign up), you will be charged. Delete and shutdown your work when finished to avoid charges.\n\n## What will I end up with?\n\nIf you go through the steps below without fail, you should end up with a [Streamlit](http:\u002F\u002Fstreamlit.io\u002F)-powered web application (Food Vision 🍔👁) for classifying images of food (deployed on Google Cloud if you want).\n\nOur app running locally making a prediction on an image of ice cream (using a machine learning model deployed on Google Cloud):\n![food vision demo](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmrdbourke_cs329s-ml-deployment-tutorial_readme_94b76013580d.gif)\n\n## Okay, I'm in, how can I use it?\n\nWe're going to tackle this in 3 parts:\n1. Getting the app running (running Streamlit on our local machines)\n2. Deploying a machine learning model to AI Platform (getting Google Cloud to host one of our models)\n3. Deploying our app to App Engine (getting our app on the internet)\n\n### 1. Getting the app running\n\n1. Clone this repo\n```\ngit clone https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\n```\n\n2. Change into the `food-vision` directory\n```\ncd food-vision\n```\n\n3. 
Create and activate a virtual environment (call it what you want, I called mine \"env\")\n```\npip install virtualenv\nvirtualenv \u003CENV-NAME>\nsource \u003CENV-NAME>\u002Fbin\u002Factivate\n```\n4. Install the required dependencies (Streamlit, TensorFlow, etc)\n```\npip install -r requirements.txt\n```\n5. Activate Streamlit and run `app.py`\n```\nstreamlit run app.py\n``` \nRunning the above command should result in you seeing the following:\n![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmrdbourke_cs329s-ml-deployment-tutorial_readme_aa3a70f2a94c.png)\n\nThis is Food Vision 🍔👁 the app we're making.\n\n6. Try an upload an image (e.g. one of the ones in [`food-images\u002F`](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Ftree\u002Fmain\u002Ffood-images) such as [`ice_cream.jpeg`](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Fblob\u002Fmain\u002Ffood-images\u002Fice_cream.jpeg) and it should load.\n\n7. Notice a \"Predict\" button appears when you upload an image to the app, click it and see what happens.\n\n8. The app breaks because it tries to contact Google Cloud Platform (GCP) looking for a machine learning model and it either:\n * won't be able to find the model (wrong API call or the model doesn't exist)\n * won't be able to use the existing model because the credentials are wrong (seen below)\n![credential error](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmrdbourke_cs329s-ml-deployment-tutorial_readme_5c92c5517828.png)\n \nThis is a good thing! It means our app is trying to contact GCP (using functions in `food-vision\u002Fapp.py` and `food-vision\u002Futils.py`). \n\nNow let's learn how to get a model hosted on GCP.\n\n### 2. Getting a machine learning model hosted on GCP\n \n> How do I fix this error? 
(Streamlit can't access your model) \n\nTo fix it, we're going to need a couple of things:\n* A trained machine learning model (suited to our problem, we'll be uploading this to Google Storage)\n* A Google Storage bucket (to store our trained model)\n* A hosted model on Google AI Platform (we'll connect the model in our Google Storage bucket to here)\n* A service key to access our hosted model on Google AI Platform\n\nLet's see how we'll can get the above.\n\n1. To train a machine learning model and save it in the [`SavedModel`](https:\u002F\u002Fwww.tensorflow.org\u002Fguide\u002Fsaved_model) format (this TensorFlow specific, do what you need for PyTorch), we can follow the steps in [`model_training.ipynb`](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Fblob\u002Fmain\u002Fmodel_training.ipynb).\n\n2. Once we've got a `SavedModel`, we'll upload it Google Storage but before we do that, we'll need to [create a Google Storage Bucket](https:\u002F\u002Fcloud.google.com\u002Fstorage\u002Fdocs\u002Fcreating-buckets) (a bucket is like a hard drive on the cloud).\n\n![creating a bucket on google cloud](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmrdbourke_cs329s-ml-deployment-tutorial_readme_6f5d1050df31.png)\n\nCall your bucket whatever you like (e.g. my_cool_bucket_name). You'll want to store your data in a region which is either closest to you or wherever you're allowed to store data (if this doesn't make sense, store it in the US).\n\n3. 
With a bucket created, we can [copy our model to the bucket](https:\u002F\u002Fcloud.google.com\u002Fstorage\u002Fdocs\u002Fuploading-objects#gsutil).\n```\n## Uploading a model to Google Storage from within Colab ##\n\n# Authorize Colab and initalize gcloud (enter the appropriate inputs when asked)\nfrom google.colab import auth\nauth.authenticate_user()\n!curl https:\u002F\u002Fsdk.cloud.google.com | bash\n!gcloud init\n\n# Upload SavedModel to Google Storage Bucket\n!gsutil cp -r \u003CYOUR_MODEL_PATH> \u003CYOUR_GOOGLE_STORAGE_BUCKET>\n```\n\n4. [Connect model in bucket to AI Platform](https:\u002F\u002Fcloud.google.com\u002Fai-platform\u002Fprediction\u002Fdocs\u002Fdeploying-models) (this'll make our model accessible via an API call, if you're not sure what an API call is, imagine writing a function that could trigger our model from anywhere on the internet)\n * Don't like clicking around Google Cloud's console? You can also [use `gcloud` to create a model in AI Platform](https:\u002F\u002Fcloud.google.com\u002Fsdk\u002Fgcloud\u002Freference\u002Fai-platform\u002Fmodels\u002Fcreate) on the command line \n* Create a model on AI Platform (choose a region which is closest to you or where you'd like your model to be accessed from):\n![creating a model on AI Platform](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmrdbourke_cs329s-ml-deployment-tutorial_readme_e3f48946f3fd.png)\n* Once you've got a model on AI Platform (above), you'll need to create a model version which matches up with what your model was trained with (e.g. 
choose TensorFlow if your model is trained with TensorFlow):\n![creating a model version on AI Platform](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmrdbourke_cs329s-ml-deployment-tutorial_readme_1e6adca4b52b.png)\n* And then link your model version to your trained model in Google Storage:\n![linking a model version to Google Storage](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmrdbourke_cs329s-ml-deployment-tutorial_readme_dfd9eb6a620d.png)\n\n5. Create a [service account to access AI Platform](https:\u002F\u002Fcloud.google.com\u002Fiam\u002Fdocs\u002Fcreating-managing-service-accounts) (GCP loves permissions, it's for the security of your app)\n * You'll want to make a service account with permissions to use the \"ML Engine Developer\" role\n\n![ml developer role permission](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmrdbourke_cs329s-ml-deployment-tutorial_readme_7eebed6ea4b0.png)\n\n6. Once you've got an active service account, [create and download its key](https:\u002F\u002Fcloud.google.com\u002Fiam\u002Fdocs\u002Fcreating-managing-service-account-keys) (this will come in the form of a .JSON file)\n * 🔑 **Note:** Service keys grant access to your GCP account, keep this file private (e.g add `*.json` to your `.gitignore` so you don't accidently add it to GitHub)\n\n7. 
Update the following variables:\n * In `app.py`, change the existing GCP key path to your key path:\n```\n# Google Cloud Services look for these when your app runs\n\n# Old\nos.environ[\"GOOGLE_APPLICATION_CREDENTIALS\"] = \"daniels-dl-playground-4edbcb2e6e37.json\"\n\n# New \nos.environ[\"GOOGLE_APPLICATION_CREDENTIALS\"] = \"\u003CPATH_TO_YOUR_KEY>\"\n```\n * In `app.py`, change the GCP project and region to your GCP project and region\n```\n# Old\nPROJECT = \"daniels-dl-playground\"\nREGION = \"us-central1\" \n\n# New\nPROJECT = \"\u003CYOUR_GCP_PROJECT_NAME>\"\nREGION = \"\u003CYOUR_GCP_REGION>\"\n```\n * In `utils.py`, change the `\"model_name\"` key of `\"model_1\"` to your model name:\n ```\n # Old\n classes_and_models = {\n    \"model_1\": {\n        \"classes\": base_classes,\n        \"model_name\": \"efficientnet_model_1_10_classes\" \n    }\n }\n \n # New\n  classes_and_models = {\n    \"model_1\": {\n        \"classes\": base_classes,\n        \"model_name\": \"\u003CYOUR_AI_PLATFORM_MODEL_NAME>\" \n    }\n }\n```\n\n8. Retry the app to see if it works (refresh the Streamlit app by pressing R or refreshing the page and then reupload an image and click \"Predict\")\n\n![what you'll see when you click the predict button and your model is hosted correctly](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmrdbourke_cs329s-ml-deployment-tutorial_readme_23d298a18c68.png)\n  \n### 3. Deploying the whole app to GCP\n\n> Okay, I've fixed the permissions error, how do I deploy my model\u002Fapp?\n \nI'm glad you asked...\n \n1. run `make gcloud-deploy`... 
wait 5-10 mins and your app will be on App Engine (as long as you've activated the App Engine API)\n\n...and you're done\n \n> But wait, what happens when you run `make gcloud-deploy`?\n\nWhen you run `make gcloud-deploy`, the `gcloud-deploy` command within the Makefile ([`food-vision\u002FMakefile`](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Fblob\u002Fmain\u002Ffood-vision\u002FMakefile)) gets triggered. \n\n`make gcloud-deploy` is actually an alias for running:\n\n```\ngcloud app deploy app.yaml\n```\n\nThis is `gcloud`'s way of saying \"Hey, Google Cloud, kick off the steps you need to do to get our locally running app (`food-vision\u002Fapp.py`) running on App Engine.\"\n\nTo do this, the `gcloud app deploy` command does a number of things:\n* Our app is put into a [Docker container](https:\u002F\u002Fwww.docker.com\u002Fresources\u002Fwhat-container) defined by [`[food-vision\u002FDockerfile]`](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Fblob\u002Fmain\u002Ffood-vision\u002FDockerfile) (imagine a Docker container as a box which contains our locally running app and everything it needs to run, once it's in the box, the box can be run anywhere Docker is available and it should work and the Dockerfile defines how the container should be created).\n* Once the Docker container is created, it becomes a Docker image (confusing, I know but think of a Docker image as an immutable Docker container, e.g. 
it won't change when we move it somewhere).\n* The Docker image is then uploaded to [Google Container Registry (GCR)](https:\u002F\u002Fcloud.google.com\u002Fcontainer-registry), Google's place for hosting Docker images.\n* Once our Docker image is hosted on GCR, it gets deployed to an App Engine instance (think a computer just like ours but running online, where other people can access it).\n* The App Engine instance is defined by the instructions in [`food-vision\u002Fapp.yaml`](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Fblob\u002Fmain\u002Ffood-vision\u002Fapp.yaml), if you check out this file you'll notice it's quite simple, it has two lines:\n```\nruntime: custom # we want to run our own custom Docker container\nenv: flex # we want our App Engine to be flexible and install our various dependencies (in requirements.txt)\n```\n\nSeems like a lot right?\n\nAnd it is, but once you've had a little practice which each, you'll start to realise there's a specific reason behind each of them.\n\nIf all the steps executed correctly, you should see your app running live on App Engine under a URL similar to:\n\n```\nhttp:\u002F\u002F\u003CYOUR_PROJECT_NAME>.ue.r.appspot.com\u002F\n```\n\nWhich should look exactly like our app running locally!\n\n![our streamlit app running on App Engine](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmrdbourke_cs329s-ml-deployment-tutorial_readme_0a44f4a37435.png)\n \n## Breaking down `food-vision`\n\n> What do all the files in `food-vision` do?\n\nThere's a bunch of files in our [`food-vision` directory](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Ftree\u002Fmain\u002Ffood-vision) and seeing them for the first time can be confusing. 
So here's a quick one-liner for each.\n\n* `.dockerignore` - files\u002Ffolders to ignore when are Docker container is being created (similar to how `.gitignore` tells what files\u002Ffolders to ignore when committing.\n* `Dockerfile` - instructions for how our Docker container (a box with all of what our app needs to run) should be created.\n* `Makefile` - a handy script for executing commands like `make gcloud-deploy` on the command which run larger commands (this saves us typing large commands all the time, see [What is a Makefile?](https:\u002F\u002Fwww.google.com\u002Fsearch?client=safari&rls=en&q=what+is+a+makefile&ie=UTF-8&oe=UTF-8) for more).\n* `SessionState.py`- a Python script to help our Streamlit app maintain state (not delete everything) when we a click a button, see the [Streamlit forums for more](https:\u002F\u002Fdiscuss.streamlit.io\u002Ft\u002Fis-there-any-working-example-for-session-state-for-streamlit-version-0-63-1\u002F4551\u002F2).\n* `app.py` - our Food Vision 👁🍔 app built with [Streamlit](http:\u002F\u002Fstreamlit.io\u002F).\n* `app.yaml` - the instructions for what type of instance App Engine should create when we deploy our app.\n* `requirements.txt`- all of the dependencies required to run `app.py`.\n* `utils.py` - helper functions used in `app.py` (this prevents our app from getting too large).\n\n## Where else your app will break\n\nDuring the tutorial (see [timestamp 1:32:31](https:\u002F\u002Fyoutu.be\u002Ffw6NMQrYc6w?t=5551)), we saw the app we've deployed is far from perfect and we saw a couple of places where our app will break, but there's one more:\n\nThe default app (the on you'll get when you clone the repo) works with 3 models:\n * Model 1: 10 food classes from [Food101](https:\u002F\u002Fwww.kaggle.com\u002Fdansbecker\u002Ffood-101).\n * Model 2: 11 food classes from Food101.\n * Model 3: 11 food classes Food101 + 1 not_food class (random images from ImageNet).\n \nAll of these models can be trained using 
[`model_training.ipynb`](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Fblob\u002Fmain\u002Fmodel_training.ipynb), however, if you do have access to all 3, your app will break if you choose anything other than Model 1 in the sidebar (the app requires at least 1 model to run).\n\n## Learn more\n\n> Where can I learn all of this?\n\nJust like there's an infinite way you can construct deep learning neural networks with different layers, what we've done here is only *one* way you can deploy machine learning models\u002Fapplications with Google Cloud (other cloud services have similar offerings as well).\n\nIf you'd like to learn more about Google Cloud, I'd recommend [Google's Qwiklabs](https:\u002F\u002Fgoogle.qwiklabs.com\u002F), here you'll get hands-on experience using Google Cloud for different uses-cases (all for free).\n\nIf you'd like more about how software engineering crosses over with machine learning, I'd recommend the following blogs:\n\n* LJ Miranda's [How to improve software engineering skills as a researcher](https:\u002F\u002Fljvmiranda921.github.io\u002Fnotebook\u002F2020\u002F11\u002F15\u002Fdata-science-swe\u002F) \n* Mark Douthwaite's [software engineering and machine learning blog](https:\u002F\u002Fmark.douthwaite.io\u002F)\n\nFor more on the concept of the \"data flywheel\" (discussed during the tutorial), check out Josh Tobin's talk [A Missing Link in the Machine Learning Infrastrcuture Stack](https:\u002F\u002Fyoutu.be\u002Fo4q_ljRkXqw).\n\n## Extensions\n\n> How can I extend this app?\n\n**CI\u002FCD** - you'll hear this a lot when you start building and shipping software. It stands for \"continuous integration\u002Fcontinuous delivery\". I think of it like this, say you make a change to your app and you'd like to push it to your users immediately, you could have a service such as [GitHub Actions](https:\u002F\u002Fgithub.com\u002Ffeatures\u002Factions) watch for changes in your GitHub repo. 
If a change occurs on a certain branch, GitHub Actions performs steps very similar to what we've done here and redeploys your (updated) app automatically.\n * Mark Douthwaite has a great blog post on [CI\u002FCD with GitHub Actions](https:\u002F\u002Fmark.douthwaite.io\u002Fcontinuous-training-and-delivery\u002F).\n\n**Codify everything!** - when deploying our app, we did a lot of clicking around the Google Cloud console, however you can do all of what we did using the [`gcloud` SDK](https:\u002F\u002Fcloud.google.com\u002Fsdk), this means you could automate everything we've done and make the whole process far less manual!\n\n## Questions?\n\nStart a [discussion](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Fdiscussions) or send me a message: daniel at mrdbourke dot com.\n","# CS329s 机器学习模型部署教程\n\n**警告：** 按照本教程中的步骤操作可能会产生费用（Google Cloud 是一项付费服务），请务必关闭不再使用的 Google Cloud 服务，以避免产生额外费用。\n\n**感谢：** [Mark Douthwaite 的精彩机器学习与软件工程博客](https:\u002F\u002Fmark.douthwaite.io\u002F)、[Lj Miranda 关于数据科学家软件工程工具的优秀文章](https:\u002F\u002Fljvmiranda921.github.io\u002Fnotebook\u002F2020\u002F11\u002F15\u002Fdata-science-swe\u002F)、以及 Chip Huyen 和 Ashik Shafi 对本教程初稿提供的宝贵反馈。\n\n## 本教程包含哪些内容？\n\n与 [CS329s 机器学习模型部署教程](https:\u002F\u002Fstanford-cs329s.github.io\u002Fsyllabus.html) 配套的代码和文件。\n\n* 观看 [YouTube 上的视频教程](https:\u002F\u002Fyoutu.be\u002Ffw6NMQrYc6w)\n* 查阅 [幻灯片](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Fblob\u002Fmain\u002FCS329s-deploying-ml-models-tutorial.pdf)\n* 获取 [模型训练代码](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Fblob\u002Fmain\u002Fmodel_training.ipynb)\n\n## 开始前需要准备什么？\n\n* 一个 [Google Cloud 账户](https:\u002F\u002Fcloud.google.com\u002Fgcp) 和一个 [Google Cloud 项目](https:\u002F\u002Fcloud.google.com\u002Fresource-manager\u002Fdocs\u002Fcreating-managing-projects)\n* 已安装 [Google Cloud 
SDK](https:\u002F\u002Fcloud.google.com\u002Fsdk\u002Fdocs\u002Finstall)（gcloud CLI 工具）\n* 训练好的 [机器学习模型](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Fblob\u002Fmain\u002Fmodel_training.ipynb)，我们的应用使用了一个基于 [Food101 数据集](https:\u002F\u002Fwww.kaggle.com\u002Fdansbecker\u002Ffood-101) 中多种食物类别的图像分类模型。\n* 已安装 [Docker](https:\u002F\u002Fdocs.docker.com\u002Fget-docker\u002F)\n\n**再次警告：** 使用 Google Cloud 服务需要付费。如果您没有免费额度（首次注册可获得 300 美元），则会产生费用。完成操作后，请删除并关闭相关资源，以避免产生额外费用。\n\n## 最终会得到什么？\n\n如果您按照以下步骤顺利完成，最终将获得一个由 [Streamlit](http:\u002F\u002Fstreamlit.io\u002F) 提供支持的 Web 应用程序（Food Vision 🍔👁），用于对食物图像进行分类（如果您愿意，还可以将其部署到 Google Cloud 上）。\n\n我们的应用在本地运行时，对一张冰淇淋图片进行预测（使用部署在 Google Cloud 上的机器学习模型）：\n![food vision demo](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmrdbourke_cs329s-ml-deployment-tutorial_readme_94b76013580d.gif)\n\n## 好的，我准备好了，该如何使用呢？\n\n我们将分三个部分来完成这个任务：\n1. 让应用运行起来（在本地机器上运行 Streamlit）\n2. 将机器学习模型部署到 AI Platform（让 Google Cloud 托管我们的模型）\n3. 将应用部署到 App Engine（让我们的应用上线）\n\n### 1. 让应用运行起来\n\n1. 克隆此仓库\n```\ngit clone https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\n```\n\n2. 进入 `food-vision` 目录\n```\ncd food-vision\n```\n\n3. 创建并激活虚拟环境（您可以随意命名，我将其命名为 “env”）\n```\npip install virtualenv\nvirtualenv \u003CENV-NAME>\nsource \u003CENV-NAME>\u002Fbin\u002Factivate\n```\n4. 安装所需依赖项（Streamlit、TensorFlow 等）\n```\npip install -r requirements.txt\n```\n5. 启动 Streamlit 并运行 `app.py`\n```\nstreamlit run app.py\n``` \n执行上述命令后，您应该会看到如下界面：\n![](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmrdbourke_cs329s-ml-deployment-tutorial_readme_aa3a70f2a94c.png)\n\n这就是我们正在开发的 Food Vision 🍔👁 应用程序。\n\n6. 
尝试上传一张图片（例如 [`food-images\u002F`](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Ftree\u002Fmain\u002Ffood-images) 文件夹中的图片，如 [`ice_cream.jpeg`](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Fblob\u002Fmain\u002Ffood-images\u002Fice_cream.jpeg)），它应该能够成功加载。\n\n7. 注意，当您向应用上传图片时，会出现一个“Predict”按钮，点击它看看会发生什么。\n\n8. 此时应用会崩溃，因为它试图联系 Google Cloud Platform (GCP) 以获取机器学习模型，但可能由于以下原因导致失败：\n * 无法找到模型（API 调用错误或模型不存在）\n * 无法使用现有模型，因为凭据错误（如下所示）\n![credential error](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmrdbourke_cs329s-ml-deployment-tutorial_readme_5c92c5517828.png)\n \n这其实是一件好事！这意味着我们的应用正在尝试与 GCP 进行通信（通过 `food-vision\u002Fapp.py` 和 `food-vision\u002Futils.py` 中的函数）。 \n\n接下来，让我们学习如何在 GCP 上托管模型。\n\n### 2. 在 GCP 上托管机器学习模型\n \n> 如何解决这个错误？（Streamlit 无法访问您的模型）\n\n要解决这个问题，我们需要准备以下几样东西：\n* 一个训练好的机器学习模型（适合我们的问题，我们将把它上传到 Google Storage）\n* 一个 Google Storage 存储桶（用于存放我们的训练好的模型）\n* 一个托管在 Google AI Platform 上的模型（我们将存储桶中的模型与此处关联）\n* 用于访问 Google AI Platform 上托管模型的服务账号密钥\n\n下面我们将逐一介绍如何实现上述目标。\n\n1. 要训练一个机器学习模型并将其保存为 [SavedModel](https:\u002F\u002Fwww.tensorflow.org\u002Fguide\u002Fsaved_model) 格式（此格式为 TensorFlow 特定格式，PyTorch 用户请根据自身需求调整），可以按照 [`model_training.ipynb`](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Fblob\u002Fmain\u002Fmodel_training.ipynb) 中的步骤进行操作。\n\n2. 一旦我们有了 `SavedModel`，就可以将其上传到 Google Storage，但在上传之前，我们需要先 [创建一个 Google Storage 存储桶](https:\u002F\u002Fcloud.google.com\u002Fstorage\u002Fdocs\u002Fcreating-buckets)（存储桶相当于云端的硬盘）。\n\n![在 Google Cloud 上创建存储桶](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmrdbourke_cs329s-ml-deployment-tutorial_readme_6f5d1050df31.png)\n\n您可以随意为存储桶命名（例如 my_cool_bucket_name）。建议将数据存储在离您最近的区域，或者您所在地区允许存储数据的区域（如果不确定，可以选择在美国存储）。\n\n3. 
创建好存储桶后，我们可以将模型复制到该存储桶中（参考 [Google Cloud Storage 文档](https:\u002F\u002Fcloud.google.com\u002Fstorage\u002Fdocs\u002Fuploading-objects#gsutil)）。\n```\n## 在 Colab 中将模型上传到 Google Storage ##\n\n# 授权 Colab 并初始化 gcloud（在提示时输入相应信息）\nfrom google.colab import auth\nauth.authenticate_user()\n!curl https:\u002F\u002Fsdk.cloud.google.com | bash\n!gcloud init\n\n# 将 SavedModel 上传到 Google Cloud Storage 存储桶\n!gsutil cp -r \u003CYOUR_MODEL_PATH> \u003CYOUR_GOOGLE_STORAGE_BUCKET>\n```\n\n4. [将存储桶中的模型连接到 AI Platform](https:\u002F\u002Fcloud.google.com\u002Fai-platform\u002Fprediction\u002Fdocs\u002Fdeploying-models)（这将使我们的模型可以通过 API 调用访问。如果你不清楚什么是 API 调用，可以把它想象成一个函数，可以从互联网上的任何地方触发我们的模型）\n * 不喜欢在 Google Cloud 控制台中点击操作？你也可以在命令行上使用 `gcloud` 在 AI Platform 上创建模型：\n * 在 AI Platform 上创建模型（选择离你最近或希望模型被访问的区域）：\n![在 AI Platform 上创建模型](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmrdbourke_cs329s-ml-deployment-tutorial_readme_e3f48946f3fd.png)\n * 一旦你在 AI Platform 上有了模型（如上所示），你需要创建一个与你的模型训练内容相匹配的模型版本（例如，如果你的模型是用 TensorFlow 训练的，就选择 TensorFlow）：\n![在 AI Platform 上创建模型版本](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmrdbourke_cs329s-ml-deployment-tutorial_readme_1e6adca4b52b.png)\n * 然后将你的模型版本链接到 Google Cloud Storage 中的已训练模型：\n![将模型版本链接到 Google Cloud Storage](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmrdbourke_cs329s-ml-deployment-tutorial_readme_dfd9eb6a620d.png)\n\n5. 创建一个用于访问 AI Platform 的服务账号（GCP 非常注重权限管理，这是为了保护你的应用安全）\n * 你需要创建一个具有“ML Engine Developer”角色权限的服务账号\n\n![ML Engine 开发者角色权限](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmrdbourke_cs329s-ml-deployment-tutorial_readme_7eebed6ea4b0.png)\n\n6. 一旦你有了活跃的服务账号，[创建并下载其密钥](https:\u002F\u002Fcloud.google.com\u002Fiam\u002Fdocs\u002Fcreating-managing-service-account-keys)（密钥将以 .JSON 文件的形式提供）\n * 🔑 **注意：** 服务账号密钥会授予对你的 GCP 账号的访问权限，请务必妥善保管此文件（例如，将 `*.json` 添加到 `.gitignore` 文件中，以免意外将其提交到 GitHub）\n\n7. 
更新以下变量：\n * 在 `app.py` 中，将现有的 GCP 密钥路径替换为你的密钥路径：\n```\n# Google Cloud 服务会在你的应用运行时查找这些环境变量\n\n# 旧\nos.environ[\"GOOGLE_APPLICATION_CREDENTIALS\"] = \"daniels-dl-playground-4edbcb2e6e37.json\"\n\n# 新\nos.environ[\"GOOGLE_APPLICATION_CREDENTIALS\"] = \"\u003CPATH_TO_YOUR_KEY>\"\n```\n * 在 `app.py` 中，将 GCP 项目和区域替换为你自己的 GCP 项目和区域：\n```\n# 旧\nPROJECT = \"daniels-dl-playground\"\nREGION = \"us-central1\" \n\n# 新\nPROJECT = \"\u003CYOUR_GCP_PROJECT_NAME>\"\nREGION = \"\u003CYOUR_GCP_REGION>\"\n```\n * 在 `utils.py` 中，将 `\"model_1\"` 下 `\"model_name\"` 键的值替换为你的模型名称：\n```\n# 旧\nclasses_and_models = {\n    \"model_1\": {\n        \"classes\": base_classes,\n        \"model_name\": \"efficientnet_model_1_10_classes\" \n    }\n }\n\n# 新\nclasses_and_models = {\n    \"model_1\": {\n        \"classes\": base_classes,\n        \"model_name\": \"\u003CYOUR_AI_PLATFORM_MODEL_NAME>\" \n    }\n}\n```\n\n8. 再次运行应用，看看是否正常工作（按 R 键刷新 Streamlit 应用程序，或刷新页面，然后重新上传一张图片并点击“Predict”按钮）\n\n![点击“Predict”按钮且模型正确部署时显示的内容](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmrdbourke_cs329s-ml-deployment-tutorial_readme_23d298a18c68.png)\n  \n### 3. 将整个应用部署到 GCP\n\n> 好吧，我已经修复了权限错误，那我该如何部署我的模型\u002F应用呢？\n\n很高兴你这么问……\n \n1. 
运行 `make gcloud-deploy`……等待 5–10 分钟，你的应用就会部署到 App Engine 上（前提是你已经启用了 App Engine API）。\n\n……这样就完成了！\n \n> 但是等等，运行 `make gcloud-deploy` 会发生什么？\n\n当你运行 `make gcloud-deploy` 时，Makefile 中的 `gcloud-deploy` 命令（位于 [`food-vision\u002FMakefile`](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Fblob\u002Fmain\u002Ffood-vision\u002FMakefile)）会被触发。\n\n实际上，`make gcloud-deploy` 是以下命令的别名：\n\n```\ngcloud app deploy app.yaml\n```\n\n这就是 `gcloud` 的一种方式，用来告诉 Google Cloud：“嘿，Google Cloud，请开始执行必要的步骤，将我们本地运行的应用程序（`food-vision\u002Fapp.py`）部署到 App Engine 上。”\n\n为此，`gcloud app deploy` 命令会执行以下操作：\n* 我们的应用程序会被放入由 [`food-vision\u002FDockerfile`](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Fblob\u002Fmain\u002Ffood-vision\u002FDockerfile) 定义的 Docker 容器中（可以将 Docker 容器想象成一个盒子，里面包含了我们在本地运行的应用程序以及它运行所需的一切。一旦进入容器，只要 Docker 可用，这个容器就可以在任何地方运行，并且应该能够正常工作；而 Dockerfile 则定义了如何创建这个容器）。\n* 当 Docker 容器创建完成后，它就变成了一个 Docker 镜像（听起来可能有点复杂，但你可以把 Docker 镜像理解为不可变的 Docker 容器，也就是说，当我们把它移动到其他地方时，它的内容不会改变）。\n* 接着，Docker 镜像会被上传到 [Google Container Registry (GCR)](https:\u002F\u002Fcloud.google.com\u002Fcontainer-registry)，这是 Google 用于托管 Docker 镜像的地方。\n* 一旦我们的 Docker 镜像被托管在 GCR 上，它就会被部署到一个 App Engine 实例上（可以把它想象成一台类似于我们本地电脑的在线计算机，其他人也可以访问它）。\n* App Engine 实例是由 [`food-vision\u002Fapp.yaml`](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Fblob\u002Fmain\u002Ffood-vision\u002Fapp.yaml) 中的指令定义的。如果你查看这个文件，会发现它非常简单，只有两行：\n```\nruntime: custom # 我们希望运行自定义的 Docker 容器\nenv: flex # 我们希望 App Engine 具有灵活性，能够安装 requirements.txt 中列出的各种依赖项\n```\n\n听起来步骤很多，对吧？\n\n确实如此，但只要你对每一步都多加练习，就会逐渐明白其中每一步背后都有其特定的原因。\n\n如果所有步骤都成功执行，你应该能够在 App Engine 上看到你的应用正在运行，网址类似于：\n\n```\nhttp:\u002F\u002F\u003CYOUR_PROJECT_NAME>.ue.r.appspot.com\u002F\n```\n\n这看起来应该和我们在本地运行的应用完全一致！\n\n![我们的 Streamlit 应用在 App Engine 上运行](https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmrdbourke_cs329s-ml-deployment-tutorial_readme_0a44f4a37435.png)\n\n## 拆解 
`food-vision`\n\n> `food-vision` 目录下的所有文件分别有什么作用？\n\n我们的 [`food-vision` 目录](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Ftree\u002Fmain\u002Ffood-vision) 里包含许多文件，初次看到时可能会让人感到困惑。下面为每个文件简要说明：\n\n* `.dockerignore` - 在构建 Docker 容器时需要忽略的文件或文件夹（类似于 `.gitignore` 在提交代码时指定要忽略的文件和文件夹）。\n* `Dockerfile` - 定义如何创建我们的 Docker 容器（一个包含应用运行所需一切的“盒子”）的指令。\n* `Makefile` - 一个便捷的脚本，用于在命令行中执行诸如 `make gcloud-deploy` 等命令，从而简化复杂操作（这样我们就不必每次都手动输入长命令了；更多信息请参阅 [什么是 Makefile？](https:\u002F\u002Fwww.google.com\u002Fsearch?client=safari&rls=en&q=what+is+a+makefile&ie=UTF-8&oe=UTF-8)）。\n* `SessionState.py` - 一个 Python 脚本，帮助我们的 Streamlit 应用在点击按钮时保持状态（不会重置所有内容），详情可参见 [Streamlit 论坛](https:\u002F\u002Fdiscuss.streamlit.io\u002Ft\u002Fis-there-any-working-example-for-session-state-for-streamlit-version-0-63-1\u002F4551\u002F2)。\n* `app.py` - 我们的 Food Vision 👁🍔 应用，基于 [Streamlit](http:\u002F\u002Fstreamlit.io\u002F) 构建。\n* `app.yaml` - 指定当我们部署应用时，App Engine 应该创建哪种类型的实例。\n* `requirements.txt` - 运行 `app.py` 所需的所有依赖项。\n* `utils.py` - 在 `app.py` 中使用的辅助函数，避免主程序过于庞大。\n\n## 你的应用还可能在哪些地方出错\n\n在教程中（见 [1:32:31](https:\u002F\u002Fyoutu.be\u002Ffw6NMQrYc6w?t=5551) 处），我们看到已部署的应用远非完美，并且指出了几个可能导致应用崩溃的地方，但还有一个需要注意：\n\n默认应用（克隆仓库后得到的版本）支持 3 种模型：\n * 模型 1：来自 [Food101](https:\u002F\u002Fwww.kaggle.com\u002Fdansbecker\u002Ffood-101) 的 10 类食物。\n * 模型 2：同样来自 Food101 的 11 类食物。\n * 模型 3：Food101 的 11 类食物加上 1 类“非食物”类别（来自 ImageNet 的随机图像）。\n\n这些模型都可以通过 [`model_training.ipynb`](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Fblob\u002Fmain\u002Fmodel_training.ipynb) 进行训练。然而，如果你没有将全部 3 种模型都部署到 AI Platform（例如只部署了模型 1），当你在侧边栏中选择除模型 1 之外的其他选项时，应用就会崩溃（应用至少需要一种模型才能运行）。\n\n## 拓展学习\n\n> 我在哪里可以学到这些知识？\n\n正如你可以用不同的层结构构建无限种深度学习神经网络一样，我们在这里所做的只是使用 Google Cloud 部署机器学习模型或应用的一种方式（其他云服务也有类似的功能）。\n\n如果你想深入了解 Google Cloud，我推荐 [Google 的 Qwiklabs](https:\u002F\u002Fgoogle.qwiklabs.com\u002F)。在这里，你可以针对不同场景免费获得使用 Google Cloud 的实战经验。\n\n如果你想进一步了解软件工程与机器学习的交叉领域，可以参考以下博客：\n\n* LJ Miranda 的 
[作为研究人员如何提升软件工程技能](https:\u002F\u002Fljvmiranda921.github.io\u002Fnotebook\u002F2020\u002F11\u002F15\u002Fdata-science-swe\u002F)\n* Mark Douthwaite 的 [软件工程与机器学习博客](https:\u002F\u002Fmark.douthwaite.io\u002F)\n\n关于教程中提到的“数据飞轮”概念，可以观看 Josh Tobin 的演讲 [机器学习基础设施栈中的缺失一环](https:\u002F\u002Fyoutu.be\u002Fo4q_ljRkXqw)。\n\n## 扩展建议\n\n> 我该如何扩展这个应用？\n\n**CI\u002FCD** - 在开始构建和发布软件时，你会经常听到这个词。它代表“持续集成\u002F持续交付”。我的理解是：假设你对应用做了一些更改，并希望立即推送给用户，那么你可以使用像 [GitHub Actions](https:\u002F\u002Fgithub.com\u002Ffeatures\u002Factions) 这样的服务来监控 GitHub 仓库中的变化。一旦某个分支发生更改，GitHub Actions 就会执行与我们在此处类似的操作，自动重新部署你的（更新后的）应用。\n * Mark Douthwaite 写过一篇关于 [使用 GitHub Actions 实现 CI\u002FCD 的优秀文章](https:\u002F\u002Fmark.douthwaite.io\u002Fcontinuous-training-and-delivery\u002F)。\n\n**将所有操作代码化！** - 在部署应用时，我们大量使用了 Google Cloud 控制台进行点击操作。但实际上，你可以使用 [`gcloud` SDK](https:\u002F\u002Fcloud.google.com\u002Fsdk) 来完成所有这些步骤，这意味着你可以将整个流程自动化，从而大大减少手动操作！\n\n## 有问题吗？\n\n请发起 [讨论](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Fdiscussions) 或发送邮件至 daniel@mrdbourke.com。","# CS329s 机器学习模型部署快速上手指南\n\n本指南基于斯坦福 CS329s 课程教程，帮助你构建一个基于 Streamlit 的 Web 应用（Food Vision），并将训练好的机器学习模型部署到 Google Cloud Platform (GCP) 上进行推理。\n\n> **⚠️ 费用警告**：本教程涉及 Google Cloud 付费服务。新用户通常有 $300 免费额度，但请务必在使用完毕后**删除资源并关闭服务**，以免产生意外费用。\n\n## 1. 
环境准备\n\n在开始之前，请确保你的开发环境满足以下要求：\n\n*   **操作系统**：Linux, macOS 或 Windows (WSL2 推荐)。\n*   **Google Cloud 账号**：已注册并创建一个 [Google Cloud Project](https:\u002F\u002Fcloud.google.com\u002Fresource-manager\u002Fdocs\u002Fcreating-managing-projects)。\n*   **Google Cloud SDK**：已安装 `gcloud` 命令行工具。\n    *   安装指南：[Google Cloud SDK Install](https:\u002F\u002Fcloud.google.com\u002Fsdk\u002Fdocs\u002Finstall)\n    *   *国内提示*：如果下载缓慢，可尝试配置代理或使用国内镜像源加速 Python 包安装。\n*   **Docker**：已安装并运行 Docker Desktop 或 Docker Engine。\n    *   安装指南：[Get Docker](https:\u002F\u002Fdocs.docker.com\u002Fget-docker\u002F)\n*   **Python 环境**：建议 Python 3.8+。\n*   **预训练模型**：本教程使用基于 Food101 数据集训练的图像分类模型（TensorFlow SavedModel 格式）。你可以直接使用仓库提供的 [`model_training.ipynb`](https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\u002Fblob\u002Fmain\u002Fmodel_training.ipynb) 进行训练。\n\n## 2. 安装步骤\n\n### 第一步：克隆项目并设置虚拟环境\n\n```bash\n# 1. 克隆仓库\ngit clone https:\u002F\u002Fgithub.com\u002Fmrdbourke\u002Fcs329s-ml-deployment-tutorial\n\n# 2. 进入克隆得到的仓库中的 food-vision 目录\ncd cs329s-ml-deployment-tutorial\u002Ffood-vision\n\n# 3. 创建并激活虚拟环境 (以 'env' 为例)\npip install virtualenv\nvirtualenv env\nsource env\u002Fbin\u002Factivate  # Windows 用户请使用: env\\Scripts\\activate\n\n# 4. 安装依赖 (Streamlit, TensorFlow 等)\n# 国内用户建议使用清华或阿里镜像加速\npip install -r requirements.txt -i https:\u002F\u002Fpypi.tuna.tsinghua.edu.cn\u002Fsimple\n```\n\n### 第二步：本地运行应用（初步测试）\n\n在配置云端模型之前，先在本地启动应用以验证环境。\n\n```bash\nstreamlit run app.py\n```\n\n*   启动后，浏览器会自动打开应用界面。\n*   尝试上传一张图片（例如 `food-images\u002Fice_cream.jpeg`）。\n*   点击 \"Predict\" 按钮。\n*   **预期结果**：此时应用会报错（无法连接 GCP 或凭证错误）。这是正常的，说明本地应用逻辑正常，接下来需要配置云端模型。\n\n### 第三步：在 Google Cloud 上托管模型\n\n要修复上述错误，需将模型部署到 GCP AI Platform。\n\n1.  **创建 Storage Bucket**：\n    在 Google Cloud Console 中创建一个存储桶（Bucket），用于存放模型文件。\n\n2.  
**上传模型**：\n    如果你使用 Colab 或已初始化 `gcloud` 的本地终端，执行以下命令上传 `SavedModel`：\n\n    ```bash\n    # 授权并初始化 (如果在 Colab 中运行)\n    # from google.colab import auth\n    # auth.authenticate_user()\n    # !curl https:\u002F\u002Fsdk.cloud.google.com | bash\n    # !gcloud init\n\n    # 上传模型到 Storage Bucket\n    # 替换 \u003CYOUR_MODEL_PATH> 和 \u003CYOUR_GOOGLE_STORAGE_BUCKET>\n    gsutil cp -r \u003CYOUR_MODEL_PATH> \u003CYOUR_GOOGLE_STORAGE_BUCKET>\n    ```\n\n3.  **在 AI Platform 创建模型及版本**：\n    *   在 GCP Console 的 **AI Platform > Models** 中创建新模型（选择靠近你的区域，如 `us-central1`）。\n    *   创建 **Model Version**，框架选择 TensorFlow，并链接到上一步上传的 Google Storage 路径。\n\n4.  **创建服务账号与密钥**：\n    *   在 **IAM & Admin > Service Accounts** 创建新服务账号。\n    *   赋予角色：**ML Engine Developer**。\n    *   创建并下载 **JSON 密钥文件**。\n    *   **安全提示**：切勿将此 JSON 文件提交到 GitHub，请将其添加到 `.gitignore`。\n\n5.  **更新代码配置**：\n    修改 `food-vision\u002Fapp.py` 和 `food-vision\u002Futils.py` 中的配置变量：\n\n    *   **app.py**: 设置凭证路径、项目 ID 和区域\n        ```python\n        # Google Cloud Services look for these when your app runs\n        \n        # 修改为你的密钥文件路径\n        os.environ[\"GOOGLE_APPLICATION_CREDENTIALS\"] = \"\u003CPATH_TO_YOUR_KEY>.json\"\n        \n        # 修改为你的 GCP 项目名和区域\n        PROJECT = \"\u003CYOUR_GCP_PROJECT_NAME>\"\n        REGION = \"\u003CYOUR_GCP_REGION>\" \n        ```\n\n    *   **utils.py**: 设置模型名称\n        ```python\n        classes_and_models = {\n           \"model_1\": {\n               \"classes\": base_classes,\n               # 修改为你在 AI Platform 创建的模型名称\n               \"model_name\": \"\u003CYOUR_AI_PLATFORM_MODEL_NAME>\" \n           }\n        }\n        ```\n\n6.  **验证连接**：\n    重新运行 `streamlit run app.py`，上传图片并点击 \"Predict\"。如果配置正确，应用将返回预测结果。\n\n## 3. 基本使用：部署应用到互联网\n\n完成模型配置后，将整个应用部署到 Google App Engine。\n\n1.  **确保 App Engine API 已启用**：\n    在 Google Cloud Console 中搜索 \"App Engine\" 并完成初始化（选择区域）。\n\n2.  
**执行部署命令**：\n    在项目根目录 (`food-vision`) 下运行：\n\n    ```bash\n    make gcloud-deploy\n    ```\n\n    *注：该命令实际上是 `gcloud app deploy app.yaml` 的别名。它会自动构建 Docker 容器，推送到 Google Container Registry (GCR)，并部署到 App Engine。*\n\n3.  **访问应用**：\n    部署过程大约需要 5-10 分钟。完成后，终端会显示应用的公网 URL。点击该链接即可在互联网上访问你的 Food Vision 应用。\n\n---\n**清理资源**：实验结束后，请记得在 Google Cloud Console 中删除 App Engine 版本、AI Platform 模型版本、Storage Bucket 以及服务账号，以避免持续扣费。","一家初创餐饮科技公司急需将实验室中训练好的食物识别模型转化为可对外服务的 Web 应用，以便集成到其智能点餐系统中。\n\n### 没有 cs329s-ml-deployment-tutorial 时\n- 数据科学家缺乏清晰的部署路径，不知道如何将本地的 TensorFlow 模型安全地托管到 Google Cloud AI Platform，导致模型长期闲置在笔记本中。\n- 手动配置 Docker 容器和 Streamlit 前端极易出错，团队往往因环境依赖冲突或缺少最佳实践而耗费数天排查“在我机器上能跑”的问题。\n- 缺乏对云资源成本控制的警示与指导，新手容易忘记关闭服务，导致产生意想不到的高额谷歌云账单。\n- 前后端联调困难，本地开发的 App 无法稳定调用云端模型接口，项目进度严重受阻。\n\n### 使用 cs329s-ml-deployment-tutorial 后\n- 跟随教程三步走策略，团队迅速将食物分类模型部署至 Google Cloud AI Platform，并成功构建了基于 Streamlit 的\"Food Vision\"演示应用。\n- 直接复用经过验证的代码模板和 Docker 配置，避免了环境搭建陷阱，实现了从本地测试到云端托管的无缝衔接。\n- 教程中醒目的成本警告和资源管理指南帮助团队建立了规范的启停流程，有效避免了因遗忘关闭服务而产生的额外费用。\n- 提供了完整的端到端示例（含预训练模型和测试图片），开发人员只需少量修改即可让应用在互联网上运行，大幅缩短上线周期。\n\ncs329s-ml-deployment-tutorial 通过提供标准化的实战代码与避坑指南，将复杂的机器学习工程化部署过程简化为可执行的清晰步骤，显著降低了从实验到生产的门槛。","https:\u002F\u002Foss.gittoolsai.com\u002Fimages\u002Fmrdbourke_cs329s-ml-deployment-tutorial_aa3a70f2.png","mrdbourke","Daniel Bourke","https:\u002F\u002Foss.gittoolsai.com\u002Favatars\u002Fmrdbourke_c53fa7a6.jpg","Machine Learning Engineer live on YouTube.",null,"www.mrdbourke.com","https:\u002F\u002Fgithub.com\u002Fmrdbourke",[84,88,92,96],{"name":85,"color":86,"percentage":87},"Jupyter Notebook","#DA5B0B",95.1,{"name":89,"color":90,"percentage":91},"Python","#3572A5",4.6,{"name":93,"color":94,"percentage":95},"Dockerfile","#384d54",0.1,{"name":97,"color":98,"percentage":95},"Makefile","#427819",615,190,"2026-03-26T19:59:44","MIT",4,"未说明",{"notes":106,"python":104,"dependencies":107},"本教程主要依赖 Google Cloud Platform (GCP) 服务（需付费），包括 AI Platform、Storage Bucket 和 App Engine。用户需自行训练 TensorFlow 
SavedModel 格式模型并上传至 GCP。本地运行需安装 Docker 以构建容器，并通过 gcloud CLI 进行部署。首次使用 GCP 需注意关闭服务以避免产生费用。",[108,109,110,111,112],"Streamlit","TensorFlow","Google Cloud SDK (gcloud)","Docker","virtualenv",[13],[115,116,117,118,119,120,121,122],"google-cloud","machine-learning","food-vision","deployment-tutorial","cloud-services","tensorflow","machine-learning-deployment","machine-learning-tutorial","2026-03-27T02:49:30.150509","2026-04-06T06:46:08.152908",[],[]]