diff --git a/README.md b/README.md index 25970ddab5a..b7398b89ba3 100644 --- a/README.md +++ b/README.md @@ -147,7 +147,7 @@ Or, through our CLI command: zenml stack deploy --provider aws ``` -Alternatively, if the necessary pieces of infrastructure is already deployed, you can register a cloud stack seamlessly through the stack wizard: +Alternatively, if the necessary pieces of infrastructure are already deployed, you can register a cloud stack seamlessly through the stack wizard: ```bash zenml stack register --provider aws @@ -195,9 +195,9 @@ def trainer(training_df: pd.DataFrame) -> Annotated["model", torch.nn.Module]: ![Exploring ZenML Models](/docs/book/.gitbook/assets/readme_mcp.gif) -### Purpose built for machine learning with integration to you favorite tools +### Purpose built for machine learning with integrations to your favorite tools -While ZenML brings a lot of value of the box, it also integrates into your existing tooling and infrastructure without you having to be locked in. +While ZenML brings a lot of value out of the box, it also integrates into your existing tooling and infrastructure without you having to be locked in. ```python from bentoml._internal.bento import bento @@ -333,7 +333,7 @@ the Apache License Version 2.0. Projects Showcase

- 🎉 Version 0.70.0 is out. Check out the release notes + 🎉 Version 0.71.0 is out. Check out the release notes here.
🖥️ Download our VS Code Extension here.
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 2809cc60164..419d8f03378 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,5 +1,117 @@
+# 0.71.0
+
+ZenML version 0.71.0 delivers a new Modal step operator integration as its
+core feature, enabling efficient cloud execution for ML pipelines with granular
+hardware configuration options. The release strengthens enterprise capabilities
+through improved token management and dashboard features, while expanding
+artifact handling with dynamic naming and enhanced visualization support.
+Additionally, it includes various infrastructure improvements and bug fixes
+that enhance the platform's stability and usability, particularly around
+Docker connectivity, Kubernetes management, and service connector operations.
+
+## New Feature: Modal Step Operator Integration
+
+ZenML now [integrates with Modal](https://modal.com/), bringing lightning-fast
+cloud execution capabilities to your ML pipelines. This new
+[step operator](https://docs.zenml.io/stack-components/step-operators/modal)
+allows you to execute individual pipeline steps on Modal's specialized compute
+instances, offering notable speed gains, particularly for Docker image building
+and hardware provisioning. With simple configuration options, you can precisely
+specify hardware requirements like GPU type, CPU count, and memory for each
+step, making it ideal for resource-intensive ML workloads.
+
+## Other Highlights
+
+- **Workload API Token Management:** Refactored token management for improved
+security with a generic API token dispenser.
+- **Dashboard Enhancements:**
+  - Introduced service account management capabilities.
+  - Added API key creation and integration features.
+- **Dynamic Artifact Naming:** Introduced the capability to dynamically name
+artifacts.
+- **Visualization Enhancements:** Made dictionaries and lists visualizable and
+added a JSON visualization type.
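+
+As a quick illustration, here is a minimal sketch of attaching the new step
+operator to a single step. The settings class name, its import path, and the
+`gpu` field are assumptions based on the description above; see the step
+operator documentation linked above for the authoritative API:
+
+```python
+from zenml import step
+from zenml.config import ResourceSettings
+
+# Assumed import path for the new integration's settings class
+from zenml.integrations.modal.flavors import ModalStepOperatorSettings
+
+# Assumes a Modal step operator is registered in the active stack as "modal"
+modal_settings = ModalStepOperatorSettings(gpu="A100")  # GPU type for this step
+
+
+@step(
+    step_operator="modal",
+    settings={
+        "step_operator": modal_settings,
+        "resources": ResourceSettings(cpu_count=2, memory="16GB"),
+    },
+)
+def train_model() -> None:
+    ...  # resource-intensive training logic runs on Modal's compute
+```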
+ +## Additional Features and Improvements + +- Improved error messages for Docker daemon connectivity +- Enhanced SageMaker URL handling +- Simplified model version artifact linkage +- Added testing for pipeline templates +- Improved Kubernetes pod and label length management +- Allowed skipping type annotations for step inputs +- Enabled using feature service instances instead of just names + +## Bug Fixes + +- Fixed issues with getting out of an inaccessible active stack +- Fixed race conditions in the service connector type registry +- Resolved migration test complications +- Corrected documentation links +- Fixed artifact store and artifact URI handling +- Addressed various scalability and compatibility issues + +## Documentation Updates + +- Added documentation redirects +- Updated PyTorch documentation links +- Improved service connector documentation + +## What's Changed +* Refactored workload API token management for better security and implemented generic API token dispenser by @stefannica in https://github.com/zenml-io/zenml/pull/3154 +* Add 0.70.0 to the migration tests by @avishniakov in https://github.com/zenml-io/zenml/pull/3190 +* Adjustments to the PR template by @bcdurak in https://github.com/zenml-io/zenml/pull/3194 +* [docs] Fix links in the how-to section of docs by @wjayesh in https://github.com/zenml-io/zenml/pull/3196 +* Fixing sagemaker urls to take the settings into consideration by @bcdurak in https://github.com/zenml-io/zenml/pull/3195 +* Add cached run into testing of migrations by @avishniakov in https://github.com/zenml-io/zenml/pull/3199 +* Fix service connector type registry race conditions by @stefannica in https://github.com/zenml-io/zenml/pull/3202 +* Refactor container resource configuration in Vertex Orchestrator test by @avishniakov in https://github.com/zenml-io/zenml/pull/3203 +* [docs] Add missing redirects by @wjayesh in https://github.com/zenml-io/zenml/pull/3200 +* Add links to `uv` new PyTorch documentation by @strickvl in https://github.com/zenml-io/zenml/pull/3204 +* Fix broken docs link by @strickvl in https://github.com/zenml-io/zenml/pull/3208 +* Bugfix for getting out of an inaccessible active stack with no permissions by @bcdurak in https://github.com/zenml-io/zenml/pull/3198 +* Simplify model version artifact linkage by @schustmi in https://github.com/zenml-io/zenml/pull/3175 +* Reenable macos testing by @avishniakov in https://github.com/zenml-io/zenml/pull/3205 +* Various fixes and improvements by @stefannica in https://github.com/zenml-io/zenml/pull/3211 +* Pass config path during zenml pipeline build by @schustmi in https://github.com/zenml-io/zenml/pull/3212 +* Add test for running templates by @schustmi in https://github.com/zenml-io/zenml/pull/3192 +* Fix service connector docstring by @schustmi in https://github.com/zenml-io/zenml/pull/3216 +* Improve error message when docker daemon is not reachable by @schustmi in https://github.com/zenml-io/zenml/pull/3214 +* Don't run migration for empty updates by @schustmi in https://github.com/zenml-io/zenml/pull/3210 +* Remove `--check` from format script and fix naming by @safoinme in https://github.com/zenml-io/zenml/pull/3218 +* More scalability improvements by @schustmi in https://github.com/zenml-io/zenml/pull/3206 +* Use correct keyword for artifact store open by @schustmi in https://github.com/zenml-io/zenml/pull/3220 +* Fix passing of some sagemaker settings by @schustmi in https://github.com/zenml-io/zenml/pull/3221 +* Add hint when trying to connect with api key by @schustmi in 
https://github.com/zenml-io/zenml/pull/3222 +* Allow passing None values as parameter for optional complex types by @schustmi in https://github.com/zenml-io/zenml/pull/3215 +* Limit kubernetes pod and label length by @schustmi in https://github.com/zenml-io/zenml/pull/3217 +* Updating the quickstart example to use the new `log_metadata` by @bcdurak in https://github.com/zenml-io/zenml/pull/3188 +* Allow skipping type annotations for step inputs by @schustmi in https://github.com/zenml-io/zenml/pull/3223 +* Modal Step Operator by @strickvl in https://github.com/zenml-io/zenml/pull/2948 +* Add dynamic artifacts naming, documentation and tests by @avishniakov in https://github.com/zenml-io/zenml/pull/3201 +* Run template CLI command and bugfix by @schustmi in https://github.com/zenml-io/zenml/pull/3225 +* Make dicts/lists visualizable and add JSON as viz type by @wjayesh in https://github.com/zenml-io/zenml/pull/2882 +* Instances of the `FeatureService`s are now used instead of only the names of the FeatureServices. by @aiakide in https://github.com/zenml-io/zenml/pull/3209 +* Quickstart fixes by @schustmi in https://github.com/zenml-io/zenml/pull/3227 +* Add missing docs by @schustmi in https://github.com/zenml-io/zenml/pull/3226 +* Misc cleanup by @schustmi in https://github.com/zenml-io/zenml/pull/3229 +* Fix input resolution for steps with dynamic artifact names by @schustmi in https://github.com/zenml-io/zenml/pull/3228 +* Follow-up on the `run_metadata` changes by @bcdurak in https://github.com/zenml-io/zenml/pull/3193 +* Fixed broken links by @htahir1 in https://github.com/zenml-io/zenml/pull/3232 +* Fixed wandb login problem in Quickstart by @htahir1 in https://github.com/zenml-io/zenml/pull/3233 +* Misc bugfixes by @schustmi in https://github.com/zenml-io/zenml/pull/3234 +* Add additional way to fetch docker repo digest by @schustmi in https://github.com/zenml-io/zenml/pull/3231 +* AWS Image Builder implementation by @stefannica in https://github.com/zenml-io/zenml/pull/2904 +* Disable client-side caching for some orchestrators by @schustmi in https://github.com/zenml-io/zenml/pull/3235 +* Fix artifact uris for artifacts with name placeholders by @schustmi in https://github.com/zenml-io/zenml/pull/3237 +* Materializer test fix on Windows by @bcdurak in https://github.com/zenml-io/zenml/pull/3238 +* Fix GET step run endpoint to return unhydrated response if requested by @schustmi in https://github.com/zenml-io/zenml/pull/3240 +* Pipeline run API token fixes and improvements by @stefannica in https://github.com/zenml-io/zenml/pull/3242 + +**Full Changelog**: https://github.com/zenml-io/zenml/compare/0.70.0...0.71.0 + # 0.70.0 The **ZenML 0.70.0** release includes a significant number of database schema changes and migrations, which means upgrading to this version will require extra caution. As always, please make sure to make a copy of your production database before upgrading. 
diff --git a/examples/quickstart/configs/training_aws.yaml b/examples/quickstart/configs/training_aws.yaml index a6defc08198..2a443a41bb3 100644 --- a/examples/quickstart/configs/training_aws.yaml +++ b/examples/quickstart/configs/training_aws.yaml @@ -1,7 +1,7 @@ # Environment configuration settings: docker: - parent_image: "715803424590.dkr.ecr.eu-central-1.amazonaws.com/zenml-public-pipelines:quickstart-0.70.0-py3.11-aws" + parent_image: "715803424590.dkr.ecr.eu-central-1.amazonaws.com/zenml-public-pipelines:quickstart-0.71.0-py3.11-aws" skip_build: True # If you switch this to False remove the parent_image requirements: requirements.txt environment: diff --git a/examples/quickstart/configs/training_azure.yaml b/examples/quickstart/configs/training_azure.yaml index f126792f56d..bcd945e1b08 100644 --- a/examples/quickstart/configs/training_azure.yaml +++ b/examples/quickstart/configs/training_azure.yaml @@ -1,7 +1,7 @@ # Environment configuration settings: docker: - parent_image: "zenmldocker/zenml-public-pipelines:quickstart-0.70.0-py3.11-azure" + parent_image: "zenmldocker/zenml-public-pipelines:quickstart-0.71.0-py3.11-azure" skip_build: True requirements: requirements.txt environment: diff --git a/examples/quickstart/configs/training_gcp.yaml b/examples/quickstart/configs/training_gcp.yaml index eadd19f3616..f22b355efff 100644 --- a/examples/quickstart/configs/training_gcp.yaml +++ b/examples/quickstart/configs/training_gcp.yaml @@ -1,7 +1,7 @@ # Environment configuration settings: docker: - parent_image: "zenmldocker/zenml-public-pipelines:quickstart-0.70.0-py3.11-gcp" + parent_image: "zenmldocker/zenml-public-pipelines:quickstart-0.71.0-py3.11-gcp" skip_build: True requirements: requirements.txt environment: diff --git a/examples/quickstart/quickstart.ipynb b/examples/quickstart/quickstart.ipynb index 58cd9c79c0f..dccd1fd4d31 100644 --- a/examples/quickstart/quickstart.ipynb +++ b/examples/quickstart/quickstart.ipynb @@ -1,753 +1,753 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "63ab391a", - "metadata": {}, - "source": [ - "# ZenML Quickstart: Bridging Local Development and Cloud Deployment\n", - "\n", - "This repository demonstrates how ZenML streamlines the transition of machine learning workflows from local environments to cloud-scale operations.\n", - "\n", - "## Key advantages:\n", - "\n", - "Deploy to major cloud providers with minimal code changes\n", - "\n", - "* Connect directly to your existing infrastructure\n", - "* Bridge the gap between ML and Ops teams\n", - "* Gain deep insights into pipeline metadata via the ZenML Dashboard\n", - "\n", - "Unlike traditional MLOps tools, ZenML offers unparalleled flexibility and control. It integrates seamlessly with your infrastructure, allowing both ML and Ops teams to collaborate effectively without compromising on their specific requirements.\n", - "\n", - "The notebook guides you through adapting local code for cloud deployment, showcasing ZenML's ability to enhance workflow efficiency while maintaining reproducibility and auditability in production.\n", - "\n", - "Ready to unify your ML development and operations? Let's begin. 
The diagram below \n", - "describes what we'll show you in this example.\n", - "\n", - "\"Pipelines\n", - "\n", - "1) We have done some of the experimenting for you already and created a simple finetuning pipeline for a text-to-text task.\n", - "\n", - "2) We will run this pipeline on your machine and a verify that everything works as expected.\n", - "\n", - "3) Now we'll connect ZenML to your infrastructure and configure everything.\n", - "\n", - "4) Finally, we are ready to run our code remotely.\n", - "\n", - "Follow along this notebook to understand how you can use ZenML to productionalize your ML workflows!" - ] - }, - { - "cell_type": "markdown", - "id": "8f466b16", - "metadata": {}, - "source": [ - "## Run on Colab\n", - "\n", - "You can use Google Colab to run this notebook, no local installation\n", - "required!\n", - "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](\n", - "https://colab.research.google.com/github/zenml-io/zenml/blob/main/examples/quickstart/quickstart.ipynb)" - ] - }, - { - "cell_type": "markdown", - "id": "66b2977c", - "metadata": {}, - "source": [ - "# 👶 Step 0. Install Requirements\n", - "\n", - "Let's install ZenML and all requirement to get started." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e4385bdb-6cc8-4a6b-8de2-a7fd658556aa", - "metadata": {}, - "outputs": [], - "source": [ - "# Choose a cloud provider - at the end of this notebook you will run a pipeline on this cloud provider\n", - "CLOUD_PROVIDER = None # Set this to \"GCP\", \"AWS\" or \"AZURE\" as needed\n", - "\n", - "\n", - "def in_google_colab() -> bool:\n", - " \"\"\"Checks wether this notebook is run in google colab.\"\"\"\n", - " try:\n", - " import google.colab # noqa\n", - "\n", - " return True\n", - "\n", - " except ModuleNotFoundError:\n", - " return False\n", - "\n", - "\n", - "if in_google_colab():\n", - " # Pull required modules from this example\n", - " !git clone -b main https://github.com/zenml-io/zenml\n", - " !cp -r zenml/examples/quickstart/* .\n", - " !rm -rf zenml\n", - "\n", - "\n", - "# Common imports and setup\n", - "if CLOUD_PROVIDER.lower() == \"gcp\":\n", - " !pip install -r requirements_gcp.txt\n", - "\n", - "elif CLOUD_PROVIDER.lower() == \"aws\":\n", - " !pip install -r requirements_aws.txt\n", - "\n", - "elif CLOUD_PROVIDER.lower() == \"azure\":\n", - " !pip install -r requirements_azure.txt\n", - "\n", - "else: # In this case the second half of the notebook won't work for you\n", - " !pip install -r requirements.txt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f76f562e", - "metadata": {}, - "outputs": [], - "source": [ - "# Restart Kernel to ensure all libraries are properly loaded\n", - "import IPython\n", - "\n", - "IPython.Application.instance().kernel.do_shutdown(restart=True)" - ] - }, - { - "cell_type": "markdown", - "id": "3b044374", - "metadata": {}, - "source": [ - "\n", - "Please wait for the installation to complete before running subsequent cells. At\n", - "the end of the installation, the notebook kernel will restart." - ] - }, - { - "cell_type": "markdown", - "id": "966ce581", - "metadata": {}, - "source": [ - "## ☁️ Step 1: Connect to your ZenML Server\n", - "To run this quickstart you need to connect to a ZenML Server. You can deploy it [yourself on your own infrastructure](https://docs.zenml.io/getting-started/deploying-zenml) or try it out for free, no credit-card required in our [ZenML Pro managed service](https://zenml.io/pro)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e2587315", - "metadata": {}, - "outputs": [], - "source": [ - "zenml_server_url = (\n", - " None # INSERT URL TO SERVER HERE in the form \"https://URL_TO_SERVER\"\n", - ")\n", - "\n", - "assert zenml_server_url\n", - "\n", - "!zenml login $zenml_server_url" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f78a2f42-7a53-45f1-b45b-77bfc3762260", - "metadata": {}, - "outputs": [], - "source": [ - "# Disable wandb\n", - "import os\n", - "\n", - "os.environ[\"WANDB_DISABLED\"] = \"true\"\n", - "\n", - "# Initialize ZenML and define the root for imports and docker builds\n", - "!zenml init\n", - "\n", - "!zenml stack set default" - ] - }, - { - "cell_type": "markdown", - "id": "35e48460", - "metadata": {}, - "source": [ - "## 🥇 Step 2: Build and run your first pipeline\n", - "\n", - "In this quickstart we'll be working with a small dataset of sentences in old english paired with more modern formulations. The task is a text-to-text transformation.\n", - "\n", - "When you're getting started with a machine learning problem you'll want to break down your code into distinct functions that load your data, bring it into the correct shape and finally produce a model. This is the experimentation phase where we try to massage our data into the right format and feed it into our model training.\n", - "\n", - "\"Experimentation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3cd974d1", - "metadata": {}, - "outputs": [], - "source": [ - "import requests\n", - "from datasets import Dataset\n", - "from typing_extensions import Annotated\n", - "\n", - "from zenml import step\n", - "\n", - "PROMPT = \"\" # In case you want to also use a prompt you can set it here\n", - "\n", - "\n", - "def read_data_from_url(url):\n", - " \"\"\"Reads data from url.\n", - "\n", - " Assumes the individual data points are linebreak separated\n", - " and input, targets are separated by a `|` pipe.\n", - " \"\"\"\n", - " inputs = []\n", - " targets = []\n", - "\n", - " response = requests.get(url)\n", - " response.raise_for_status() # Raise an exception for bad responses\n", - "\n", - " for line in response.text.splitlines():\n", - " old, modern = line.strip().split(\"|\")\n", - " inputs.append(f\"{PROMPT}{old}\")\n", - " targets.append(modern)\n", - "\n", - " return {\"input\": inputs, \"target\": targets}\n", - "\n", - "\n", - "@step\n", - "def load_data(\n", - " data_url: str,\n", - ") -> Annotated[Dataset, \"full_dataset\"]:\n", - " \"\"\"Load and prepare the dataset.\"\"\"\n", - "\n", - " # Fetch and process the data\n", - " data = read_data_from_url(data_url)\n", - "\n", - " # Convert to Dataset\n", - " return Dataset.from_dict(data)" - ] - }, - { - "cell_type": "markdown", - "id": "b6286b67", - "metadata": {}, - "source": [ - "ZenML is built in a way that allows you to experiment with your data and build\n", - "your pipelines one step at a time. If you want to call this function to see how it\n", - "works, you can just call it directly. Here we take a look at the first few rows\n", - "of your training dataset." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d838e2ea", - "metadata": {}, - "outputs": [], - "source": [ - "data_source = \"https://storage.googleapis.com/zenml-public-bucket/quickstart-files/translations.txt\"\n", - "\n", - "dataset = load_data(data_url=data_source)\n", - "print(f\"Input: {dataset['input'][1]} - Target: {dataset['target'][1]}\")" - ] - }, - { - "cell_type": "markdown", - "id": "28c05291", - "metadata": {}, - "source": [ - "Everything looks as we'd expect and the input/output pair looks to be in the right format 🥳.\n", - "\n", - "For the sake of this quickstart we have prepared a few steps in the steps-directory. We'll now connect these together into a pipeline. To do this simply plug multiple steps together through their inputs and outputs. Then just add the `@pipeline` decorator to the function that connects the steps." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b50a9537", - "metadata": {}, - "outputs": [], - "source": [ - "import materializers\n", - "from steps import (\n", - " evaluate_model,\n", - " load_data,\n", - " split_dataset,\n", - " test_model,\n", - " tokenize_data,\n", - " train_model,\n", - ")\n", - "from steps.model_trainer import T5_Model\n", - "\n", - "from zenml import Model, pipeline\n", - "from zenml.client import Client\n", - "\n", - "assert materializers\n", - "\n", - "# Initialize the ZenML client to fetch objects from the ZenML Server\n", - "client = Client()\n", - "\n", - "Client().activate_stack(\n", - " \"default\"\n", - ") # We will start by using the default stack which is local\n", - "\n", - "model_name = \"YeOldeEnglishTranslator\"\n", - "model = Model(\n", - " name=\"YeOldeEnglishTranslator\",\n", - " description=\"Model to translate from old to modern english\",\n", - " tags=[\"quickstart\", \"llm\", \"t5\"],\n", - ")\n", - "\n", - "\n", - "@pipeline(model=model)\n", - "def english_translation_pipeline(\n", - " data_url: str,\n", - " model_type: T5_Model,\n", - " per_device_train_batch_size: int,\n", - " gradient_accumulation_steps: int,\n", - " dataloader_num_workers: int,\n", - " num_train_epochs: int = 5,\n", - "):\n", - " \"\"\"Define a pipeline that connects the steps.\"\"\"\n", - " full_dataset = load_data(data_url)\n", - " tokenized_dataset, tokenizer = tokenize_data(\n", - " dataset=full_dataset, model_type=model_type\n", - " )\n", - " tokenized_train_dataset, tokenized_eval_dataset, tokenized_test_dataset = (\n", - " split_dataset(\n", - " tokenized_dataset,\n", - " train_size=0.7,\n", - " test_size=0.1,\n", - " eval_size=0.2,\n", - " subset_size=0.1, # We use a subset of the dataset to speed things up\n", - " random_state=42,\n", - " )\n", - " )\n", - " model = train_model(\n", - " tokenized_dataset=tokenized_train_dataset,\n", - " model_type=model_type,\n", - " num_train_epochs=num_train_epochs,\n", - " per_device_train_batch_size=per_device_train_batch_size,\n", - " gradient_accumulation_steps=gradient_accumulation_steps,\n", - " dataloader_num_workers=dataloader_num_workers,\n", - " )\n", - " evaluate_model(model=model, tokenized_dataset=tokenized_eval_dataset)\n", - " test_model(\n", - " model=model,\n", - " tokenized_test_dataset=tokenized_test_dataset,\n", - " tokenizer=tokenizer,\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "7cd73c23", - "metadata": {}, - "source": [ - "We're ready to run the pipeline now, which we can do just as with the step - by calling the\n", - "pipeline function itself:" - ] - }, - { - "cell_type": "code", - "execution_count": 
null, - "id": "1e0aa9af", - "metadata": {}, - "outputs": [], - "source": [ - "# Run the pipeline and configure some parameters at runtime\n", - "pipeline_run = english_translation_pipeline(\n", - " data_url=\"https://storage.googleapis.com/zenml-public-bucket/quickstart-files/translations.txt\",\n", - " model_type=\"t5-small\",\n", - " num_train_epochs=1, # to make this demo fast, we start at 1 epoch\n", - " per_device_train_batch_size=2,\n", - " gradient_accumulation_steps=4,\n", - " dataloader_num_workers=4,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "6c42078a", - "metadata": {}, - "source": [ - "As you can see the pipeline has run successfully. Here is a sneak-peak of the dashboard view into this pipeline. The URL for this view can be found in the logs.\n", - "\n", - "\"Dashboard\n", - "\n", - "This isn't all that the ZenML Dashboard has to offer, if you navigate over to the ZenML Model control plane, you'll also find the produced model along with a lot of important metadata.\n", - "\n", - "\"Model\n", - "\n", - "Here you'll also see a collection of example Input-Output pairs. As you can see, the model is currently not performing its task well." - ] - }, - { - "cell_type": "markdown", - "id": "a037f09d", - "metadata": {}, - "source": [ - "We can now access the trained model and it's tokenizer from the ZenML Model Control Plane. This will allow us to interact with the model directly." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "53e514ac-1a0a-49a0-b8a4-e33cee12c765", - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "\n", - "# load the model object\n", - "model = client.get_model_version(model_name).get_model_artifact(\"model\").load()\n", - "tokenizer = (\n", - " client.get_model_version(model_name).get_artifact(\"tokenizer\").load()\n", - ")\n", - "\n", - "test_text = \"I do desire we may be better strangers\" # Insert your own test sentence here\n", - "\n", - "input_ids = tokenizer(\n", - " test_text,\n", - " return_tensors=\"pt\",\n", - " max_length=128,\n", - " truncation=True,\n", - " padding=\"max_length\",\n", - ").input_ids\n", - "\n", - "with torch.no_grad():\n", - " outputs = model.generate(\n", - " input_ids,\n", - " max_length=128,\n", - " num_return_sequences=1,\n", - " no_repeat_ngram_size=2,\n", - " top_k=50,\n", - " top_p=0.95,\n", - " temperature=0.7,\n", - " )\n", - "\n", - "decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)\n", - "\n", - "print(decoded_output)" - ] - }, - { - "cell_type": "markdown", - "id": "1e653c7a-4073-424e-8a59-c69f49526b96", - "metadata": {}, - "source": [ - "## Lets recap what we've done so far\n", - "\n", - "We created a modular pipeline, this pipeline is modularly constructed from different steps. We have shown that this pipeline runs locally.\n", - "\n", - "As expected, the modcel does not yet solve its task. To train a model that can solve our task well, we would have to train a larger model for longer. For this, we'll need to move away from our local environment. " - ] - }, - { - "cell_type": "markdown", - "id": "8c28b474", - "metadata": {}, - "source": [ - "# ⌚ Step 3: Scale it up in the cloud" - ] - }, - { - "cell_type": "markdown", - "id": "a791b32b-f6be-4ae2-867c-5e628f363858", - "metadata": {}, - "source": [ - "Our last section confirmed to us, that the pipeline works. Let's now run the pipeline in the environment of your choice.\n", - "\n", - "For you to be able to try this step, you will need to have access to a cloud environment (AWS, GCP, AZURE). 
ZenML wraps around all the major cloud providers and orchestration tools and lets you easily deploy your code onto them.\n", - "\n", - "To do this lets head over to the `Stack` section of your ZenML Dashboard. Here you'll be able to either connect to an existing or deploy a new environment. Choose on of the options presented to you there and come back when you have a stack ready to go. \n", - "\n", - "\"Stack\n", - "\n", - "Then proceed to the section below. Also be sure that you are running with a remote ZenML server (see Step 1 above)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2e7a90b5-78c3-4019-8a81-671b5d62d470", - "metadata": {}, - "outputs": [], - "source": [ - "from zenml.environment import Environment\n", - "\n", - "# Set the cloud provider here\n", - "CLOUD_PROVIDER = None # Set this to \"GCP\", \"AWS\" or \"AZURE\"\n", - "assert CLOUD_PROVIDER\n", - "\n", - "# Set the name of the stack that you created within zenml\n", - "stack_name = None # Set this\n", - "assert stack_name # Set your stack, follow instruction above\n", - "\n", - "from zenml import pipeline\n", - "from zenml.client import Client\n", - "from zenml.config import DockerSettings\n", - "\n", - "settings = {}\n", - "\n", - "# Common imports and setup\n", - "if CLOUD_PROVIDER.lower() == \"gcp\":\n", - " parent_image = (\n", - " \"zenmldocker/zenml-public-pipelines:quickstart-0.70.0-py3.11-gcp\"\n", - " )\n", - " skip_build = True\n", - "\n", - "elif CLOUD_PROVIDER.lower() == \"aws\":\n", - " from zenml.integrations.aws.flavors.sagemaker_orchestrator_flavor import (\n", - " SagemakerOrchestratorSettings,\n", - " )\n", - "\n", - " parent_image = \"339712793861.dkr.ecr.eu-central-1.amazonaws.com/zenml-public-pipelines:quickstart-0.70.0-py3.11-aws\"\n", - " skip_build = True # if you switch this to False, you need to remove the parent image\n", - "\n", - " settings[\"orchestrator.sagemaker\"] = SagemakerOrchestratorSettings(\n", - " instance_type=\"ml.m5.4xlarge\"\n", - " )\n", - "\n", - "elif CLOUD_PROVIDER.lower() == \"azure\":\n", - " parent_image = (\n", - " \"zenmldocker/zenml-public-pipelines:quickstart-0.70.0-py3.11-azure\"\n", - " )\n", - " skip_build = True\n", - "\n", - "Client().activate_stack(stack_name)\n", - "\n", - "data_source = \"https://storage.googleapis.com/zenml-public-bucket/quickstart-files/translations.txt\"\n", - "\n", - "# We've prebuilt a docker image for this quickstart to speed things up, feel free to delete the DockerSettings to build from scratch\n", - "settings[\"docker\"] = DockerSettings(\n", - " parent_image=parent_image, skip_build=skip_build\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "8a5cd031-0661-4073-a5ea-6aac5f989212", - "metadata": {}, - "source": [ - "If you are in a google colab you might need to rerun the cell above a second time after the runtime restarted." - ] - }, - { - "cell_type": "markdown", - "id": "95f17b7a-5a82-4975-b9bd-6a63fbb97a68", - "metadata": {}, - "source": [ - "## 🚀 Ready to launch" - ] - }, - { - "cell_type": "markdown", - "id": "df14f30c-9a8e-46ca-ba44-cf16ea715dac", - "metadata": {}, - "source": [ - "We now have configured a ZenML stack that represents your very own cloud infrastructure. For the next pipeline run, we'll be training the same t5 model (`t5_small`) on your own infrastrucutre.\n", - "\n", - "Note: The whole process may take a bit longer the first time around, as your pipeline code needs to be built into docker containers to be run in the orchestration environment of your stack. 
Any consecutive run of the pipeline, even with different parameters set, will not take as long again thanks to docker caching." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cfad9bd5", - "metadata": {}, - "outputs": [], - "source": [ - "# In the case that we are within a colab environment we want to remove\n", - "# these folders\n", - "if Environment.in_google_colab():\n", - " !rm -rf results\n", - " !rm -rf sample_data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "12e758fe-6ea3-42ff-bea8-33953135bf6b", - "metadata": {}, - "outputs": [], - "source": [ - "from pipelines import (\n", - " english_translation_pipeline,\n", - ")\n", - "\n", - "from zenml import Model\n", - "\n", - "model_name = \"YeOldeEnglishTranslator\"\n", - "model = Model(\n", - " name=\"YeOldeEnglishTranslator\",\n", - ")\n", - "\n", - "pipeline_run = english_translation_pipeline.with_options(\n", - " settings=settings, model=model\n", - ")(\n", - " data_url=\"https://storage.googleapis.com/zenml-public-bucket/quickstart-files/translations.txt\",\n", - " model_type=\"t5-small\",\n", - " num_train_epochs=2,\n", - " per_device_train_batch_size=4,\n", - " gradient_accumulation_steps=4,\n", - " dataloader_num_workers=0, # Some cloud environment don't support multiple of these\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "c7eef2c6-6dfb-4b67-9883-594a0df20173", - "metadata": {}, - "source": [ - "You did it! You build a pipeline locally, verified that all its parts work well together and now are running it on a production environment\n", - "\n", - "\"Pipeline\n", - "\n", - "Depending on the backend you chose, you can also go inspect your run in the orchestrator of your choice. Here is an example on GCP Vertex:\n", - "\n", - "\"Pipeline" - ] - }, - { - "cell_type": "markdown", - "id": "1a03054e-8b3e-4edb-9d87-82ae51693d2d", - "metadata": {}, - "source": [ - "## Adding Accelerators\n", - "Each of the cloud providers allows users to add accelerators to their serverless offerings. Here's what you need to add to the pipeline settings in order to unlock gpus. Keep in mind, that you might have to increase your quotas within the cloud providers." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a2f7c3c5-dcde-4824-a012-f3da224cb8d8", - "metadata": {}, - "outputs": [], - "source": [ - "from zenml.config import ResourceSettings\n", - "\n", - "if CLOUD_PROVIDER == \"GCP\":\n", - " from zenml.integrations.gcp.flavors.vertex_orchestrator_flavor import (\n", - " VertexOrchestratorSettings,\n", - " )\n", - "\n", - " # find out about your options here: https://docs.zenml.io/stack-components/orchestrators/vertex#additional-configuration\n", - "\n", - " english_translation_pipeline.with_options(\n", - " settings={\n", - " \"orchestrator.vertex\": VertexOrchestratorSettings(\n", - " node_selector_constraint=(\n", - " \"cloud.google.com/gke-accelerator\",\n", - " \"NVIDIA_TESLA_P4\",\n", - " )\n", - " ),\n", - " \"resources\": ResourceSettings(memory=\"32GB\", gpu_count=1),\n", - " }\n", - " )\n", - "if CLOUD_PROVIDER == \"AWS\":\n", - " from zenml.integrations.aws.flavors.sagemaker_orchestrator_flavor import (\n", - " SagemakerOrchestratorSettings,\n", - " )\n", - "\n", - " # find out your options here: https://docs.zenml.io/stack-components/orchestrators/sagemaker#configuration-at-pipeline-or-step-level\n", - "\n", - " english_translation_pipeline.with_options(\n", - " settings={\n", - " \"orchestrator.sagemaker\": SagemakerOrchestratorSettings(\n", - " instance_type=\"ml.p2.xlarge\"\n", - " )\n", - " }\n", - " )\n", - "if CLOUD_PROVIDER == \"AZURE\":\n", - " from zenml.integrations.azure.flavors import AzureMLOrchestratorSettings\n", - "\n", - " # find out your options here: https://docs.zenml.io/stack-components/orchestrators/azureml#settings\n", - " # The quickest way is probably to configure a compute-instance in azure ml. This instance should contain\n", - " # a gpu. Then specify the name of the compute instance here.\n", - "\n", - " compute_name = None # Insert the name of your compute instance here\n", - "\n", - " english_translation_pipeline.with_options(\n", - " settings={\n", - " \"orchestrator.azureml\": AzureMLOrchestratorSettings(\n", - " mode=\"compute-instance\", compute_name=compute_name\n", - " )\n", - " }\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "8231677c-1fd6-4ec3-8c8c-47fd9406072e", - "metadata": {}, - "source": [ - "## Now it's up to you" - ] - }, - { - "cell_type": "markdown", - "id": "90c31e0d-dfab-4692-a406-0dc439f25443", - "metadata": {}, - "source": [ - "You can now start worrying about making the model actually work well on our toy example or any other dataset you like.\n", - "\n", - "Here are some things that you could do:\n", - "\n", - "* Iterate on the training data and its tokenization\n", - "* You can switch out the model itself. Instead of `model_type=\"t5_small\"` you could use `model_type=\"t5_large\"` for example\n", - "* You can train for longer by increasing the `num_train_epochs=xxx`. In order to speed this up you can also add accelerators to your orchestrators. Learn more about this in the section below.\n", - "\n", - "No matter what avenue you choose to actually make the model work, we would love to see how you did it, so please reach out and share your solution with us either on [**Slack Community**](https://zenml.io/slack) or through our email hello@zenml.io." 
- ]
- },
- {
- "cell_type": "markdown",
- "id": "594ee4fc-f102-4b99-bdc3-2f1670c87679",
- "metadata": {},
- "source": [
- "## Further exploration\n",
- "\n",
- "This was just the tip of the iceberg of what ZenML can do; check out the [**docs**](https://docs.zenml.io/) to learn more\n",
- "about the capabilities of ZenML. For example, you might want to:\n",
- "\n",
- "- [Deploy ZenML](https://docs.zenml.io/user-guide/production-guide/connect-deployed-zenml) to collaborate with your colleagues.\n",
- "- Run the same pipeline on a [cloud MLOps stack in production](https://docs.zenml.io/user-guide/production-guide/cloud-stack).\n",
- "- Track your metrics in an experiment tracker like [MLflow](https://docs.zenml.io/stacks-and-components/component-guide/experiment-trackers/mlflow).\n",
- "\n",
- "## What next?\n",
- "\n",
- "* If you have questions or feedback... join our [**Slack Community**](https://zenml.io/slack) and become part of the ZenML family!\n",
- "* If you want to quickly get started with ZenML, check out [ZenML Pro](https://zenml.io/pro)."
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.11.9"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "63ab391a",
+ "metadata": {},
+ "source": [
+ "# ZenML Quickstart: Bridging Local Development and Cloud Deployment\n",
+ "\n",
+ "This repository demonstrates how ZenML streamlines the transition of machine learning workflows from local environments to cloud-scale operations.\n",
+ "\n",
+ "## Key advantages:\n",
+ "\n",
+ "* Deploy to major cloud providers with minimal code changes\n",
+ "* Connect directly to your existing infrastructure\n",
+ "* Bridge the gap between ML and Ops teams\n",
+ "* Gain deep insights into pipeline metadata via the ZenML Dashboard\n",
+ "\n",
+ "Unlike traditional MLOps tools, ZenML offers unparalleled flexibility and control. It integrates seamlessly with your infrastructure, allowing both ML and Ops teams to collaborate effectively without compromising on their specific requirements.\n",
+ "\n",
+ "The notebook guides you through adapting local code for cloud deployment, showcasing ZenML's ability to enhance workflow efficiency while maintaining reproducibility and auditability in production.\n",
+ "\n",
+ "Ready to unify your ML development and operations? Let's begin. The diagram below \n",
+ "describes what we'll show you in this example.\n",
+ "\n",
+ "\"Pipelines\n",
+ "\n",
+ "1) We have done some of the experimenting for you already and created a simple finetuning pipeline for a text-to-text task.\n",
+ "\n",
+ "2) We will run this pipeline on your machine and verify that everything works as expected.\n",
+ "\n",
+ "3) Now we'll connect ZenML to your infrastructure and configure everything.\n",
+ "\n",
+ "4) Finally, we are ready to run our code remotely.\n",
+ "\n",
+ "Follow along with this notebook to understand how you can use ZenML to productionalize your ML workflows!"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8f466b16",
+ "metadata": {},
+ "source": [
+ "## Run on Colab\n",
+ "\n",
+ "You can use Google Colab to run this notebook; no local installation\n",
+ "required!\n",
+ "\n",
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](\n",
+ "https://colab.research.google.com/github/zenml-io/zenml/blob/main/examples/quickstart/quickstart.ipynb)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "66b2977c",
+ "metadata": {},
+ "source": [
+ "# 👶 Step 0. Install Requirements\n",
+ "\n",
+ "Let's install ZenML and all requirements to get started."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e4385bdb-6cc8-4a6b-8de2-a7fd658556aa",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Choose a cloud provider - at the end of this notebook you will run a pipeline on this cloud provider\n",
+ "CLOUD_PROVIDER = None # Set this to \"GCP\", \"AWS\" or \"AZURE\" as needed\n",
+ "\n",
+ "\n",
+ "def in_google_colab() -> bool:\n",
+ " \"\"\"Checks whether this notebook is run in Google Colab.\"\"\"\n",
+ " try:\n",
+ " import google.colab # noqa\n",
+ "\n",
+ " return True\n",
+ "\n",
+ " except ModuleNotFoundError:\n",
+ " return False\n",
+ "\n",
+ "\n",
+ "if in_google_colab():\n",
+ " # Pull required modules from this example\n",
+ " !git clone -b main https://github.com/zenml-io/zenml\n",
+ " !cp -r zenml/examples/quickstart/* .\n",
+ " !rm -rf zenml\n",
+ "\n",
+ "\n",
+ "# Install the requirements for the chosen provider; guard against\n",
+ "# CLOUD_PROVIDER being None before calling .lower()\n",
+ "provider = (CLOUD_PROVIDER or \"\").lower()\n",
+ "\n",
+ "if provider == \"gcp\":\n",
+ " !pip install -r requirements_gcp.txt\n",
+ "\n",
+ "elif provider == \"aws\":\n",
+ " !pip install -r requirements_aws.txt\n",
+ "\n",
+ "elif provider == \"azure\":\n",
+ " !pip install -r requirements_azure.txt\n",
+ "\n",
+ "else: # In this case the second half of the notebook won't work for you\n",
+ " !pip install -r requirements.txt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f76f562e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Restart Kernel to ensure all libraries are properly loaded\n",
+ "import IPython\n",
+ "\n",
+ "IPython.Application.instance().kernel.do_shutdown(restart=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3b044374",
+ "metadata": {},
+ "source": [
+ "\n",
+ "Please wait for the installation to complete before running subsequent cells. At\n",
+ "the end of the installation, the notebook kernel will restart."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "966ce581",
+ "metadata": {},
+ "source": [
+ "## ☁️ Step 1: Connect to your ZenML Server\n",
+ "To run this quickstart you need to connect to a ZenML Server. You can deploy it [yourself on your own infrastructure](https://docs.zenml.io/getting-started/deploying-zenml) or try it out for free, no credit card required, in our [ZenML Pro managed service](https://zenml.io/pro)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e2587315",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "zenml_server_url = (\n",
+ " None # INSERT URL TO SERVER HERE in the form \"https://URL_TO_SERVER\"\n",
+ ")\n",
+ "\n",
+ "assert zenml_server_url\n",
+ "\n",
+ "!zenml login $zenml_server_url"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f78a2f42-7a53-45f1-b45b-77bfc3762260",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Disable wandb\n",
+ "import os\n",
+ "\n",
+ "os.environ[\"WANDB_DISABLED\"] = \"true\"\n",
+ "\n",
+ "# Initialize ZenML and define the root for imports and docker builds\n",
+ "!zenml init\n",
+ "\n",
+ "!zenml stack set default"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "35e48460",
+ "metadata": {},
+ "source": [
+ "## 🥇 Step 2: Build and run your first pipeline\n",
+ "\n",
+ "In this quickstart we'll be working with a small dataset of sentences in Old English paired with more modern formulations. The task is a text-to-text transformation.\n",
+ "\n",
+ "When you're getting started with a machine learning problem, you'll want to break down your code into distinct functions that load your data, bring it into the correct shape, and finally produce a model. This is the experimentation phase where we try to massage our data into the right format and feed it into our model training.\n",
+ "\n",
+ "\"Experimentation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3cd974d1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import requests\n",
+ "from datasets import Dataset\n",
+ "from typing_extensions import Annotated\n",
+ "\n",
+ "from zenml import step\n",
+ "\n",
+ "PROMPT = \"\" # In case you want to also use a prompt you can set it here\n",
+ "\n",
+ "\n",
+ "def read_data_from_url(url):\n",
+ " \"\"\"Reads data from a URL.\n",
+ "\n",
+ " Assumes the individual data points are separated by linebreaks,\n",
+ " and that inputs and targets are separated by a `|` pipe.\n",
+ " \"\"\"\n",
+ " inputs = []\n",
+ " targets = []\n",
+ "\n",
+ " response = requests.get(url)\n",
+ " response.raise_for_status() # Raise an exception for bad responses\n",
+ "\n",
+ " for line in response.text.splitlines():\n",
+ " old, modern = line.strip().split(\"|\")\n",
+ " inputs.append(f\"{PROMPT}{old}\")\n",
+ " targets.append(modern)\n",
+ "\n",
+ " return {\"input\": inputs, \"target\": targets}\n",
+ "\n",
+ "\n",
+ "@step\n",
+ "def load_data(\n",
+ " data_url: str,\n",
+ ") -> Annotated[Dataset, \"full_dataset\"]:\n",
+ " \"\"\"Load and prepare the dataset.\"\"\"\n",
+ "\n",
+ " # Fetch and process the data\n",
+ " data = read_data_from_url(data_url)\n",
+ "\n",
+ " # Convert to Dataset\n",
+ " return Dataset.from_dict(data)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b6286b67",
+ "metadata": {},
+ "source": [
+ "ZenML is built in a way that allows you to experiment with your data and build\n",
+ "your pipelines one step at a time. If you want to call this function to see how it\n",
+ "works, you can just call it directly. Here we take a look at the first few rows\n",
+ "of your training dataset."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d838e2ea",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_source = \"https://storage.googleapis.com/zenml-public-bucket/quickstart-files/translations.txt\"\n",
+ "\n",
+ "dataset = load_data(data_url=data_source)\n",
+ "print(f\"Input: {dataset['input'][1]} - Target: {dataset['target'][1]}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "28c05291",
+ "metadata": {},
+ "source": [
+ "Everything looks as we'd expect, and the input/output pair looks to be in the right format 🥳.\n",
+ "\n",
+ "For the sake of this quickstart we have prepared a few steps in the `steps` directory. We'll now connect these together into a pipeline. To do this, simply plug multiple steps together through their inputs and outputs. Then just add the `@pipeline` decorator to the function that connects the steps."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b50a9537",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import materializers\n",
+ "from steps import (\n",
+ " evaluate_model,\n",
+ " load_data,\n",
+ " split_dataset,\n",
+ " test_model,\n",
+ " tokenize_data,\n",
+ " train_model,\n",
+ ")\n",
+ "from steps.model_trainer import T5_Model\n",
+ "\n",
+ "from zenml import Model, pipeline\n",
+ "from zenml.client import Client\n",
+ "\n",
+ "assert materializers\n",
+ "\n",
+ "# Initialize the ZenML client to fetch objects from the ZenML Server\n",
+ "client = Client()\n",
+ "\n",
+ "Client().activate_stack(\n",
+ " \"default\"\n",
+ ") # We will start by using the default stack which is local\n",
+ "\n",
+ "model_name = \"YeOldeEnglishTranslator\"\n",
+ "model = Model(\n",
+ " name=\"YeOldeEnglishTranslator\",\n",
+ " description=\"Model to translate from old to modern english\",\n",
+ " tags=[\"quickstart\", \"llm\", \"t5\"],\n",
+ ")\n",
+ "\n",
+ "\n",
+ "@pipeline(model=model)\n",
+ "def english_translation_pipeline(\n",
+ " data_url: str,\n",
+ " model_type: T5_Model,\n",
+ " per_device_train_batch_size: int,\n",
+ " gradient_accumulation_steps: int,\n",
+ " dataloader_num_workers: int,\n",
+ " num_train_epochs: int = 5,\n",
+ "):\n",
+ " \"\"\"Define a pipeline that connects the steps.\"\"\"\n",
+ " full_dataset = load_data(data_url)\n",
+ " tokenized_dataset, tokenizer = tokenize_data(\n",
+ " dataset=full_dataset, model_type=model_type\n",
+ " )\n",
+ " tokenized_train_dataset, tokenized_eval_dataset, tokenized_test_dataset = (\n",
+ " split_dataset(\n",
+ " tokenized_dataset,\n",
+ " train_size=0.7,\n",
+ " test_size=0.1,\n",
+ " eval_size=0.2,\n",
+ " subset_size=0.1, # We use a subset of the dataset to speed things up\n",
+ " random_state=42,\n",
+ " )\n",
+ " )\n",
+ " model = train_model(\n",
+ " tokenized_dataset=tokenized_train_dataset,\n",
+ " model_type=model_type,\n",
+ " num_train_epochs=num_train_epochs,\n",
+ " per_device_train_batch_size=per_device_train_batch_size,\n",
+ " gradient_accumulation_steps=gradient_accumulation_steps,\n",
+ " dataloader_num_workers=dataloader_num_workers,\n",
+ " )\n",
+ " evaluate_model(model=model, tokenized_dataset=tokenized_eval_dataset)\n",
+ " test_model(\n",
+ " model=model,\n",
+ " tokenized_test_dataset=tokenized_test_dataset,\n",
+ " tokenizer=tokenizer,\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7cd73c23",
+ "metadata": {},
+ "source": [
+ "We're ready to run the pipeline now, which we can do just as with the step, by calling the\n",
+ "pipeline function itself:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count":
null,
+ "id": "1e0aa9af",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Run the pipeline and configure some parameters at runtime\n",
+ "pipeline_run = english_translation_pipeline(\n",
+ " data_url=\"https://storage.googleapis.com/zenml-public-bucket/quickstart-files/translations.txt\",\n",
+ " model_type=\"t5-small\",\n",
+ " num_train_epochs=1, # to make this demo fast, we start at 1 epoch\n",
+ " per_device_train_batch_size=2,\n",
+ " gradient_accumulation_steps=4,\n",
+ " dataloader_num_workers=4,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6c42078a",
+ "metadata": {},
+ "source": [
+ "As you can see, the pipeline has run successfully. Here is a sneak peek of the dashboard view into this pipeline. The URL for this view can be found in the logs.\n",
+ "\n",
+ "\"Dashboard\n",
+ "\n",
+ "This isn't all that the ZenML Dashboard has to offer. If you navigate over to the ZenML Model control plane, you'll also find the produced model along with a lot of important metadata.\n",
+ "\n",
+ "\"Model\n",
+ "\n",
+ "Here you'll also see a collection of example Input-Output pairs. As you can see, the model is currently not performing its task well."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a037f09d",
+ "metadata": {},
+ "source": [
+ "We can now access the trained model and its tokenizer from the ZenML Model Control Plane. This will allow us to interact with the model directly."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "53e514ac-1a0a-49a0-b8a4-e33cee12c765",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "\n",
+ "# load the model object\n",
+ "model = client.get_model_version(model_name).get_model_artifact(\"model\").load()\n",
+ "tokenizer = (\n",
+ " client.get_model_version(model_name).get_artifact(\"tokenizer\").load()\n",
+ ")\n",
+ "\n",
+ "test_text = \"I do desire we may be better strangers\" # Insert your own test sentence here\n",
+ "\n",
+ "input_ids = tokenizer(\n",
+ " test_text,\n",
+ " return_tensors=\"pt\",\n",
+ " max_length=128,\n",
+ " truncation=True,\n",
+ " padding=\"max_length\",\n",
+ ").input_ids\n",
+ "\n",
+ "with torch.no_grad():\n",
+ " outputs = model.generate(\n",
+ " input_ids,\n",
+ " max_length=128,\n",
+ " num_return_sequences=1,\n",
+ " no_repeat_ngram_size=2,\n",
+ " top_k=50,\n",
+ " top_p=0.95,\n",
+ " temperature=0.7,\n",
+ " )\n",
+ "\n",
+ "decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
+ "\n",
+ "print(decoded_output)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1e653c7a-4073-424e-8a59-c69f49526b96",
+ "metadata": {},
+ "source": [
+ "## Let's recap what we've done so far\n",
+ "\n",
+ "We created a modular pipeline composed of distinct steps, and we have shown that this pipeline runs locally.\n",
+ "\n",
+ "As expected, the model does not yet solve its task. To train a model that can solve our task well, we would have to train a larger model for longer. For this, we'll need to move away from our local environment."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8c28b474",
+ "metadata": {},
+ "source": [
+ "# ⌚ Step 3: Scale it up in the cloud"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a791b32b-f6be-4ae2-867c-5e628f363858",
+ "metadata": {},
+ "source": [
+ "Our last section confirmed that the pipeline works. Let's now run the pipeline in the environment of your choice.\n",
+ "\n",
+ "For you to be able to try this step, you will need to have access to a cloud environment (AWS, GCP, AZURE). ZenML wraps around all the major cloud providers and orchestration tools and lets you easily deploy your code onto them.\n",
+ "\n",
+ "To do this, let's head over to the `Stack` section of your ZenML Dashboard. Here you'll be able to either connect to an existing environment or deploy a new one. Choose one of the options presented to you there and come back when you have a stack ready to go.\n",
+ "\n",
+ "\"Stack\n",
+ "\n",
+ "Then proceed to the section below. Also be sure that you are running with a remote ZenML server (see Step 1 above)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2e7a90b5-78c3-4019-8a81-671b5d62d470",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from zenml.environment import Environment\n",
+ "\n",
+ "# Set the cloud provider here\n",
+ "CLOUD_PROVIDER = None # Set this to \"GCP\", \"AWS\" or \"AZURE\"\n",
+ "assert CLOUD_PROVIDER\n",
+ "\n",
+ "# Set the name of the stack that you created within zenml\n",
+ "stack_name = None # Set this\n",
+ "assert stack_name # Set your stack, follow instructions above\n",
+ "\n",
+ "from zenml import pipeline\n",
+ "from zenml.client import Client\n",
+ "from zenml.config import DockerSettings\n",
+ "\n",
+ "settings = {}\n",
+ "\n",
+ "# Common imports and setup\n",
+ "if CLOUD_PROVIDER.lower() == \"gcp\":\n",
+ " parent_image = (\n",
+ " \"zenmldocker/zenml-public-pipelines:quickstart-0.71.0-py3.11-gcp\"\n",
+ " )\n",
+ " skip_build = True\n",
+ "\n",
+ "elif CLOUD_PROVIDER.lower() == \"aws\":\n",
+ " from zenml.integrations.aws.flavors.sagemaker_orchestrator_flavor import (\n",
+ " SagemakerOrchestratorSettings,\n",
+ " )\n",
+ "\n",
+ " parent_image = \"339712793861.dkr.ecr.eu-central-1.amazonaws.com/zenml-public-pipelines:quickstart-0.71.0-py3.11-aws\"\n",
+ " skip_build = True # if you switch this to False, you need to remove the parent image\n",
+ "\n",
+ " settings[\"orchestrator.sagemaker\"] = SagemakerOrchestratorSettings(\n",
+ " instance_type=\"ml.m5.4xlarge\"\n",
+ " )\n",
+ "\n",
+ "elif CLOUD_PROVIDER.lower() == \"azure\":\n",
+ " parent_image = (\n",
+ " \"zenmldocker/zenml-public-pipelines:quickstart-0.71.0-py3.11-azure\"\n",
+ " )\n",
+ " skip_build = True\n",
+ "\n",
+ "Client().activate_stack(stack_name)\n",
+ "\n",
+ "data_source = \"https://storage.googleapis.com/zenml-public-bucket/quickstart-files/translations.txt\"\n",
+ "\n",
+ "# We've prebuilt a Docker image for this quickstart to speed things up; feel free to delete the DockerSettings to build from scratch\n",
+ "settings[\"docker\"] = DockerSettings(\n",
+ " parent_image=parent_image, skip_build=skip_build\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8a5cd031-0661-4073-a5ea-6aac5f989212",
+ "metadata": {},
+ "source": [
+ "If you are in Google Colab, you might need to rerun the cell above a second time after the runtime restarts."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "95f17b7a-5a82-4975-b9bd-6a63fbb97a68",
+ "metadata": {},
+ "source": [
+ "## 🚀 Ready to launch"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "df14f30c-9a8e-46ca-ba44-cf16ea715dac",
+ "metadata": {},
+ "source": [
+ "We have now configured a ZenML stack that represents your very own cloud infrastructure. For the next pipeline run, we'll be training the same T5 model (`t5-small`) on your own infrastructure.\n",
+ "\n",
+ "Note: The whole process may take a bit longer the first time around, as your pipeline code needs to be built into Docker containers to be run in the orchestration environment of your stack. Any subsequent run of the pipeline, even with different parameters, will not take as long, thanks to Docker caching."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cfad9bd5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# In the case that we are within a colab environment we want to remove\n",
+ "# these folders\n",
+ "if Environment.in_google_colab():\n",
+ " !rm -rf results\n",
+ " !rm -rf sample_data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "12e758fe-6ea3-42ff-bea8-33953135bf6b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pipelines import (\n",
+ " english_translation_pipeline,\n",
+ ")\n",
+ "\n",
+ "from zenml import Model\n",
+ "\n",
+ "model_name = \"YeOldeEnglishTranslator\"\n",
+ "model = Model(\n",
+ " name=\"YeOldeEnglishTranslator\",\n",
+ ")\n",
+ "\n",
+ "pipeline_run = english_translation_pipeline.with_options(\n",
+ " settings=settings, model=model\n",
+ ")(\n",
+ " data_url=\"https://storage.googleapis.com/zenml-public-bucket/quickstart-files/translations.txt\",\n",
+ " model_type=\"t5-small\",\n",
+ " num_train_epochs=2,\n",
+ " per_device_train_batch_size=4,\n",
+ " gradient_accumulation_steps=4,\n",
+ " dataloader_num_workers=0, # Some cloud environments don't support multiple dataloader workers\n",
+ ")"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "id": "c7eef2c6-6dfb-4b67-9883-594a0df20173",
+ "metadata": {},
+ "source": [
+ "You did it! You built a pipeline locally, verified that all its parts work well together, and are now running it in a production environment.\n",
+ "\n",
+ "\"Pipeline\n",
+ "\n",
+ "Depending on the backend you chose, you can also go inspect your run in the orchestrator of your choice. Here is an example on GCP Vertex:\n",
+ "\n",
+ "\"Pipeline"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1a03054e-8b3e-4edb-9d87-82ae51693d2d",
+ "metadata": {},
+ "source": [
+ "## Adding Accelerators\n",
+ "Each of the cloud providers allows users to add accelerators to their serverless offerings. Here's what you need to add to the pipeline settings in order to unlock GPUs. Keep in mind that you might have to increase your quotas with the cloud providers."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a2f7c3c5-dcde-4824-a012-f3da224cb8d8", + "metadata": {}, + "outputs": [], + "source": [ + "from zenml.config import ResourceSettings\n", + "\n", + "if CLOUD_PROVIDER.lower() == \"gcp\":\n", + " from zenml.integrations.gcp.flavors.vertex_orchestrator_flavor import (\n", + " VertexOrchestratorSettings,\n", + " )\n", + "\n", + " # find out about your options here: https://docs.zenml.io/stack-components/orchestrators/vertex#additional-configuration\n", + "\n", + " english_translation_pipeline.with_options(\n", + " settings={\n", + " \"orchestrator.vertex\": VertexOrchestratorSettings(\n", + " node_selector_constraint=(\n", + " \"cloud.google.com/gke-accelerator\",\n", + " \"NVIDIA_TESLA_P4\",\n", + " )\n", + " ),\n", + " \"resources\": ResourceSettings(memory=\"32GB\", gpu_count=1),\n", + " }\n", + " )\n", + "elif CLOUD_PROVIDER.lower() == \"aws\":\n", + " from zenml.integrations.aws.flavors.sagemaker_orchestrator_flavor import (\n", + " SagemakerOrchestratorSettings,\n", + " )\n", + "\n", + " # find out about your options here: https://docs.zenml.io/stack-components/orchestrators/sagemaker#configuration-at-pipeline-or-step-level\n", + "\n", + " english_translation_pipeline.with_options(\n", + " settings={\n", + " \"orchestrator.sagemaker\": SagemakerOrchestratorSettings(\n", + " instance_type=\"ml.p2.xlarge\"\n", + " )\n", + " }\n", + " )\n", + "elif CLOUD_PROVIDER.lower() == \"azure\":\n", + " from zenml.integrations.azure.flavors import AzureMLOrchestratorSettings\n", + "\n", + " # find out about your options here: https://docs.zenml.io/stack-components/orchestrators/azureml#settings\n", + " # The quickest way is probably to configure a compute instance in Azure ML. This instance\n", + " # should contain a GPU. Then specify the name of the compute instance here.\n", + "\n", + " compute_name = None # Insert the name of your compute instance here\n", + " assert compute_name\n", + "\n", + " english_translation_pipeline.with_options(\n", + " settings={\n", + " \"orchestrator.azureml\": AzureMLOrchestratorSettings(\n", + " mode=\"compute-instance\", compute_name=compute_name\n", + " )\n", + " }\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "8231677c-1fd6-4ec3-8c8c-47fd9406072e", + "metadata": {}, + "source": [ + "## Now it's up to you" + ] + }, + { + "cell_type": "markdown", + "id": "90c31e0d-dfab-4692-a406-0dc439f25443", + "metadata": {}, + "source": [ + "You can now focus on actually making the model work well, whether on our toy example or on any other dataset you like.\n", + "\n", + "Here are some things that you could do:\n", + "\n", + "* Iterate on the training data and its tokenization\n", + "* Switch out the model itself: instead of `model_type=\"t5-small\"`, you could use `model_type=\"t5-large\"`, for example (see the sketch at the end of this section)\n", + "* Train for longer by increasing `num_train_epochs`. To speed this up, you can also add accelerators to your orchestrator, as covered in the section above.\n", + "\n", + "No matter what avenue you choose to actually make the model work, we would love to see how you did it, so please reach out and share your solution with us, either in our [**Slack Community**](https://zenml.io/slack) or via email at hello@zenml.io.\n",
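+ "\n", + "As promised, here is a purely illustrative sketch of such a follow-up run, reusing the `settings`, `model`, and `data_source` defined above; the exact hyperparameter values are just placeholders:\n", + "\n", + "```python\n", + "# Same pipeline as before: larger model, more epochs\n", + "english_translation_pipeline.with_options(settings=settings, model=model)(\n", + " data_url=data_source,\n", + " model_type=\"t5-large\",\n", + " num_train_epochs=5,\n", + " per_device_train_batch_size=4,\n", + " gradient_accumulation_steps=4,\n", + " dataloader_num_workers=0,\n", + ")\n", + "```"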
+ ] + }, + { + "cell_type": "markdown", + "id": "594ee4fc-f102-4b99-bdc3-2f1670c87679", + "metadata": {}, + "source": [ + "## Further exploration\n", + "\n", + "This was just the tip of the iceberg of what ZenML can do; check out the [**docs**](https://docs.zenml.io/) to learn more\n", + "about the capabilities of ZenML. For example, you might want to:\n", + "\n", + "- [Deploy ZenML](https://docs.zenml.io/user-guide/production-guide/connect-deployed-zenml) to collaborate with your colleagues.\n", + "- Run the same pipeline on a [cloud MLOps stack in production](https://docs.zenml.io/user-guide/production-guide/cloud-stack).\n", + "- Track your metrics in an experiment tracker like [MLflow](https://docs.zenml.io/stacks-and-components/component-guide/experiment-trackers/mlflow).\n", + "\n", + "## What next?\n", + "\n", + "* If you have questions or feedback... join our [**Slack Community**](https://zenml.io/slack) and become part of the ZenML family!\n", + "* If you want to quickly get started with ZenML, check out [ZenML Pro](https://zenml.io/pro)." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/examples/quickstart/requirements.txt b/examples/quickstart/requirements.txt index d1f413a694a..9aaa5c4c921 100644 --- a/examples/quickstart/requirements.txt +++ b/examples/quickstart/requirements.txt @@ -1,4 +1,4 @@ -zenml[server]==0.70.0 +zenml[server]==0.71.0 notebook pyarrow datasets diff --git a/examples/quickstart/requirements_aws.txt b/examples/quickstart/requirements_aws.txt index 2dbaf5f90a0..cac1b1bba46 100644 --- a/examples/quickstart/requirements_aws.txt +++ b/examples/quickstart/requirements_aws.txt @@ -1,4 +1,4 @@ -zenml[server]==0.70.0 +zenml[server]==0.71.0 notebook pyarrow datasets diff --git a/examples/quickstart/requirements_azure.txt b/examples/quickstart/requirements_azure.txt index c25c0d82c95..bfdca63d3ff 100644 --- a/examples/quickstart/requirements_azure.txt +++ b/examples/quickstart/requirements_azure.txt @@ -1,4 +1,4 @@ -zenml[server]==0.70.0 +zenml[server]==0.71.0 notebook pyarrow datasets diff --git a/examples/quickstart/requirements_gcp.txt b/examples/quickstart/requirements_gcp.txt index 12471da1832..e0e443ecf90 100644 --- a/examples/quickstart/requirements_gcp.txt +++ b/examples/quickstart/requirements_gcp.txt @@ -1,4 +1,4 @@ -zenml[server]==0.70.0 +zenml[server]==0.71.0 notebook pyarrow datasets diff --git a/pyproject.toml b/pyproject.toml index 6da6f36ed2f..a71b71979af 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "zenml" -version = "0.70.0" +version = "0.71.0" packages = [{ include = "zenml", from = "src" }] description = "ZenML: Write production-ready ML code." 
authors = ["ZenML GmbH "] diff --git a/scripts/test-migrations.sh b/scripts/test-migrations.sh index 145c0df7b27..170ff03e2e3 100755 --- a/scripts/test-migrations.sh +++ b/scripts/test-migrations.sh @@ -23,7 +23,7 @@ else fi # List of versions to test -VERSIONS=("0.40.3" "0.43.0" "0.44.3" "0.45.6" "0.47.0" "0.50.0" "0.51.0" "0.52.0" "0.53.1" "0.54.1" "0.55.5" "0.56.4" "0.57.1" "0.60.0" "0.61.0" "0.62.0" "0.63.0" "0.64.0" "0.65.0" "0.68.0" "0.70.0") +VERSIONS=("0.40.3" "0.43.0" "0.44.3" "0.45.6" "0.47.0" "0.50.0" "0.51.0" "0.52.0" "0.53.1" "0.54.1" "0.55.5" "0.56.4" "0.57.1" "0.60.0" "0.61.0" "0.62.0" "0.63.0" "0.64.0" "0.65.0" "0.68.0" "0.70.0" "0.71.0") # Try to get the latest version using pip index version=$(pip index versions zenml 2>/dev/null | grep -v YANKED | head -n1 | awk '{print $2}' | tr -d '()') diff --git a/src/zenml/VERSION b/src/zenml/VERSION index d9bd9500760..f91d144a8b2 100644 --- a/src/zenml/VERSION +++ b/src/zenml/VERSION @@ -1 +1 @@ -0.70.0 \ No newline at end of file +0.71.0 \ No newline at end of file diff --git a/src/zenml/integrations/wandb/flavors/wandb_experiment_tracker_flavor.py b/src/zenml/integrations/wandb/flavors/wandb_experiment_tracker_flavor.py index 11e84ca2d38..7a1a7321704 100644 --- a/src/zenml/integrations/wandb/flavors/wandb_experiment_tracker_flavor.py +++ b/src/zenml/integrations/wandb/flavors/wandb_experiment_tracker_flavor.py @@ -23,7 +23,7 @@ cast, ) -from pydantic import field_validator +from pydantic import field_validator, BaseModel from zenml.config.base_settings import BaseSettings from zenml.experiment_trackers.base_experiment_tracker import ( @@ -60,18 +60,26 @@ def _convert_settings(cls, value: Any) -> Any: Args: value: The settings. + Raises: + ValueError: If converting the settings failed. + Returns: Dict representation of the settings. 
""" import wandb if isinstance(value, wandb.Settings): - # Depending on the wandb version, either `make_static` or `to_dict` - # is available to convert the settings to a dictionary - if hasattr(value, "make_static"): + # Depending on the wandb version, either `model_dump`, + # `make_static` or `to_dict` is available to convert the settings + # to a dictionary + if isinstance(value, BaseModel): + return value.model_dump() + elif hasattr(value, "make_static"): return cast(Dict[str, Any], value.make_static()) - else: + elif hasattr(value, "to_dict"): return value.to_dict() + else: + raise ValueError("Unable to convert wandb settings to dict.") else: return value diff --git a/src/zenml/zen_server/auth.py b/src/zenml/zen_server/auth.py index a7918f3e81c..80290f091cc 100644 --- a/src/zenml/zen_server/auth.py +++ b/src/zenml/zen_server/auth.py @@ -413,10 +413,7 @@ def get_pipeline_run_status( logger.error(error) raise CredentialsNotValid(error) - if pipeline_run_status in [ - ExecutionStatus.FAILED, - ExecutionStatus.COMPLETED, - ]: + if pipeline_run_status.is_finished: error = ( f"The execution of pipeline run " f"{decoded_token.pipeline_run_id} has already concluded and " @@ -461,10 +458,7 @@ def get_step_run_status( logger.error(error) raise CredentialsNotValid(error) - if step_run_status in [ - ExecutionStatus.FAILED, - ExecutionStatus.COMPLETED, - ]: + if step_run_status.is_finished: error = ( f"The execution of step run " f"{decoded_token.step_run_id} has already concluded and " diff --git a/src/zenml/zen_server/deploy/helm/Chart.yaml b/src/zenml/zen_server/deploy/helm/Chart.yaml index 7c899cef38e..16ac743142c 100644 --- a/src/zenml/zen_server/deploy/helm/Chart.yaml +++ b/src/zenml/zen_server/deploy/helm/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v2 name: zenml -version: "0.70.0" +version: "0.71.0" description: Open source MLOps framework for portable production ready ML pipelines keywords: - mlops diff --git a/src/zenml/zen_server/deploy/helm/README.md b/src/zenml/zen_server/deploy/helm/README.md index 569c2fc2a7f..0aa40f2f3cd 100644 --- a/src/zenml/zen_server/deploy/helm/README.md +++ b/src/zenml/zen_server/deploy/helm/README.md @@ -20,8 +20,8 @@ ZenML is an open-source MLOps framework designed to help you create robust, main To install the ZenML chart directly from Amazon ECR, use the following command: ```bash -# example command for version 0.70.0 -helm install my-zenml oci://public.ecr.aws/zenml/zenml --version 0.70.0 +# example command for version 0.71.0 +helm install my-zenml oci://public.ecr.aws/zenml/zenml --version 0.71.0 ``` Note: Ensure you have OCI support enabled in your Helm client and that you are authenticated with Amazon ECR. diff --git a/src/zenml/zen_server/routers/auth_endpoints.py b/src/zenml/zen_server/routers/auth_endpoints.py index e970ba535e7..a1339c10bf3 100644 --- a/src/zenml/zen_server/routers/auth_endpoints.py +++ b/src/zenml/zen_server/routers/auth_endpoints.py @@ -41,7 +41,6 @@ from zenml.enums import ( APITokenType, AuthScheme, - ExecutionStatus, OAuthDeviceStatus, OAuthGrantTypes, ) @@ -589,10 +588,7 @@ def api_token( "security reasons." ) - if pipeline_run.status in [ - ExecutionStatus.FAILED, - ExecutionStatus.COMPLETED, - ]: + if pipeline_run.status.is_finished: raise ValueError( f"The execution of pipeline run {pipeline_run_id} has already " "concluded and API tokens can no longer be generated for it " @@ -609,10 +605,7 @@ def api_token( "be generated for non-existent step runs for security reasons." 
) - if step_run.status in [ - ExecutionStatus.FAILED, - ExecutionStatus.COMPLETED, - ]: + if step_run.status.is_finished: raise ValueError( f"The execution of step run {step_run_id} has already " "concluded and API tokens can no longer be generated for it " diff --git a/src/zenml/zen_stores/migrations/versions/0.71.0_release.py b/src/zenml/zen_stores/migrations/versions/0.71.0_release.py new file mode 100644 index 00000000000..84623865544 --- /dev/null +++ b/src/zenml/zen_stores/migrations/versions/0.71.0_release.py @@ -0,0 +1,23 @@ +"""Release [0.71.0]. + +Revision ID: 0.71.0 +Revises: cc269488e5a9 +Create Date: 2024-12-04 20:44:19.316139 + +""" + +# revision identifiers, used by Alembic. +revision = "0.71.0" +down_revision = "cc269488e5a9" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + """Upgrade database schema and/or data, creating a new revision.""" + pass + + +def downgrade() -> None: + """Downgrade database schema and/or data back to the previous revision.""" + pass