diff --git a/docs/book/component-guide/image-builders/aws.md b/docs/book/component-guide/image-builders/aws.md new file mode 100644 index 00000000000..1943d3a10b9 --- /dev/null +++ b/docs/book/component-guide/image-builders/aws.md @@ -0,0 +1,234 @@ +--- +description: Building container images with AWS CodeBuild +--- + +# AWS Image Builder + +The AWS image builder is an [image builder](./image-builders.md) flavor provided by the ZenML `aws` integration that uses [AWS CodeBuild](https://aws.amazon.com/codebuild) to build container images. + +### When to use it + +You should use the AWS image builder if: + +* you're **unable** to install or use [Docker](https://www.docker.com) on your client machine. +* you're already using AWS. +* your stack is mainly composed of other AWS components such as the [S3 Artifact Store](../artifact-stores/s3.md) or the [SageMaker Orchestrator](../orchestrators/sagemaker.md). + +### How to deploy it + +{% hint style="info" %} +Would you like to skip ahead and deploy a full ZenML cloud stack already, +including the AWS image builder? Check out the +[in-browser stack deployment wizard](../../how-to/infrastructure-deployment/stack-deployment/deploy-a-cloud-stack.md), +or [the ZenML AWS Terraform module](../../how-to/infrastructure-deployment/stack-deployment/deploy-a-cloud-stack-with-terraform.md) +for a shortcut on how to deploy & register this stack component. +{% endhint %} + +### How to use it + +To use the AWS image builder, you need: + +* The ZenML `aws` integration installed. If you haven't done so, run: + + ```shell + zenml integration install aws + ``` +* An [S3 Artifact Store](../artifact-stores/s3.md) where the build context will be uploaded, so AWS CodeBuild can access it. +* Recommended: an [AWS container registry](../container-registries/aws.md) where the built image will be pushed. 
The AWS CodeBuild service can also work with other container registries, but [explicit authentication](#authentication-methods) must be enabled in this case. +* An [AWS CodeBuild project](https://aws.amazon.com/codebuild) created in the AWS account and region where you want to build the Docker images, preferably in the same region as the ECR container registry where images will be pushed (if applicable). The CodeBuild project configuration is largely irrelevant, as ZenML will override most of the default settings for each build according to the [AWS Docker build guide](https://docs.aws.amazon.com/codebuild/latest/userguide/sample-docker-section.html). Some example default configuration values are: + * **Source Type**: `Amazon S3` + * **Bucket**: The same S3 bucket used by the ZenML S3 Artifact Store. + * **S3 folder**: any value (e.g. `codebuild`) + * **Environment Type**: `Linux Container` + * **Environment Image**: `bentolor/docker-dind-awscli` + * **Privileged Mode**: `false` + +The user must take care that the **Service Role** attached to the CodeBuild project also has the necessary permissions to access the S3 bucket to read objects and the ECR container registry to push images (if applicable): + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:GetObject", + "s3:GetObjectVersion" + ], + "Resource": "arn:aws:s3:::/*" + }, + { + "Effect": "Allow", + "Action": [ + "ecr:BatchGetImage", + "ecr:DescribeImages", + "ecr:BatchCheckLayerAvailability", + "ecr:GetDownloadUrlForLayer", + "ecr:InitiateLayerUpload", + "ecr:UploadLayerPart", + "ecr:CompleteLayerUpload", + "ecr:PutImage" + ], + "Resource": "arn:aws:ecr:::repository/" + }, + { + "Effect": "Allow", + "Action": [ + "ecr:GetAuthorizationToken" + ], + "Resource": "*" + } + ] +} +``` + +* Recommended: Grant ZenML access to trigger AWS CodeBuild builds by registering an [AWS Service
Connector](../../how-to/infrastructure-deployment/auth-management/aws-service-connector.md) with the proper credentials and permissions, as covered in the [Authentication Methods](aws.md#authentication-methods) section. If not provided, the AWS credentials will be inferred from the environment where the pipeline is triggered. + +We can register the image builder and use it in our active stack: + +```shell +zenml image-builder register \ + --flavor=aws \ + --code_build_project= + +# Register and activate a stack with the new image builder +zenml stack register -i ... --set +``` + +You also need to set up [authentication](aws.md#authentication-methods) required to access the AWS CodeBuild service. + +#### Authentication Methods + +Integrating and using an AWS Image Builder in your pipelines is not possible without employing some form of authentication. If you're looking for a quick way to get started locally, you can use the _Implicit Authentication_ method. However, the recommended way to authenticate to the AWS cloud platform is through [an AWS Service Connector](../../how-to/infrastructure-deployment/auth-management/aws-service-connector.md). This is particularly useful if you are configuring ZenML stacks that combine the AWS Image Builder with other remote stack components also running in AWS. + +{% tabs %} +{% tab title="Implicit Authentication" %} +This method uses the implicit AWS authentication available _in the environment where the ZenML code is running_. On your local machine, this is the quickest way to configure an AWS Image Builder. You don't need to supply credentials explicitly when you register the AWS Image Builder, as it leverages the local credentials and configuration that the AWS CLI stores on your local machine.
However, you will need to install and set up the AWS CLI on your machine as a prerequisite, as covered in [the AWS CLI documentation](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html), before you register the AWS Image Builder. + +{% hint style="warning" %} +Stacks using the AWS Image Builder set up with local authentication are not portable across environments. To make ZenML pipelines fully portable, it is recommended to use [an AWS Service Connector](../../how-to/infrastructure-deployment/auth-management/aws-service-connector.md) to authenticate your AWS Image Builder to the AWS cloud platform. +{% endhint %} +{% endtab %} + +{% tab title="AWS Service Connector (recommended)" %} +To set up the AWS Image Builder to authenticate to AWS and access the AWS CodeBuild services, it is recommended to leverage the many features provided by [the AWS Service Connector](../../how-to/infrastructure-deployment/auth-management/aws-service-connector.md) such as auto-configuration, best security practices regarding long-lived credentials and reusing the same credentials across multiple stack components. + +If you don't already have an AWS Service Connector configured in your ZenML deployment, you can register one using the interactive CLI command. 
You also have the option to configure an AWS Service Connector that can be used to access more than just the AWS CodeBuild service: + +```sh +zenml service-connector register --type aws -i +``` + +A non-interactive CLI example that leverages [the AWS CLI configuration](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) on your local machine to auto-configure an AWS Service Connector for the AWS CodeBuild service: + +```sh +zenml service-connector register --type aws --resource-type aws-generic --auto-configure +``` + +{% code title="Example Command Output" %} +``` +$ zenml service-connector register aws-generic --type aws --resource-type aws-generic --auto-configure +Successfully registered service connector `aws-generic` with access to the following resources: +┏━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━┓ +┃ RESOURCE TYPE │ RESOURCE NAMES ┃ +┠────────────────┼────────────────┨ +┃ 🔶 aws-generic │ eu-central-1 ┃ +┗━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━┛ +``` +{% endcode %} + +> **Note**: Please remember to grant the entity associated with your AWS credentials permissions to access the CodeBuild API and to run CodeBuild builds: +> +> ```json +> { +> "Version": "2012-10-17", +> "Statement": [ +> { +> "Effect": "Allow", +> "Action": [ +> "codebuild:StartBuild", +> "codebuild:BatchGetBuilds" +> ], +> "Resource": "arn:aws:codebuild:::project/" +> } +> ] +> } +> ``` +> + +The AWS Service Connector supports [many different authentication methods](../../how-to/infrastructure-deployment/auth-management/aws-service-connector.md#authentication-methods) with different levels of security and convenience. You should pick the one that best fits your use case.
+ +If you already have one or more AWS Service Connectors configured in your ZenML deployment, you can check which of them can be used to access generic AWS resources like the one required for your AWS Image Builder by running e.g.: + +```sh +zenml service-connector list-resources --resource-type aws-generic +``` + +{% code title="Example Command Output" %} +``` +The following 'aws-generic' resources can be accessed by service connectors configured in your workspace: +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━┓ +┃ CONNECTOR ID │ CONNECTOR NAME │ CONNECTOR TYPE │ RESOURCE TYPE │ RESOURCE NAMES ┃ +┠──────────────────────────────────────┼────────────────┼────────────────┼────────────────┼────────────────┨ +┃ 7113ba9b-efdd-4a0a-94dc-fb67926e58a1 │ aws-generic │ 🔶 aws │ 🔶 aws-generic │ eu-central-1 ┃ +┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━┛ +``` +{% endcode %} + +After having set up or decided on an AWS Service Connector to use to authenticate to AWS, you can register the AWS Image Builder as follows: + +```sh +zenml image-builder register \ + --flavor=aws \ + --code_build_project= \ + --connector +``` + +To connect an AWS Image Builder to an AWS Service Connector at a later point, you can use the following command: + +```sh +zenml image-builder connect --connector +``` + +{% code title="Example Command Output" %} +``` +$ zenml image-builder connect aws-image-builder --connector aws-generic +Successfully connected image builder `aws-image-builder` to the following resources: +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━┓ +┃ CONNECTOR ID │ CONNECTOR NAME │ CONNECTOR TYPE │ RESOURCE TYPE │ RESOURCE NAMES ┃ +┠──────────────────────────────────────┼────────────────┼────────────────┼────────────────┼────────────────┨ +┃ 7113ba9b-efdd-4a0a-94dc-fb67926e58a1 │ aws-generic │ 🔶 aws │ 🔶 
aws-generic │ eu-central-1 ┃ +┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━┛ +``` +{% endcode %} + +As a final step, you can use the AWS Image Builder in a ZenML Stack: + +```sh +# Register and set a stack with the new image builder +zenml stack register -i ... --set +``` +{% endtab %} +{% endtabs %} + +#### Customizing AWS CodeBuild builds + +The AWS Image Builder can be customized to a certain extent by providing additional configuration options when registering the image builder. The following additional attributes can be set: + +* `build_image`: The Docker image used to build the Docker image. The default is `bentolor/docker-dind-awscli`, +which is a Docker image that includes both Docker-in-Docker and the AWS CLI. + + +{% hint style="info" %} +If you are running into Docker Hub rate-limits, it might be a good idea to copy this image to your own container registry and customize the `build_image` attribute to point to your own image. +{% endhint %} + +* `compute_type`: The compute type used for the CodeBuild project. The default is `BUILD_GENERAL1_SMALL`. +* `custom_env_vars`: A dictionary of custom environment variables to be set in the CodeBuild project. +* `implicit_container_registry_auth`: A boolean flag that indicates whether to use implicit or explicit authentication when authenticating the AWS CodeBuild build to the target container registry: + + * when this is set to `true` (default), the builds will be configured to use whatever implicit authentication credentials are already available within the build container. As a special case for ECR registries, the service IAM role attached to the CodeBuild project is used to authenticate to the target ECR container registry and therefore the service role must include the necessary permissions to push images to the target ECR registry. 
+ * when set to `false`, the credentials attached to the ZenML Container Registry stack component in the active stack will be set as build environment variables and used to authenticate to the target container registry. This is useful when the target container registry is not an ECR registry or when the service role attached to the CodeBuild project does not have the necessary permissions to push images to the target ECR registry. This works best when the ZenML Container Registry stack component is also linked to the external container registry via a Service Connector. + + +
ZenML Scarf
diff --git a/docs/book/component-guide/image-builders/image-builders.md b/docs/book/component-guide/image-builders/image-builders.md index fe813f0bd4d..1a20f8b113c 100644 --- a/docs/book/component-guide/image-builders/image-builders.md +++ b/docs/book/component-guide/image-builders/image-builders.md @@ -26,6 +26,7 @@ image builders are provided by integrations: | [LocalImageBuilder](local.md) | `local` | _built-in_ | Builds your Docker images locally. | | [KanikoImageBuilder](kaniko.md) | `kaniko` | `kaniko` | Builds your Docker images in Kubernetes using Kaniko. | | [GCPImageBuilder](gcp.md) | `gcp` | `gcp` | Builds your Docker images using Google Cloud Build. | +| [AWSImageBuilder](aws.md) | `aws` | `aws` | Builds your Docker images using AWS CodeBuild. | | [Custom Implementation](custom.md) | _custom_ | | Extend the image builder abstraction and provide your own implementation | If you would like to see the available flavors of image builders, you can use the command: diff --git a/docs/book/how-to/infrastructure-deployment/stack-deployment/deploy-a-cloud-stack-with-terraform.md b/docs/book/how-to/infrastructure-deployment/stack-deployment/deploy-a-cloud-stack-with-terraform.md index c0919b8d7a5..c9734c349e4 100644 --- a/docs/book/how-to/infrastructure-deployment/stack-deployment/deploy-a-cloud-stack-with-terraform.md +++ b/docs/book/how-to/infrastructure-deployment/stack-deployment/deploy-a-cloud-stack-with-terraform.md @@ -249,7 +249,8 @@ following components: * a local Orchestrator, if `orchestrator` is set to `local`. This can be used in combination with the SageMaker Step Operator to selectively run some steps locally and some on SageMaker.
* if `orchestrator` is set to `sagemaker` (default): a SageMaker Orchestrator linked to the AWS account via an AWS Service Connector configured with IAM role credentials * if `orchestrator` is set to `skypilot`: a SkyPilot Orchestrator linked to the AWS account via an AWS Service Connector configured with IAM role credentials -4. a SageMaker Step Operator linked to the AWS account via an AWS Service Connector configured with IAM role credentials +4. an AWS CodeBuild Image Builder linked to the AWS account via an AWS Service Connector configured with IAM role credentials +5. a SageMaker Step Operator linked to the AWS account via an AWS Service Connector configured with IAM role credentials To use the ZenML stack, you will need to install the required integrations: diff --git a/docs/book/how-to/infrastructure-deployment/stack-deployment/deploy-a-cloud-stack.md b/docs/book/how-to/infrastructure-deployment/stack-deployment/deploy-a-cloud-stack.md index c677bb3c249..548d4fc3d53 100644 --- a/docs/book/how-to/infrastructure-deployment/stack-deployment/deploy-a-cloud-stack.md +++ b/docs/book/how-to/infrastructure-deployment/stack-deployment/deploy-a-cloud-stack.md @@ -335,6 +335,7 @@ prepare for you based on your cloud provider: - An S3 bucket that will be used as a ZenML Artifact Store. - An ECR container registry that will be used as a ZenML Container Registry. +- A CodeBuild project that will be used as a ZenML Image Builder. - Permissions to use SageMaker as a ZenML Orchestrator and Step Operator. - An IAM user and IAM role with the minimum necessary permissions to access the resources listed above.
@@ -367,6 +368,24 @@ following AWS permissions in your AWS account: * ecr:CompleteLayerUpload * ecr:PutImage * ecr:GetAuthorizationToken +* CodeBuild (Client): + * codebuild:StartBuild + * codebuild:BatchGetBuilds +* CodeBuild (Service): + * s3:GetObject + * s3:GetObjectVersion + * logs:CreateLogGroup + * logs:CreateLogStream + * logs:PutLogEvents + * ecr:BatchGetImage + * ecr:DescribeImages + * ecr:BatchCheckLayerAvailability + * ecr:GetDownloadUrlForLayer + * ecr:InitiateLayerUpload + * ecr:UploadLayerPart + * ecr:CompleteLayerUpload + * ecr:PutImage + * ecr:GetAuthorizationToken * SageMaker (Client): * sagemaker:CreatePipeline * sagemaker:StartPipelineExecution diff --git a/docs/book/toc.md b/docs/book/toc.md index 1211cdb9d82..aff3ce0c7b9 100644 --- a/docs/book/toc.md +++ b/docs/book/toc.md @@ -293,6 +293,7 @@ * [Image Builders](component-guide/image-builders/image-builders.md) * [Local Image Builder](component-guide/image-builders/local.md) * [Kaniko Image Builder](component-guide/image-builders/kaniko.md) + * [AWS Image Builder](component-guide/image-builders/aws.md) * [Google Cloud Image Builder](component-guide/image-builders/gcp.md) * [Develop a Custom Image Builder](component-guide/image-builders/custom.md) * [Annotators](component-guide/annotators/annotators.md) diff --git a/infra/aws/aws-ecr-s3-sagemaker.yaml b/infra/aws/aws-ecr-s3-sagemaker.yaml index 12b45a26b9a..a97ba513439 100644 --- a/infra/aws/aws-ecr-s3-sagemaker.yaml +++ b/infra/aws/aws-ecr-s3-sagemaker.yaml @@ -51,11 +51,23 @@ Parameters: Description: "The value of the tag to apply to all resources" Default: "zenml" + CodeBuild: + Type: String + AllowedValues: + - true + - false + Description: | + Whether to provision a CodeBuild project as the image builder for the + stack. Only supported for ZenML Server versions above 0.70.0.
+ Default: false + Conditions: RegisterZenMLStack: !And - !Not [ !Equals [ !Ref ZenMLServerURL, "" ] ] - !Not [ !Equals [ !Ref ZenMLServerAPIToken, "" ] ] + RegisterCodeBuild: !Equals [ !Ref CodeBuild, true ] + Resources: S3Bucket: Type: AWS::S3::Bucket @@ -73,6 +85,28 @@ Resources: Tags: - Key: !Ref TagName Value: !Sub TagValue + + CodeBuildProject: + Condition: RegisterCodeBuild + Type: AWS::CodeBuild::Project + Properties: + Name: !Sub '${ResourceName}' + ServiceRole: !GetAtt CodeBuildRole.Arn + Artifacts: + Type: NO_ARTIFACTS + Environment: + Type: LINUX_CONTAINER + ComputeType: BUILD_GENERAL1_SMALL + Image: bentolor/docker-dind-awscli + PrivilegedMode: false + Source: + Type: S3 + Location: !Sub '${S3Bucket}/codebuild' + TimeoutInMinutes: 20 + LogsConfig: + CloudWatchLogs: + Status: ENABLED + GroupName: !Sub '/aws/codebuild/${ResourceName}' IAMUser: Type: AWS::IAM::User @@ -176,6 +210,19 @@ Resources: - Effect: Allow Action: iam:PassRole Resource: !Sub 'arn:aws:iam::${AWS::AccountId}:role/${ResourceName}-sagemaker' + - !If + - RegisterCodeBuild + - PolicyName: CodeBuildPolicy + PolicyDocument: + Version: '2012-10-17' + Statement: + # Allow this role to start and monitor CodeBuild project builds + - Effect: Allow + Action: + - 'codebuild:StartBuild' + - 'codebuild:BatchGetBuilds' + Resource: !Sub 'arn:aws:codebuild:${AWS::Region}:${AWS::AccountId}:project/${ResourceName}' + - !Ref 'AWS::NoValue' SageMakerRuntimeRole: Type: AWS::IAM::Role @@ -205,6 +252,53 @@ Resources: ManagedPolicyArns: - 'arn:aws:iam::aws:policy/AmazonSageMakerFullAccess' + CodeBuildRole: + Type: AWS::IAM::Role + Condition: RegisterCodeBuild + Properties: + RoleName: !Sub '${ResourceName}-codebuild' + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: codebuild.amazonaws.com + Action: 'sts:AssumeRole' + Policies: + - PolicyName: CodeBuildPolicy + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - 
'logs:CreateLogGroup' + - 'logs:CreateLogStream' + - 'logs:PutLogEvents' + Resource: + - !Sub 'arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/codebuild/${ResourceName}' + - !Sub 'arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/codebuild/${ResourceName}:*' + - Effect: Allow + Action: + - 's3:GetObject' + - 's3:GetObjectVersion' + Resource: + - !Sub '${S3Bucket.Arn}/*' + - Effect: Allow + Action: + - 'ecr:BatchGetImage' + - 'ecr:DescribeImages' + - 'ecr:BatchCheckLayerAvailability' + - 'ecr:GetDownloadUrlForLayer' + - 'ecr:InitiateLayerUpload' + - 'ecr:UploadLayerPart' + - 'ecr:CompleteLayerUpload' + - 'ecr:PutImage' + Resource: !Sub '${ECRRepository.Arn}' + - Effect: Allow + Action: + - 'ecr:GetAuthorizationToken' + Resource: '*' + InvokeZenMLAPIFunction: Type: AWS::Serverless::Function Condition: RegisterZenMLStack @@ -308,63 +402,77 @@ Resources: Properties: ServiceToken: !GetAtt InvokeZenMLAPIFunction.Arn ServiceTimeout: 300 - Payload: !Sub | - { - "name": "${AWS::StackName}", - "description": "Deployed by AWS CloudFormation stack ${AWS::StackName} in the ${AWS::AccountId} account and ${AWS::Region} region.", - "labels": { - "zenml:provider": "aws", - "zenml:deployment": "cloud-formation" - }, - "service_connectors": [ + Payload: !Join + - '' + - - !Sub | { - "type": "aws", - "auth_method": "iam-role", - "configuration": { - "aws_access_key_id": "${IAMUserAccessKey}", - "aws_secret_access_key": "${IAMUserAccessKey.SecretAccessKey}", - "role_arn": "${StackAccessRole.Arn}", - "region": "${AWS::Region}" - } - } - ], - "components": { - "artifact_store": [{ - "flavor": "s3", - "service_connector_index": 0, - "configuration": { - "path": "s3://${S3Bucket}" - } - }], - "container_registry":[{ - "flavor": "aws", - "service_connector_index": 0, - "configuration": { - "uri": "${AWS::AccountId}.dkr.ecr.${AWS::Region}.amazonaws.com", - "default_repository": "${ECRRepository}" - } - }], - "orchestrator": [{ - "flavor": "sagemaker", - 
"service_connector_index": 0, - "configuration": { - "execution_role": "${SageMakerRuntimeRole.Arn}", - "output_data_s3_uri": "s3://${S3Bucket}/sagemaker" + "name": "${AWS::StackName}", + "description": "Deployed by AWS CloudFormation stack ${AWS::StackName} in the ${AWS::AccountId} account and ${AWS::Region} region.", + "labels": { + "zenml:provider": "aws", + "zenml:deployment": "cloud-formation" + }, + "service_connectors": [ + { + "type": "aws", + "auth_method": "iam-role", + "configuration": { + "aws_access_key_id": "${IAMUserAccessKey}", + "aws_secret_access_key": "${IAMUserAccessKey.SecretAccessKey}", + "role_arn": "${StackAccessRole.Arn}", + "region": "${AWS::Region}" + } + } + ], + "components": { + "artifact_store": [{ + "flavor": "s3", + "service_connector_index": 0, + "configuration": { + "path": "s3://${S3Bucket}" + } + }], + "container_registry":[{ + "flavor": "aws", + "service_connector_index": 0, + "configuration": { + "uri": "${AWS::AccountId}.dkr.ecr.${AWS::Region}.amazonaws.com", + "default_repository": "${ECRRepository}" + } + }], + "orchestrator": [{ + "flavor": "sagemaker", + "service_connector_index": 0, + "configuration": { + "execution_role": "${SageMakerRuntimeRole.Arn}", + "output_data_s3_uri": "s3://${S3Bucket}/sagemaker" + } + }], + "step_operator": [{ + "flavor": "sagemaker", + "service_connector_index": 0, + "configuration": { + "role": "${SageMakerRuntimeRole.Arn}", + "bucket": "${S3Bucket}" + } + }], + - !If + - RegisterCodeBuild + - !Sub | + "image_builder": [{ + "flavor": "aws", + "service_connector_index": 0, + "configuration": { + "code_build_project": "${CodeBuildProject}" + } + }] + - | + "image_builder": [{ + "flavor": "local" + }] + - | } - }], - "step_operator": [{ - "flavor": "sagemaker", - "service_connector_index": 0, - "configuration": { - "role": "${SageMakerRuntimeRole.Arn}", - "bucket": "${S3Bucket}" } - }], - "image_builder": [{ - "flavor": "local" - }] - } - } Outputs: AWSRegion: @@ -387,62 +495,3 @@ Outputs: 
Description: "SageMaker execution IAM Role ARN" Value: !GetAtt SageMakerRuntimeRole.Arn - ZenMLStack: - Description: "ZenML Stack JSON (can be imported with `zenml stack import`)" - Value: !Sub | - { - "name": "${AWS::StackName}", - "description": "Deployed by AWS CloudFormation stack ${AWS::StackName} in the ${AWS::AccountId} account and ${AWS::Region} region.", - "labels": { - "zenml:provider": "aws", - "zenml:deployment": "aws-cloud-formation" - }, - "service_connectors": [ - { - "type": "aws", - "auth_method": "iam-role", - "configuration": { - "aws_access_key_id": "${IAMUserAccessKey}", - "aws_secret_access_key": "${IAMUserAccessKey.SecretAccessKey}", - "role_arn": "${StackAccessRole.Arn}", - "region": "${AWS::Region}" - } - } - ], - "components": { - "artifact_store": [{ - "flavor": "s3", - "service_connector_index": 0, - "configuration": { - "path": "s3://${S3Bucket}" - } - }], - "container_registry":[{ - "flavor": "aws", - "service_connector_index": 0, - "configuration": { - "uri": "${AWS::AccountId}.dkr.ecr.${AWS::Region}.amazonaws.com", - "default_repository": "${ECRRepository}" - } - }], - "orchestrator": [{ - "flavor": "sagemaker", - "service_connector_index": 0, - "configuration": { - "execution_role": "${SageMakerRuntimeRole.Arn}", - "output_data_s3_uri": "s3://${S3Bucket}/sagemaker" - } - }], - "step_operator": [{ - "flavor": "sagemaker", - "service_connector_index": 0, - "configuration": { - "role": "${SageMakerRuntimeRole.Arn}", - "bucket": "${S3Bucket}" - } - }], - "image_builder": [{ - "flavor": "local" - }] - } - } \ No newline at end of file diff --git a/src/zenml/image_builders/base_image_builder.py b/src/zenml/image_builders/base_image_builder.py index b99bb277ec8..4ad38cd8652 100644 --- a/src/zenml/image_builders/base_image_builder.py +++ b/src/zenml/image_builders/base_image_builder.py @@ -25,6 +25,7 @@ from zenml.logger import get_logger from zenml.stack import Flavor, StackComponent from zenml.stack.stack_component import 
StackComponentConfig +from zenml.utils.archivable import ArchiveType if TYPE_CHECKING: from zenml.container_registries import BaseContainerRegistry @@ -100,6 +101,7 @@ def build( def _upload_build_context( build_context: "BuildContext", parent_path_directory_name: str, + archive_type: ArchiveType = ArchiveType.TAR_GZ, ) -> str: """Uploads a Docker image build context to a remote location. @@ -109,6 +111,7 @@ def _upload_build_context( the build context to. It will be appended to the artifact store path to create the parent path where the build context will be uploaded to. + archive_type: The type of archive to create. Returns: The path to the uploaded build context. @@ -119,7 +122,7 @@ def _upload_build_context( hash_ = hashlib.sha1() # nosec with tempfile.NamedTemporaryFile(mode="w+b", delete=False) as f: - build_context.write_archive(f, use_gzip=True) + build_context.write_archive(f, archive_type) while True: data = f.read(64 * 1024) @@ -127,7 +130,7 @@ def _upload_build_context( break hash_.update(data) - filename = f"{hash_.hexdigest()}.tar.gz" + filename = f"{hash_.hexdigest()}.{archive_type.value}" filepath = f"{parent_path}/{filename}" if not fileio.exists(filepath): logger.info("Uploading build context to `%s`.", filepath) diff --git a/src/zenml/image_builders/build_context.py b/src/zenml/image_builders/build_context.py index e8284cfb446..610348ef1a1 100644 --- a/src/zenml/image_builders/build_context.py +++ b/src/zenml/image_builders/build_context.py @@ -20,7 +20,7 @@ from zenml.io import fileio from zenml.logger import get_logger from zenml.utils import io_utils, string_utils -from zenml.utils.archivable import Archivable +from zenml.utils.archivable import Archivable, ArchiveType logger = get_logger(__name__) @@ -69,28 +69,19 @@ def dockerignore_file(self) -> Optional[str]: return None def write_archive( - self, output_file: IO[bytes], use_gzip: bool = True + self, + output_file: IO[bytes], + archive_type: ArchiveType = ArchiveType.TAR_GZ, ) -> None: 
"""Writes an archive of the build context to the given file. Args: output_file: The file to write the archive to. - use_gzip: Whether to use `gzip` to compress the file. + archive_type: The type of archive to create. """ - from docker.utils import build as docker_build_utils - - files = self.get_files() - extra_files = self.get_extra_files() - - context_archive = docker_build_utils.create_archive( - fileobj=output_file, - root=self._root, - files=sorted(files.keys()), - gzip=use_gzip, - extra_files=list(extra_files.items()), - ) + super().write_archive(output_file, archive_type) - build_context_size = os.path.getsize(context_archive.name) + build_context_size = os.path.getsize(output_file.name) if ( self._root and build_context_size > 50 * 1024 * 1024 diff --git a/src/zenml/integrations/aws/__init__.py b/src/zenml/integrations/aws/__init__.py index 0b8849e24f9..c18c90f4deb 100644 --- a/src/zenml/integrations/aws/__init__.py +++ b/src/zenml/integrations/aws/__init__.py @@ -33,6 +33,7 @@ AWS_CONNECTOR_TYPE = "aws" AWS_RESOURCE_TYPE = "aws-generic" S3_RESOURCE_TYPE = "s3-bucket" +AWS_IMAGE_BUILDER_FLAVOR = "aws" class AWSIntegration(Integration): """Definition of AWS integration for ZenML.""" @@ -59,12 +60,14 @@ def flavors(cls) -> List[Type[Flavor]]: """ from zenml.integrations.aws.flavors import ( AWSContainerRegistryFlavor, + AWSImageBuilderFlavor, SagemakerOrchestratorFlavor, SagemakerStepOperatorFlavor, ) return [ AWSContainerRegistryFlavor, + AWSImageBuilderFlavor, SagemakerStepOperatorFlavor, SagemakerOrchestratorFlavor, ] diff --git a/src/zenml/integrations/aws/flavors/__init__.py b/src/zenml/integrations/aws/flavors/__init__.py index 0e674dd9b5d..a2cdc428add 100644 --- a/src/zenml/integrations/aws/flavors/__init__.py +++ b/src/zenml/integrations/aws/flavors/__init__.py @@ -17,6 +17,10 @@ AWSContainerRegistryConfig, AWSContainerRegistryFlavor, ) +from zenml.integrations.aws.flavors.aws_image_builder_flavor import ( + AWSImageBuilderConfig, + 
# Fallback values for the CodeBuild environment; both can be overridden per
# stack component through `AWSImageBuilderConfig`.
DEFAULT_CLOUDBUILD_IMAGE = "bentolor/docker-dind-awscli"
DEFAULT_CLOUDBUILD_COMPUTE_TYPE = "BUILD_GENERAL1_SMALL"


class AWSImageBuilderConfig(BaseImageBuilderConfig):
    """Settings of the AWS Code Build image builder.

    Attributes:
        code_build_project: Name of an already existing AWS CodeBuild project
            that is used to run the image builds. It has to live in the AWS
            account and region that are inferred from the AWS service
            connector credentials, or implicitly from the local AWS config.
        build_image: Docker image used for the AWS CodeBuild environment. It
            needs to ship Docker and be able to run Docker commands. Defaults
            to bentolor/docker-dind-awscli; point this at a mirror if the
            Dockerhub image is not accessible or rate-limited.
        custom_env_vars: Additional environment variables handed to the AWS
            CodeBuild build.
        compute_type: Compute type used for the AWS CodeBuild build. Defaults
            to BUILD_GENERAL1_SMALL.
        implicit_container_registry_auth: Whether the AWS Code Build build
            authenticates implicitly to the container registry when pushing
            images. When False, credentials must be configured explicitly on
            the container registry stack component, or that component must be
            linked to a service connector.
            NOTE: with implicit_container_registry_auth set to False the
            container registry credentials are passed to the AWS Code Build
            build as environment variables. This is not recommended for
            production use unless the service connector is configured to
            generate short-lived credentials.
    """

    # Required: there is no sensible default project name.
    code_build_project: str
    build_image: str = DEFAULT_CLOUDBUILD_IMAGE
    custom_env_vars: Optional[Dict[str, str]] = None
    compute_type: str = DEFAULT_CLOUDBUILD_COMPUTE_TYPE
    implicit_container_registry_auth: bool = True


class AWSImageBuilderFlavor(BaseImageBuilderFlavor):
    """Flavor definition of the AWS Code Build image builder."""

    @property
    def name(self) -> str:
        """Name under which the flavor is registered.

        Returns:
            The flavor name.
        """
        flavor_name = AWS_IMAGE_BUILDER_FLAVOR
        return flavor_name

    @property
    def service_connector_requirements(
        self,
    ) -> Optional[ServiceConnectorRequirements]:
        """Resource requirements for compatible service connectors.

        The returned requirements are used to filter the service connector
        types that can be linked to components of this flavor.

        Returns:
            Requirements for compatible service connectors, if a service
            connector is required for this flavor.
        """
        requirements = ServiceConnectorRequirements(
            connector_type=AWS_CONNECTOR_TYPE,
            resource_type=AWS_RESOURCE_TYPE,
        )
        return requirements

    @property
    def docs_url(self) -> Optional[str]:
        """URL of the documentation page describing this flavor.

        Returns:
            A flavor docs url.
        """
        url = self.generate_default_docs_url()
        return url

    @property
    def sdk_docs_url(self) -> Optional[str]:
        """URL of the SDK documentation describing this flavor.

        Returns:
            A flavor SDK docs url.
        """
        url = self.generate_default_sdk_docs_url()
        return url

    @property
    def logo_url(self) -> str:
        """URL of the logo shown for the flavor in the dashboard.

        Returns:
            The flavor logo.
        """
        logo = "https://public-flavor-logos.s3.eu-central-1.amazonaws.com/image_builder/aws.png"
        return logo

    @property
    def config_class(self) -> Type[BaseImageBuilderConfig]:
        """Configuration class paired with this flavor.

        Returns:
            The config class.
        """
        return AWSImageBuilderConfig

    @property
    def implementation_class(self) -> Type["AWSImageBuilder"]:
        """Stack component class implementing this flavor.

        Returns:
            The implementation class.
        """
        # Imported lazily so that merely listing the flavor does not pull in
        # the implementation module.
        from zenml.integrations.aws.image_builders import AWSImageBuilder

        implementation = AWSImageBuilder
        return implementation
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. +"""Initialization for the AWS image builder.""" + +from zenml.integrations.aws.image_builders.aws_image_builder import ( + AWSImageBuilder, +) + +__all__ = ["AWSImageBuilder"] diff --git a/src/zenml/integrations/aws/image_builders/aws_image_builder.py b/src/zenml/integrations/aws/image_builders/aws_image_builder.py new file mode 100644 index 00000000000..73319bb819c --- /dev/null +++ b/src/zenml/integrations/aws/image_builders/aws_image_builder.py @@ -0,0 +1,307 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. 
logger = get_logger(__name__)


class AWSImageBuilder(BaseImageBuilder):
    """Image builder that runs Docker builds remotely on AWS CodeBuild.

    `build` uploads the build context as a ZIP archive, starts a CodeBuild
    run with an inline buildspec that builds and pushes the image, and then
    polls the run until it reaches a terminal status.
    """

    # Cached boto3 CodeBuild client; reset when the linked connector expires.
    _code_build_client: Optional[Any] = None

    @property
    def config(self) -> AWSImageBuilderConfig:
        """The stack component configuration.

        Returns:
            The configuration.
        """
        return cast(AWSImageBuilderConfig, self._config)

    @property
    def is_building_locally(self) -> bool:
        """Whether the image builder builds the images on the client machine.

        Returns:
            True if the image builder builds locally, False otherwise.
        """
        return False

    @property
    def validator(self) -> Optional["StackValidator"]:
        """Validates the stack for the AWS Code Build Image Builder.

        The AWS Code Build Image Builder requires a container registry to
        push the image to and an S3 Artifact Store to upload the build context,
        so AWS Code Build can access it.

        Returns:
            Stack validator.
        """

        def _validate_remote_components(stack: "Stack") -> Tuple[bool, str]:
            if stack.artifact_store.flavor != "s3":
                # Adjacent literals are concatenated verbatim, so each
                # sentence carries its own trailing space.
                return False, (
                    "The AWS Image Builder requires an S3 Artifact Store to "
                    "upload the build context, so AWS Code Build can access "
                    "it. Please update your stack to include an S3 Artifact "
                    "Store and try again."
                )

            return True, ""

        return StackValidator(
            required_components={StackComponentType.CONTAINER_REGISTRY},
            custom_validation_function=_validate_remote_components,
        )

    @property
    def code_build_client(self) -> Any:
        """The authenticated AWS Code Build client to use for interacting with AWS services.

        Returns:
            The authenticated AWS Code Build client.

        Raises:
            RuntimeError: If the AWS Code Build client cannot be created.
        """
        # Drop the cached client once the connector credentials have expired.
        if (
            self._code_build_client is not None
            and self.connector_has_expired()
        ):
            self._code_build_client = None
        if self._code_build_client is not None:
            return self._code_build_client

        # Option 1: Service connector
        if connector := self.get_connector():
            boto_session = connector.connect()
            if not isinstance(boto_session, boto3.Session):
                raise RuntimeError(
                    f"Expected to receive a `boto3.Session` object from the "
                    f"linked connector, but got type `{type(boto_session)}`."
                )
        # Option 2: Implicit configuration
        else:
            boto_session = boto3.Session()

        self._code_build_client = boto_session.client("codebuild")
        return self._code_build_client

    @staticmethod
    def _format_docker_build_args(docker_build_options: Dict[str, Any]) -> str:
        """Renders Docker build options as `docker build` CLI arguments.

        List values repeat the option once per item, `True` (and `None`)
        yields a bare flag, `False` omits the option, and any other value is
        rendered as `--key value`.

        Args:
            docker_build_options: Docker build options.

        Returns:
            The CLI arguments as one string; every argument is followed by a
            single space.
        """
        rendered = []
        for key, value in docker_build_options.items():
            option = f"--{key}"
            if isinstance(value, list):
                rendered.extend(f"{option} {item} " for item in value)
            elif value is not None and not isinstance(value, bool):
                rendered.append(f"{option} {value} ")
            elif value is not False:
                rendered.append(f"{option} ")
        return "".join(rendered)

    @staticmethod
    def _retag_image(image_name: str, tag: str) -> str:
        """Replaces the tag of an image name.

        Only the last path component is searched for a tag separator, so a
        registry host with a port (e.g. `localhost:5000/repo:tag`) keeps its
        host and port.

        Args:
            image_name: The image name, with or without a tag.
            tag: The tag to use instead.

        Returns:
            The image name carrying the given tag.
        """
        registry_path, slash, last_component = image_name.rpartition("/")
        repository = last_component.split(":")[0]
        return f"{registry_path}{slash}{repository}:{tag}"

    def build(
        self,
        image_name: str,
        build_context: "BuildContext",
        docker_build_options: Dict[str, Any],
        container_registry: Optional["BaseContainerRegistry"] = None,
    ) -> str:
        """Builds and pushes a Docker image.

        Args:
            image_name: Name of the image to build and push.
            build_context: The build context to use for the image.
            docker_build_options: Docker build options.
            container_registry: Optional container registry to push to.

        Returns:
            The Docker image name, tagged with a unique build ID.

        Raises:
            RuntimeError: If no container registry is passed.
            RuntimeError: If the Code Build build fails.
        """
        if not container_registry:
            raise RuntimeError(
                "The AWS Image Builder requires a container registry to push "
                "the image to. Please provide one and try again."
            )

        logger.info("Using AWS Code Build to build image `%s`", image_name)
        cloud_build_context = self._upload_build_context(
            build_context=build_context,
            parent_path_directory_name=f"code-build-contexts/{str(self.id)}",
            archive_type=ArchiveType.ZIP,
        )

        url_parts = urlparse(cloud_build_context)
        bucket = url_parts.netloc
        object_path = url_parts.path.lstrip("/")
        logger.info(
            "Build context located in bucket `%s` and object path `%s`",
            bucket,
            object_path,
        )

        # Pass authentication credentials as environment variables, if
        # the container registry has credentials and if implicit authentication
        # is disabled
        environment_variables_override: Dict[str, str] = {}
        pre_build_commands = []
        if not self.config.implicit_container_registry_auth:
            credentials = container_registry.credentials
            if credentials:
                environment_variables_override = {
                    "CONTAINER_REGISTRY_USERNAME": credentials[0],
                    "CONTAINER_REGISTRY_PASSWORD": credentials[1],
                }
                pre_build_commands = [
                    "echo Logging in to container registry",
                    'echo "$CONTAINER_REGISTRY_PASSWORD" | docker login --username "$CONTAINER_REGISTRY_USERNAME" --password-stdin '
                    f"{container_registry.config.uri}",
                ]
        elif container_registry.flavor == AWS_CONTAINER_REGISTRY_FLAVOR:
            # `meta.region_name` is the public accessor for the region the
            # client was created for.
            region = self.code_build_client.meta.region_name
            pre_build_commands = [
                "echo Logging in to ECR",
                f"aws ecr get-login-password --region {region} | docker login --username AWS --password-stdin {container_registry.config.uri}",
            ]

        docker_build_args = self._format_docker_build_args(
            docker_build_options
        )

        pre_build_commands_str = "\n".join(
            [f"            - {command}" for command in pre_build_commands]
        )

        # Generate and use a unique tag for the Docker image. This is easier
        # than trying to parse the image digest from the Code Build logs.
        build_id = str(uuid4())
        alt_image_name = self._retag_image(image_name, build_id)

        buildspec = f"""
version: 0.2
phases:
    pre_build:
        commands:
{pre_build_commands_str}
    build:
        commands:
            - echo Build started on `date`
            - echo Building the Docker image...
            - docker build -t {image_name} . {docker_build_args}
            - echo Build completed on `date`
    post_build:
        commands:
            - echo Pushing the Docker image...
            - docker push {image_name}
            - docker tag {image_name} {alt_image_name}
            - docker push {alt_image_name}
            - echo Pushed the Docker image
artifacts:
    files:
        - '**/*'
"""

        # User-supplied variables are applied last, so they win on conflicts.
        if self.config.custom_env_vars:
            environment_variables_override.update(self.config.custom_env_vars)

        environment_variables_override_list = [
            {
                "name": key,
                "value": value,
                "type": "PLAINTEXT",
            }
            for key, value in environment_variables_override.items()
        ]

        # Override the build project with the parameters needed to run a
        # docker-in-docker build, as covered here: https://docs.aws.amazon.com/codebuild/latest/userguide/sample-docker-section.html
        response = self.code_build_client.start_build(
            projectName=self.config.code_build_project,
            environmentTypeOverride="LINUX_CONTAINER",
            imageOverride=self.config.build_image,
            computeTypeOverride=self.config.compute_type,
            privilegedModeOverride=False,
            sourceTypeOverride="S3",
            sourceLocationOverride=f"{bucket}/{object_path}",
            buildspecOverride=buildspec,
            environmentVariablesOverride=environment_variables_override_list,
            # no artifacts
            artifactsOverride={"type": "NO_ARTIFACTS"},
        )

        build_arn = response["build"]["arn"]

        # Parse the AWS region, account, codebuild project and build name from the ARN
        aws_region, aws_account, build_ref = build_arn.split(":", maxsplit=5)[
            3:6
        ]
        codebuild_project = build_ref.split("/")[1].split(":")[0]

        logs_url = f"https://{aws_region}.console.aws.amazon.com/codesuite/codebuild/{aws_account}/projects/{codebuild_project}/{build_ref}/log"
        logger.info(
            f"Running Code Build to build the Docker image. Code Build logs: `{logs_url}`",
        )

        # Wait for the build to complete
        terminal_statuses = (
            "SUCCEEDED",
            "FAILED",
            "FAULT",
            "TIMED_OUT",
            "STOPPED",
        )
        code_build_id = response["build"]["id"]
        while True:
            build_status = self.code_build_client.batch_get_builds(
                ids=[code_build_id]
            )
            status = build_status["builds"][0]["buildStatus"]
            if status in terminal_statuses:
                break
            time.sleep(10)

        if status != "SUCCEEDED":
            raise RuntimeError(
                f"The Code Build run to build the Docker image has failed. More "
                f"information can be found in the Code Build logs: {logs_url}."
            )

        logger.info(
            f"The Docker image has been built successfully. More information can "
            f"be found in the Code Build logs: `{logs_url}`."
        )

        return alt_image_name
"https://docs.zenml.io/how-to/infrastructure-deployment/auth-management/gcp-service-connector" + AZURE_DOCS = "https://docs.zenml.io/how-to/infrastructure-deployment/auth-management/azure-service-connector" if not artifact_stores: error_msg = ( diff --git a/src/zenml/stack_deployments/aws_stack_deployment.py b/src/zenml/stack_deployments/aws_stack_deployment.py index 8bebb51ea83..d06d19495ea 100644 --- a/src/zenml/stack_deployments/aws_stack_deployment.py +++ b/src/zenml/stack_deployments/aws_stack_deployment.py @@ -73,6 +73,7 @@ def instructions(cls) -> str: - An ECR repository registered as a [ZenML container registry](https://docs.zenml.io/stack-components/container-registries/aws). - Sagemaker registered as a [ZenML orchestrator](https://docs.zenml.io/stack-components/orchestrators/sagemaker) as well as a [ZenML step operator](https://docs.zenml.io/stack-components/step-operators/sagemaker). +- A CodeBuild project registered as a [ZenML image builder](https://docs.zenml.io/stack-components/image-builder/aws). - An IAM user and IAM role with the minimum necessary permissions to access the above resources. 
- An AWS access key used to give access to ZenML to connect to the above @@ -158,6 +159,26 @@ def permissions(cls) -> Dict[str, List[str]]: "ecr:PutImage", "ecr:GetAuthorizationToken", ], + "CloudBuild (Client)": [ + "codebuild:CreateProject", + "codebuild:BatchGetBuilds", + ], + "CloudBuild (Service)": [ + "s3:GetObject", + "s3:GetObjectVersion", + "logs:CreateLogGroup", + "logs:CreateLogStream", + "logs:PutLogEvents", + "ecr:BatchGetImage", + "ecr:DescribeImages", + "ecr:BatchCheckLayerAvailability", + "ecr:GetDownloadUrlForLayer", + "ecr:InitiateLayerUpload", + "ecr:UploadLayerPart", + "ecr:CompleteLayerUpload", + "ecr:PutImage", + "ecr:GetAuthorizationToken", + ], "SageMaker (Client)": [ "sagemaker:CreatePipeline", "sagemaker:StartPipelineExecution", @@ -243,6 +264,7 @@ def get_deployment_config( param_ResourceName=f"zenml-{random_str(6).lower()}", param_ZenMLServerURL=self.zenml_server_url, param_ZenMLServerAPIToken=self.zenml_server_api_token, + param_CodeBuild="true", ) # Encode the parameters as URL query parameters query_params = "&".join([f"{k}={v}" for k, v in params.items()]) diff --git a/src/zenml/utils/archivable.py b/src/zenml/utils/archivable.py index c2d7b83c422..23804016cce 100644 --- a/src/zenml/utils/archivable.py +++ b/src/zenml/utils/archivable.py @@ -15,11 +15,21 @@ import io import tarfile +import zipfile from abc import ABC, abstractmethod from pathlib import Path -from typing import IO, Any, Dict +from typing import IO, Any, Dict, Optional from zenml.io import fileio +from zenml.utils.enum_utils import StrEnum + + +class ArchiveType(StrEnum): + """Archive types supported by the ZenML build context.""" + + TAR = "tar" + TAR_GZ = "tar.gz" + ZIP = "zip" class Archivable(ABC): @@ -81,52 +91,71 @@ def add_directory(self, source: str, destination: str) -> None: self._extra_files[file_destination.as_posix()] = f.read() def write_archive( - self, output_file: IO[bytes], use_gzip: bool = True + self, + output_file: IO[bytes], + archive_type: 
ArchiveType = ArchiveType.TAR_GZ, ) -> None: """Writes an archive of the build context to the given file. Args: output_file: The file to write the archive to. - use_gzip: Whether to use `gzip` to compress the file. + archive_type: The type of archive to create. """ files = self.get_files() extra_files = self.get_extra_files() + close_fileobj: Optional[Any] = None + fileobj: Any = output_file - if use_gzip: - from gzip import GzipFile - - # We don't use the builtin gzip functionality of the `tarfile` - # library as that one includes the tar filename and creation - # timestamp in the archive which causes the hash of the resulting - # file to be different each time. We use this hash to avoid - # duplicate uploads, which is why we pass empty values for filename - # and mtime here. - fileobj: Any = GzipFile( - filename="", mode="wb", fileobj=output_file, mtime=0.0 - ) + if archive_type == ArchiveType.ZIP: + fileobj = zipfile.ZipFile(output_file, "w", zipfile.ZIP_DEFLATED) else: - fileobj = output_file - - with tarfile.open(mode="w", fileobj=fileobj) as tf: - for archive_path, file_path in files.items(): - if archive_path in extra_files: - continue - - if info := tf.gettarinfo(file_path, arcname=archive_path): - if info.isfile(): - with open(file_path, "rb") as f: - tf.addfile(info, f) + if archive_type == ArchiveType.TAR_GZ: + from gzip import GzipFile + + # We don't use the builtin gzip functionality of the `tarfile` + # library as that one includes the tar filename and creation + # timestamp in the archive which causes the hash of the resulting + # file to be different each time. We use this hash to avoid + # duplicate uploads, which is why we pass empty values for filename + # and mtime here. 
+ close_fileobj = fileobj = GzipFile( + filename="", mode="wb", fileobj=output_file, mtime=0.0 + ) + fileobj = tarfile.open(mode="w", fileobj=fileobj) + + try: + with fileobj as af: + for archive_path, file_path in files.items(): + if archive_path in extra_files: + continue + if archive_type == ArchiveType.ZIP: + assert isinstance(af, zipfile.ZipFile) + af.write(file_path, arcname=archive_path) else: - tf.addfile(info, None) - - for archive_path, contents in extra_files.items(): - info = tarfile.TarInfo(archive_path) - contents_encoded = contents.encode("utf-8") - info.size = len(contents_encoded) - tf.addfile(info, io.BytesIO(contents_encoded)) - - if use_gzip: - fileobj.close() + assert isinstance(af, tarfile.TarFile) + if info := af.gettarinfo( + file_path, arcname=archive_path + ): + if info.isfile(): + with open(file_path, "rb") as f: + af.addfile(info, f) + else: + af.addfile(info, None) + + for archive_path, contents in extra_files.items(): + contents_encoded = contents.encode("utf-8") + + if archive_type == ArchiveType.ZIP: + assert isinstance(af, zipfile.ZipFile) + af.writestr(archive_path, contents_encoded) + else: + assert isinstance(af, tarfile.TarFile) + info = tarfile.TarInfo(archive_path) + info.size = len(contents_encoded) + af.addfile(info, io.BytesIO(contents_encoded)) + finally: + if close_fileobj: + close_fileobj.close() output_file.seek(0) diff --git a/src/zenml/utils/code_utils.py b/src/zenml/utils/code_utils.py index d38888aa399..d5d66664a3e 100644 --- a/src/zenml/utils/code_utils.py +++ b/src/zenml/utils/code_utils.py @@ -25,7 +25,7 @@ from zenml.io import fileio from zenml.logger import get_logger from zenml.utils import source_utils, string_utils -from zenml.utils.archivable import Archivable +from zenml.utils.archivable import Archivable, ArchiveType if TYPE_CHECKING: from git.repo.base import Repo @@ -152,15 +152,19 @@ def get_files(self) -> Dict[str, str]: return all_files def write_archive( - self, output_file: IO[bytes], use_gzip: 
bool = True + self, + output_file: IO[bytes], + archive_type: ArchiveType = ArchiveType.TAR_GZ, ) -> None: """Writes an archive of the build context to the given file. Args: output_file: The file to write the archive to. - use_gzip: Whether to use `gzip` to compress the file. + archive_type: The type of archive to create. """ - super().write_archive(output_file=output_file, use_gzip=use_gzip) + super().write_archive( + output_file=output_file, archive_type=archive_type + ) archive_size = os.path.getsize(output_file.name) if archive_size > 20 * 1024 * 1024: logger.warning(