diff --git a/.gitignore b/.gitignore index f40463a..364e889 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ setup-scripts/dump.rx4-partial.csv setup-scripts/road_config.csv -setup-scripts/space_and_time_continuum.csv \ No newline at end of file +setup-scripts/space_and_time_continuum.csv +venv/ +**/__pycache__ \ No newline at end of file diff --git a/add-dags-to-composer.cloudbuild.yaml b/add-dags-to-composer.cloudbuild.yaml new file mode 100644 index 0000000..b843aa9 --- /dev/null +++ b/add-dags-to-composer.cloudbuild.yaml @@ -0,0 +1,45 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START composer_cicd_dagsync_yaml] +steps: + # install dependencies for production runtime + + - name: python:3.8-slim + entrypoint: pip + args: ["install", "-r", "requirements.txt", "--user"] + + # install dependencies for testing + - name: python:3.8-slim + entrypoint: pip + args: ["install", "-r", "requirements-test.txt", "--user"] + + # run in python 3.8 which is latest version in Cloud Composer + - name: python:3.8-slim + entrypoint: python3.8 + args: ["-m", "pytest", "-s", "dags/"] + + # install dependencies for copying + - name: python + entrypoint: pip + args: ["install", "-r", "utils/requirements.txt", "--user"] + + # run + - name: python + entrypoint: python + args: ["utils/add_dags_to_composer.py", "--dags_directory=${_DAGS_DIRECTORY}", "--dags_bucket=${_DAGS_BUCKET}"] + +options: + logging: CLOUD_LOGGING_ONLY +# [END composer_cicd_dagsync_yaml] \ No newline at end of file diff --git a/dags/first_dag.py b/dags/first_dag.py index da9ac43..a347ff5 100644 --- a/dags/first_dag.py +++ b/dags/first_dag.py @@ -24,14 +24,16 @@ from airflow.operators import bash, email, python_operator from airflow.providers.google.cloud.operators import bigquery from airflow.providers.google.cloud.transfers import bigquery_to_gcs +from airflow.models.variable import Variable from airflow.utils import trigger_rule +from RGCustomOperator import RGCustomOperator target_dataset_name = "greenhat_summary" target_table_name = "readings_by_street" location = "us-central1" -project_id = "composer-workshop" -gcs_bucket = "{{params.output_gcs_bucket}}" +project_id = "qwiklabs-gcp-00-f71c1a9b29a4" +gcs_bucket = "{{ params.output_gcs_bucket }}" csv_output_file = f"gs://{gcs_bucket}/street_readings.csv" avro_output_file = f"gs://{gcs_bucket}/street_readings.avro" @@ -50,10 +52,10 @@ AVG(readings.co_ppm) AS avg_co_ppm, MIN(readings.timestamp) AS min_timestamp, MAX(readings.timestamp) AS max_timestamp, - FROM `composer-workshop.greenhat.readings` AS readings - LEFT JOIN `composer-workshop.greenhat.space_and_time_continuum` AS space_and_time + FROM `qwiklabs-gcp-00-f71c1a9b29a4.greenhat.readings` AS readings + LEFT JOIN `qwiklabs-gcp-00-f71c1a9b29a4.greenhat.space_and_time_continuum` AS space_and_time ON readings.timestamp = space_and_time.timestamp - LEFT JOIN `composer-workshop.greenhat.road_config` AS roads + LEFT JOIN `qwiklabs-gcp-00-f71c1a9b29a4.greenhat.road_config` AS roads ON space_and_time.road_id = roads.road_id WHERE readings.timestamp >= CAST('{{ params.min_query_date }}' AS TIMESTAMP) AND readings.timestamp < CAST('{{ params.max_query_date }}' AS TIMESTAMP) @@ -82,14 +84,10 @@ "min_query_date": Param("2022-01-01", type="string"), "bq_dataset_name": Param("greenhat_summary", type="string"), "business_datetime": Param(default_business_datetime, type="string"), - "gcp_project": Param("composer-workshop", type="string"), - "output_gcs_bucket": Param("composer-workshop-data-output", type="string") + "gcp_project": Param("qwiklabs-gcp-00-f71c1a9b29a4", type="string"), + "output_gcs_bucket": Param("qwiklabs-gcp-00-f71c1a9b29a4-data-output", type="string") }, ) as dag: - def greeting(): - import logging - logging.info("Goodbye!") - # Create BigQuery output dataset. make_bq_dataset = bash.BashOperator( task_id="make_bq_dataset", @@ -128,9 +126,10 @@ def greeting(): export_format="AVRO", ) - this_is_the_end = python_operator.PythonOperator( - task_id="goodbye", - python_callable=greeting, + this_is_the_end = RGCustomOperator( + task_id="this_is_the_end", + connection="{{ var.value.greeting }}--" + Variable.get("greeting"), + param="hard-coded param", ) diff --git a/dags/first_dag_test.py b/dags/first_dag_test.py new file mode 100644 index 0000000..7b8d4dd --- /dev/null +++ b/dags/first_dag_test.py @@ -0,0 +1,7 @@ +import internal_unit_testing + + +def test_dag_import(): + from dags import first_dag + + internal_unit_testing.assert_has_valid_dag(first_dag) \ No newline at end of file diff --git a/plugins/RGCustomOperator.py b/plugins/RGCustomOperator.py index 1d44a82..a0b3c80 100644 --- a/plugins/RGCustomOperator.py +++ b/plugins/RGCustomOperator.py @@ -24,4 +24,4 @@ def execute(self, context): Execution function of RGCustomOperator """ #Write logs if needed - self.log.info(f"Simulate some connection to a random off-cloud Mongo Cluster; using connection {self.connection} and a random param of {self.param}") + self.log.info(f"Batman Says: Simulate some connection to a random off-cloud Mongo Cluster; using connection {self.connection} and a random param of {self.param}") diff --git a/requirements-test.txt b/requirements-test.txt new file mode 100644 index 0000000..3acfa4f --- /dev/null +++ b/requirements-test.txt @@ -0,0 +1,2 @@ +pytest==7.0.1 +cloud-composer-dag-test-utils==1.0.0 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..a714d6b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +# be sure to update the constraints file to match +# see https://airflow.apache.org/docs/apache-airflow/stable/installation/installing-from-pypi.html#constraints-files +apache-airflow==2.5.3 \ No newline at end of file diff --git a/terraform/.terraform.lock.hcl b/terraform/.terraform.lock.hcl new file mode 100644 index 0000000..a57cd4d --- /dev/null +++ b/terraform/.terraform.lock.hcl @@ -0,0 +1,21 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates. + +provider "registry.terraform.io/hashicorp/google" { + version = "5.2.0" + hashes = [ + "h1:GuKgYrg5q36jxuqrntYHEhnCSsHoNm0tb1af8x8+WLc=", + "zh:1d4c5b154d4764a0e3e8893193dc71ba5a4cdb2d9d9dd20f69312cc75399b038", + "zh:26c5c6ad5edc27c643f43d950ffe982267b732723a09fef74c672ede7a7459f7", + "zh:2b48824692ecc7fe8ae3366010a7cf8b441aa2ecb4b6e9777638952844eff19e", + "zh:2f77cbb0528e58228117c7976e8864e7604614123c8b33d7329ffb0d084505b9", + "zh:408e6a680c4b7235dc677b8ba6ccbda0bf07ffcbd3d13767474eea2c5177488f", + "zh:68c2e914cf71ff490b4dbc6487900c35f702285cb0047614eccafb6ff057b748", + "zh:849052c81c2ea4c703b22af9ae524d3f45e42c7e9a3553c1ff7a95f49fde6886", + "zh:8f764a4ddcd5eea9f81cc72bb2fd29e2549a91b66faf8df8583c584298a26a86", + "zh:dddc597b4af5e2dc772ec4291e39daffb4dc46f2cccde1d3a6d2cbe8d291743d", + "zh:de9752d744bd91fd35e589fea0d8a72f983fe6fc872cfd19841758dcb8629a3b", + "zh:ec40d112e5022e2ba408bdfab1fd2d4f30c0183db02a771fdf26cd3a8c7e9949", + "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", + ] +} diff --git a/terraform/.terraform/providers/registry.terraform.io/hashicorp/google/5.2.0/linux_amd64/terraform-provider-google_v5.2.0_x5 b/terraform/.terraform/providers/registry.terraform.io/hashicorp/google/5.2.0/linux_amd64/terraform-provider-google_v5.2.0_x5 new file mode 100755 index 0000000..eb6e4fd Binary files /dev/null and b/terraform/.terraform/providers/registry.terraform.io/hashicorp/google/5.2.0/linux_amd64/terraform-provider-google_v5.2.0_x5 differ diff --git a/terraform/main.tf b/terraform/main.tf index fc0a54d..d7f3779 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -53,9 +53,23 @@ resource "google_service_account_iam_member" "custom_service_account" { } resource "google_composer_environment" "example_environment" { - // ---- Challenge ---- // - // Create a cloud composer 2 instance following the documenation below: - // https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/composer_environment#with-gke-and-compute-resource-dependencies + provider = google + name = "terraform-composer-environment" + + config { + software_config { + image_version = "composer-2.4.5-airflow-2.5.3" + } + + environment_size = "ENVIRONMENT_SIZE_SMALL" + + node_config { + service_account = google_service_account.custom_service_account.email + network = google_compute_network.test.id + subnetwork = google_compute_subnetwork.test.id + } + + } } resource "google_compute_network" "test" { diff --git a/terraform/terraform.tfstate b/terraform/terraform.tfstate new file mode 100644 index 0000000..b00ce6e --- /dev/null +++ b/terraform/terraform.tfstate @@ -0,0 +1,395 @@ +{ + "version": 4, + "terraform_version": "1.6.0", + "serial": 10, + "lineage": "3a1343f0-29fb-7a83-4762-a0309850d634", + "outputs": {}, + "resources": [ + { + "mode": "data", + "type": "google_project", + "name": "project", + "provider": "provider[\"registry.terraform.io/hashicorp/google\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "auto_create_network": null, + "billing_account": "01FC4B-769C43-7DEE71", + "effective_labels": { + "fleet": "gcpd" + }, + "folder_id": "559694388641", + "id": "projects/qwiklabs-gcp-00-f71c1a9b29a4", + "labels": { + "fleet": "gcpd" + }, + "name": "qwiklabs-gcp-00-f71c1a9b29a4", + "number": "240840094949", + "org_id": "", + "project_id": "qwiklabs-gcp-00-f71c1a9b29a4", + "skip_delete": null, + "terraform_labels": { + "fleet": "gcpd" + } + }, + "sensitive_attributes": [] + } + ] + }, + { + "mode": "managed", + "type": "google_composer_environment", + "name": "example_environment", + "provider": "provider[\"registry.terraform.io/hashicorp/google\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "config": [ + { + "airflow_uri": "https://0c57b1eeb19d424a8407e11d4ca8e519-dot-us-central1.composer.googleusercontent.com", + "dag_gcs_prefix": "gs://us-central1-terraform-compo-b7c5c91f-bucket/dags", + "database_config": [ + { + "machine_type": "" + } + ], + "encryption_config": [ + { + "kms_key_name": "" + } + ], + "environment_size": "ENVIRONMENT_SIZE_SMALL", + "gke_cluster": "projects/qwiklabs-gcp-00-f71c1a9b29a4/locations/us-central1/clusters/us-central1-terraform-compo-b7c5c91f-gke", + "maintenance_window": [ + { + "end_time": "1970-01-01T04:00:00Z", + "recurrence": "FREQ=WEEKLY;BYDAY=FR,SA,SU", + "start_time": "1970-01-01T00:00:00Z" + } + ], + "master_authorized_networks_config": [], + "node_config": [ + { + "disk_size_gb": 0, + "enable_ip_masq_agent": false, + "ip_allocation_policy": [ + { + "cluster_ipv4_cidr_block": "", + "cluster_secondary_range_name": "", + "services_ipv4_cidr_block": "", + "services_secondary_range_name": "", + "use_ip_aliases": false + } + ], + "machine_type": "", + "network": "projects/qwiklabs-gcp-00-f71c1a9b29a4/global/networks/composer-test-network", + "oauth_scopes": [], + "service_account": "custom-service-account@qwiklabs-gcp-00-f71c1a9b29a4.iam.gserviceaccount.com", + "subnetwork": "projects/qwiklabs-gcp-00-f71c1a9b29a4/regions/us-central1/subnetworks/composer-test-subnetwork", + "tags": null, + "zone": "" + } + ], + "node_count": 0, + "private_environment_config": [ + { + "cloud_composer_connection_subnetwork": "", + "cloud_composer_network_ipv4_cidr_block": "172.31.245.0/24", + "cloud_sql_ipv4_cidr_block": "10.0.0.0/12", + "connection_type": "", + "enable_private_endpoint": false, + "enable_privately_used_public_ips": false, + "master_ipv4_cidr_block": "", + "web_server_ipv4_cidr_block": "" + } + ], + "recovery_config": [], + "resilience_mode": "STANDARD_RESILIENCE", + "software_config": [ + { + "airflow_config_overrides": null, + "env_variables": null, + "image_version": "composer-2.4.5-airflow-2.5.3", + "pypi_packages": null, + "python_version": "", + "scheduler_count": 0 + } + ], + "web_server_config": [], + "web_server_network_access_control": [ + { + "allowed_ip_range": [ + { + "description": "Allows access from all IPv4 addresses (default value)", + "value": "0.0.0.0/0" + }, + { + "description": "Allows access from all IPv6 addresses (default value)", + "value": "::0/0" + } + ] + } + ], + "workloads_config": [ + { + "scheduler": [ + { + "count": 1, + "cpu": 0.5, + "memory_gb": 2, + "storage_gb": 1 + } + ], + "web_server": [ + { + "cpu": 0.5, + "memory_gb": 2, + "storage_gb": 1 + } + ], + "worker": [ + { + "cpu": 0.5, + "max_count": 3, + "memory_gb": 2, + "min_count": 1, + "storage_gb": 1 + } + ] + } + ] + } + ], + "effective_labels": {}, + "id": "projects/qwiklabs-gcp-00-f71c1a9b29a4/locations/us-central1/environments/terraform-composer-environment", + "labels": null, + "name": "terraform-composer-environment", + "project": "qwiklabs-gcp-00-f71c1a9b29a4", + "region": "us-central1", + "terraform_labels": {}, + "timeouts": null + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjo3MjAwMDAwMDAwMDAwLCJkZWxldGUiOjE4MDAwMDAwMDAwMDAsInVwZGF0ZSI6NzIwMDAwMDAwMDAwMH19", + "dependencies": [ + "google_compute_network.test", + "google_compute_subnetwork.test", + "google_service_account.custom_service_account" + ] + } + ] + }, + { + "mode": "managed", + "type": "google_compute_network", + "name": "test", + "provider": "provider[\"registry.terraform.io/hashicorp/google\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "auto_create_subnetworks": false, + "delete_default_routes_on_create": false, + "description": "", + "enable_ula_internal_ipv6": false, + "gateway_ipv4": "", + "id": "projects/qwiklabs-gcp-00-f71c1a9b29a4/global/networks/composer-test-network", + "internal_ipv6_range": "", + "mtu": 0, + "name": "composer-test-network", + "network_firewall_policy_enforcement_order": "AFTER_CLASSIC_FIREWALL", + "project": "qwiklabs-gcp-00-f71c1a9b29a4", + "routing_mode": "REGIONAL", + "self_link": "https://www.googleapis.com/compute/v1/projects/qwiklabs-gcp-00-f71c1a9b29a4/global/networks/composer-test-network", + "timeouts": null + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjoxMjAwMDAwMDAwMDAwLCJkZWxldGUiOjEyMDAwMDAwMDAwMDAsInVwZGF0ZSI6MTIwMDAwMDAwMDAwMH19" + } + ] + }, + { + "mode": "managed", + "type": "google_compute_subnetwork", + "name": "test", + "provider": "provider[\"registry.terraform.io/hashicorp/google\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "creation_timestamp": "2023-10-19T02:33:23.427-07:00", + "description": "", + "external_ipv6_prefix": "", + "fingerprint": null, + "gateway_address": "10.2.0.1", + "id": "projects/qwiklabs-gcp-00-f71c1a9b29a4/regions/us-central1/subnetworks/composer-test-subnetwork", + "internal_ipv6_prefix": "", + "ip_cidr_range": "10.2.0.0/16", + "ipv6_access_type": "", + "ipv6_cidr_range": "", + "log_config": [], + "name": "composer-test-subnetwork", + "network": "https://www.googleapis.com/compute/v1/projects/qwiklabs-gcp-00-f71c1a9b29a4/global/networks/composer-test-network", + "private_ip_google_access": false, + "private_ipv6_google_access": "DISABLE_GOOGLE_ACCESS", + "project": "qwiklabs-gcp-00-f71c1a9b29a4", + "purpose": "PRIVATE", + "region": "us-central1", + "role": "", + "secondary_ip_range": [], + "self_link": "https://www.googleapis.com/compute/v1/projects/qwiklabs-gcp-00-f71c1a9b29a4/regions/us-central1/subnetworks/composer-test-subnetwork", + "stack_type": "IPV4_ONLY", + "timeouts": null + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjoxMjAwMDAwMDAwMDAwLCJkZWxldGUiOjEyMDAwMDAwMDAwMDAsInVwZGF0ZSI6MTIwMDAwMDAwMDAwMH19", + "dependencies": [ + "google_compute_network.test" + ] + } + ] + }, + { + "mode": "managed", + "type": "google_project_iam_member", + "name": "custom_service_account", + "provider": "provider[\"registry.terraform.io/hashicorp/google\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "condition": [], + "etag": "BwYIDnF2sTw=", + "id": "qwiklabs-gcp-00-f71c1a9b29a4/roles/composer.worker/serviceAccount:custom-service-account@qwiklabs-gcp-00-f71c1a9b29a4.iam.gserviceaccount.com", + "member": "serviceAccount:custom-service-account@qwiklabs-gcp-00-f71c1a9b29a4.iam.gserviceaccount.com", + "project": "qwiklabs-gcp-00-f71c1a9b29a4", + "role": "roles/composer.worker" + }, + "sensitive_attributes": [], + "private": "bnVsbA==", + "dependencies": [ + "google_service_account.custom_service_account" + ] + } + ] + }, + { + "mode": "managed", + "type": "google_project_service", + "name": "cloud_build_api", + "provider": "provider[\"registry.terraform.io/hashicorp/google\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "disable_dependent_services": null, + "disable_on_destroy": false, + "id": "qwiklabs-gcp-00-f71c1a9b29a4/cloudbuild.googleapis.com", + "project": "qwiklabs-gcp-00-f71c1a9b29a4", + "service": "cloudbuild.googleapis.com", + "timeouts": null + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjoxMjAwMDAwMDAwMDAwLCJkZWxldGUiOjEyMDAwMDAwMDAwMDAsInJlYWQiOjYwMDAwMDAwMDAwMCwidXBkYXRlIjoxMjAwMDAwMDAwMDAwfX0=" + } + ] + }, + { + "mode": "managed", + "type": "google_project_service", + "name": "composer_api", + "provider": "provider[\"registry.terraform.io/hashicorp/google\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "disable_dependent_services": null, + "disable_on_destroy": false, + "id": "qwiklabs-gcp-00-f71c1a9b29a4/composer.googleapis.com", + "project": "qwiklabs-gcp-00-f71c1a9b29a4", + "service": "composer.googleapis.com", + "timeouts": null + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjoxMjAwMDAwMDAwMDAwLCJkZWxldGUiOjEyMDAwMDAwMDAwMDAsInJlYWQiOjYwMDAwMDAwMDAwMCwidXBkYXRlIjoxMjAwMDAwMDAwMDAwfX0=" + } + ] + }, + { + "mode": "managed", + "type": "google_project_service", + "name": "secret_manager_api", + "provider": "provider[\"registry.terraform.io/hashicorp/google\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "disable_dependent_services": null, + "disable_on_destroy": false, + "id": "qwiklabs-gcp-00-f71c1a9b29a4/secretmanager.googleapis.com", + "project": "qwiklabs-gcp-00-f71c1a9b29a4", + "service": "secretmanager.googleapis.com", + "timeouts": null + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjoxMjAwMDAwMDAwMDAwLCJkZWxldGUiOjEyMDAwMDAwMDAwMDAsInJlYWQiOjYwMDAwMDAwMDAwMCwidXBkYXRlIjoxMjAwMDAwMDAwMDAwfX0=" + } + ] + }, + { + "mode": "managed", + "type": "google_service_account", + "name": "custom_service_account", + "provider": "provider[\"registry.terraform.io/hashicorp/google\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "account_id": "custom-service-account", + "description": "", + "disabled": false, + "display_name": "Terraform Custom Service Account", + "email": "custom-service-account@qwiklabs-gcp-00-f71c1a9b29a4.iam.gserviceaccount.com", + "id": "projects/qwiklabs-gcp-00-f71c1a9b29a4/serviceAccounts/custom-service-account@qwiklabs-gcp-00-f71c1a9b29a4.iam.gserviceaccount.com", + "member": "serviceAccount:custom-service-account@qwiklabs-gcp-00-f71c1a9b29a4.iam.gserviceaccount.com", + "name": "projects/qwiklabs-gcp-00-f71c1a9b29a4/serviceAccounts/custom-service-account@qwiklabs-gcp-00-f71c1a9b29a4.iam.gserviceaccount.com", + "project": "qwiklabs-gcp-00-f71c1a9b29a4", + "timeouts": null, + "unique_id": "106830394426121340517" + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjozMDAwMDAwMDAwMDB9fQ==" + } + ] + }, + { + "mode": "managed", + "type": "google_service_account_iam_member", + "name": "custom_service_account", + "provider": "provider[\"registry.terraform.io/hashicorp/google\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "condition": [], + "etag": "BwYIDnFsZ4M=", + "id": "projects/qwiklabs-gcp-00-f71c1a9b29a4/serviceAccounts/custom-service-account@qwiklabs-gcp-00-f71c1a9b29a4.iam.gserviceaccount.com/roles/composer.ServiceAgentV2Ext/serviceAccount:service-240840094949@cloudcomposer-accounts.iam.gserviceaccount.com", + "member": "serviceAccount:service-240840094949@cloudcomposer-accounts.iam.gserviceaccount.com", + "role": "roles/composer.ServiceAgentV2Ext", + "service_account_id": "projects/qwiklabs-gcp-00-f71c1a9b29a4/serviceAccounts/custom-service-account@qwiklabs-gcp-00-f71c1a9b29a4.iam.gserviceaccount.com" + }, + "sensitive_attributes": [], + "private": "bnVsbA==", + "dependencies": [ + "data.google_project.project", + "google_project_service.composer_api", + "google_service_account.custom_service_account" + ] + } + ] + } + ], + "check_results": null +} diff --git a/terraform/variables.tf b/terraform/variables.tf index 615e97c..1dc1511 100644 --- a/terraform/variables.tf +++ b/terraform/variables.tf @@ -1,7 +1,7 @@ variable "my_gcp_project" { - default = "your-gcp-project-id" + default = "qwiklabs-gcp-00-f71c1a9b29a4" } variable "region" { - default = "gcp region" # example "us-central1" + default = "us-central1" # example "us-central1" } \ No newline at end of file diff --git a/test-dags.cloudbuild.yaml b/test-dags.cloudbuild.yaml new file mode 100644 index 0000000..ff1cf32 --- /dev/null +++ b/test-dags.cloudbuild.yaml @@ -0,0 +1,15 @@ + +steps: + # install dependencies + - name: python:3.8-slim + entrypoint: pip + args: ["install", "-r", "requirements.txt", "--user"] + + - name: python:3.8-slim + entrypoint: pip + args: ["install", "-r", "requirements-test.txt", "--user"] + + # run in python 3.8 which is latest version in Cloud Composer + - name: python:3.8-slim + entrypoint: python3.8 + args: ["-m", "pytest", "-s", "dags/"] \ No newline at end of file diff --git a/utils/add_dags_to_composer.py b/utils/add_dags_to_composer.py new file mode 100644 index 0000000..e4aa1bf --- /dev/null +++ b/utils/add_dags_to_composer.py @@ -0,0 +1,101 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START composer_cicd_add_dags_to_composer_utility] +from __future__ import annotations + +import argparse +import glob +import os +from shutil import copytree, ignore_patterns +import tempfile + +# Imports the Google Cloud client library +from google.cloud import storage + + +def _create_dags_list(dags_directory: str) -> tuple[str, list[str]]: + temp_dir = tempfile.mkdtemp() + + # ignore non-DAG Python files + files_to_ignore = ignore_patterns("__init__.py", "*_test.py") + + # Copy everything but the ignored files to a temp directory + copytree(dags_directory, f"{temp_dir}/", ignore=files_to_ignore, dirs_exist_ok=True) + + # The only Python files left in our temp directory are DAG files + # so we can exclude all non Python files + dags = glob.glob(f"{temp_dir}/*.py") + return (temp_dir, dags) + + +def upload_dags_to_composer( + dags_directory: str, bucket_name: str, name_replacement: str = "dags/" +) -> None: + """ + Given a directory, this function moves all DAG files from that directory + to a temporary directory, then uploads all contents of the temporary directory + to a given cloud storage bucket + Args: + dags_directory (str): a fully qualified path to a directory that contains a "dags/" subdirectory + bucket_name (str): the GCS bucket of the Cloud Composer environment to upload DAGs to + name_replacement (str, optional): the name of the "dags/" subdirectory that will be used when constructing the temporary directory path name Defaults to "dags/". + """ + temp_dir, dags = _create_dags_list(dags_directory) + + if len(dags) > 0: + # Note - the GCS client library does not currently support batch requests on uploads + # if you have a large number of files, consider using + # the Python subprocess module to run gsutil -m cp -r on your dags + # See https://cloud.google.com/storage/docs/gsutil/commands/cp for more info + storage_client = storage.Client() + bucket = storage_client.bucket(bucket_name) + + for dag in dags: + # Remove path to temp dir + dag = dag.replace(f"{temp_dir}/", name_replacement) + + try: + # Upload to your bucket + blob = bucket.blob(dag) + blob.upload_from_filename(dag) + print(f"File {dag} uploaded to {bucket_name}/{dag}.") + except FileNotFoundError: + current_directory = os.listdir() + print( + f"{name_replacement} directory not found in {current_directory}, you may need to override the default value of name_replacement to point to a relative directory" + ) + raise + + else: + print("No DAGs to upload.") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument( + "--dags_directory", + help="Relative path to the source directory containing your DAGs", + ) + parser.add_argument( + "--dags_bucket", + help="Name of the DAGs bucket of your Composer environment without the gs:// prefix", + ) + + args = parser.parse_args() + + upload_dags_to_composer(args.dags_directory, args.dags_bucket) +# [END composer_cicd_add_dags_to_composer_utility] \ No newline at end of file diff --git a/utils/requirements.txt b/utils/requirements.txt new file mode 100644 index 0000000..da71b29 --- /dev/null +++ b/utils/requirements.txt @@ -0,0 +1 @@ +google-cloud-storage==2.9.0