Skip to content

Commit

Permalink
DPR2-52: Domain maintenance jobs
Browse files Browse the repository at this point in the history
  • Loading branch information
tom-ogle-moj committed Oct 5, 2023
1 parent 30b4e14 commit 6c7fe98
Show file tree
Hide file tree
Showing 3 changed files with 158 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
"compact_curated_job_num_workers": 2,
"compact_curated_job_log_level": "INFO",
"compact_curated_job_schedule": "cron(0 22 * * ? *)",
"compact_domain_job_worker_type": "G.1X",
"compact_domain_job_num_workers": 2,
"compact_domain_job_log_level": "INFO",
"compact_domain_job_schedule": "cron(0 22 * * ? *)",
"retention_raw_job_worker_type": "G.1X",
"retention_raw_job_num_workers": 2,
"retention_raw_job_log_level": "INFO",
Expand All @@ -39,6 +43,10 @@
"retention_curated_job_num_workers": 2,
"retention_curated_job_log_level": "INFO",
"retention_curated_job_schedule": "cron(0 2 * * ? *)",
"retention_domain_job_worker_type": "G.1X",
"retention_domain_job_num_workers": 2,
"retention_domain_job_log_level": "INFO",
"retention_domain_job_schedule": "cron(0 2 * * ? *)",
"create_security_conf": true,
"setup_buckets": true,
"create_kinesis_streams": true,
Expand Down Expand Up @@ -93,6 +101,10 @@
"compact_curated_job_num_workers": 2,
"compact_curated_job_log_level": "INFO",
"compact_curated_job_schedule": "cron(0 22 * * ? *)",
"compact_domain_job_worker_type": "G.1X",
"compact_domain_job_num_workers": 2,
"compact_domain_job_log_level": "INFO",
"compact_domain_job_schedule": "cron(0 22 * * ? *)",
"retention_raw_job_worker_type": "G.1X",
"retention_raw_job_num_workers": 2,
"retention_raw_job_log_level": "INFO",
Expand All @@ -105,6 +117,10 @@
"retention_curated_job_num_workers": 2,
"retention_curated_job_log_level": "INFO",
"retention_curated_job_schedule": "cron(0 2 * * ? *)",
"retention_domain_job_worker_type": "G.1X",
"retention_domain_job_num_workers": 2,
"retention_domain_job_log_level": "INFO",
"retention_domain_job_schedule": "cron(0 2 * * ? *)",
"create_security_conf": true,
"setup_buckets": true,
"create_kinesis_streams": true,
Expand Down Expand Up @@ -159,6 +175,10 @@
"compact_curated_job_num_workers": 2,
"compact_curated_job_log_level": "INFO",
"compact_curated_job_schedule": "cron(0 22 * * ? *)",
"compact_domain_job_worker_type": "G.1X",
"compact_domain_job_num_workers": 2,
"compact_domain_job_log_level": "INFO",
"compact_domain_job_schedule": "cron(0 22 * * ? *)",
"retention_raw_job_worker_type": "G.1X",
"retention_raw_job_num_workers": 2,
"retention_raw_job_log_level": "INFO",
Expand All @@ -171,6 +191,10 @@
"retention_curated_job_num_workers": 2,
"retention_curated_job_log_level": "INFO",
"retention_curated_job_schedule": "cron(0 2 * * ? *)",
"retention_domain_job_worker_type": "G.1X",
"retention_domain_job_num_workers": 2,
"retention_domain_job_log_level": "INFO",
"retention_domain_job_schedule": "cron(0 2 * * ? *)",
"create_security_conf": true,
"setup_buckets": true,
"create_kinesis_streams": true,
Expand Down Expand Up @@ -225,6 +249,10 @@
"compact_curated_job_num_workers": 2,
"compact_curated_job_log_level": "INFO",
"compact_curated_job_schedule": "cron(0 22 * * ? *)",
"compact_domain_job_worker_type": "G.1X",
"compact_domain_job_num_workers": 2,
"compact_domain_job_log_level": "INFO",
"compact_domain_job_schedule": "cron(0 22 * * ? *)",
"retention_raw_job_worker_type": "G.1X",
"retention_raw_job_num_workers": 2,
"retention_raw_job_log_level": "INFO",
Expand All @@ -237,6 +265,10 @@
"retention_curated_job_num_workers": 2,
"retention_curated_job_log_level": "INFO",
"retention_curated_job_schedule": "cron(0 2 * * ? *)",
"retention_domain_job_worker_type": "G.1X",
"retention_domain_job_num_workers": 2,
"retention_domain_job_log_level": "INFO",
"retention_domain_job_schedule": "cron(0 2 * * ? *)",
"create_security_conf": true,
"setup_buckets": true,
"create_kinesis_streams": true,
Expand Down
12 changes: 12 additions & 0 deletions terraform/environments/digital-prison-reporting/locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ locals {
compact_curated_job_log_level = local.application_data.accounts[local.environment].compact_curated_job_log_level
compact_curated_job_schedule = local.application_data.accounts[local.environment].compact_curated_job_schedule

# Compact Domain Job
compact_domain_job_worker_type = local.application_data.accounts[local.environment].compact_domain_job_worker_type
compact_domain_job_num_workers = local.application_data.accounts[local.environment].compact_domain_job_num_workers
compact_domain_job_log_level = local.application_data.accounts[local.environment].compact_domain_job_log_level
compact_domain_job_schedule = local.application_data.accounts[local.environment].compact_domain_job_schedule

# Retention (vacuum) Raw Job
retention_raw_job_worker_type = local.application_data.accounts[local.environment].retention_raw_job_worker_type
retention_raw_job_num_workers = local.application_data.accounts[local.environment].retention_raw_job_num_workers
Expand All @@ -96,6 +102,12 @@ locals {
retention_curated_job_log_level = local.application_data.accounts[local.environment].retention_curated_job_log_level
retention_curated_job_schedule = local.application_data.accounts[local.environment].retention_curated_job_schedule

# Retention (vacuum) Domain Job
retention_domain_job_worker_type = local.application_data.accounts[local.environment].retention_domain_job_worker_type
retention_domain_job_num_workers = local.application_data.accounts[local.environment].retention_domain_job_num_workers
retention_domain_job_log_level = local.application_data.accounts[local.environment].retention_domain_job_log_level
retention_domain_job_schedule = local.application_data.accounts[local.environment].retention_domain_job_schedule

# Common Policies
kms_read_access_policy = "${local.project}_kms_read_policy"
s3_read_access_policy = "${local.project}_s3_read_policy"
Expand Down
120 changes: 114 additions & 6 deletions terraform/environments/digital-prison-reporting/maintenance_jobs.tf
Original file line number Diff line number Diff line change
@@ -1,22 +1,27 @@
locals {
compact_raw_job_name = "${local.project}-maintenance-compact-raw-${local.env}"
compact_raw_job_short_name = "${local.project}-maintenance-compact-raw"
compact_structured_job_name = "${local.project}-maintenance-compact-structured-${local.env}"
compact_structured_job_short_name = "${local.project}-maintenance-compact-structured"
compact_curated_job_name = "${local.project}-maintenance-compact-curated-${local.env}"
compact_curated_job_short_name = "${local.project}-maintenance-compact-curated"
compact_domain_job_short_name = "${local.project}-maintenance-compact-domain"
compact_raw_job_name = "${local.compact_raw_job_short_name}-${local.env}"
# Derived from the structured short name so it does not collide with compact_curated_job_name
compact_structured_job_name = "${local.compact_structured_job_short_name}-${local.env}"
compact_curated_job_name = "${local.compact_curated_job_short_name}-${local.env}"
compact_domain_job_name = "${local.compact_domain_job_short_name}-${local.env}"

retention_raw_job_name = "${local.project}-maintenance-retention-raw-${local.env}"
retention_raw_job_short_name = "${local.project}-maintenance-retention-raw"
retention_structured_job_name = "${local.project}-maintenance-retention-structured-${local.env}"
retention_structured_job_short_name = "${local.project}-maintenance-retention-structured"
retention_curated_job_name = "${local.project}-maintenance-retention-curated-${local.env}"
retention_curated_job_short_name = "${local.project}-maintenance-retention-curated"
retention_domain_job_short_name = "${local.project}-maintenance-retention-domain"
retention_raw_job_name = "${local.retention_raw_job_short_name}-${local.env}"
retention_structured_job_name = "${local.retention_structured_job_short_name}-${local.env}"
retention_curated_job_name = "${local.retention_curated_job_short_name}-${local.env}"
retention_domain_job_name = "${local.retention_domain_job_short_name}-${local.env}"


raw_zone_nomis_path = "s3://${module.s3_raw_bucket.bucket_id}/nomis/"
structured_zone_nomis_path = "s3://${module.s3_structured_bucket.bucket_id}/nomis/"
curated_zone_nomis_path = "s3://${module.s3_curated_bucket.bucket_id}/nomis/"
domain_zone_nomis_path = "s3://${module.s3_domain_bucket.bucket_id}/nomis/"

compact_job_class = "uk.gov.justice.digital.job.CompactionJob"
retention_job_class = "uk.gov.justice.digital.job.VacuumJob"
Expand Down Expand Up @@ -145,6 +150,48 @@ module "glue_compact_curated_job" {
"--dpr.log.level" = local.compact_curated_job_log_level
}
}
# Glue Job: compaction of the domain zone.
# Instantiates the local glue_job module with the domain-specific names,
# worker sizing and log level, pointing the maintenance root path at the
# domain zone nomis prefix.
module "glue_compact_domain_job" {
  source = "./modules/glue_job"

  # Identity and placement
  name        = local.compact_domain_job_name
  short_name  = local.compact_domain_job_short_name
  description = "Runs compaction on tables in the domain layer"
  project_id  = local.project
  account     = local.account_id
  region      = local.account_region

  # Creation toggles shared with the other jobs in this file
  create_job                    = local.create_job
  create_security_configuration = local.create_sec_conf

  # Job runtime settings
  command_type                 = "glueetl"
  job_language                 = "scala"
  execution_class              = "FLEX"
  worker_type                  = local.compact_domain_job_worker_type
  number_of_workers            = local.compact_domain_job_num_workers
  max_concurrent               = 1
  enable_continuous_log_filter = false
  aws_kms_key                  = local.s3_kms_arn

  # Placeholder Script Location; the job class itself is passed via "--class" below
  script_location = local.glue_placeholder_script_location

  # Per-job prefixes inside the glue job bucket
  temp_dir         = "s3://${module.s3_glue_job_bucket.bucket_id}/tmp/${local.compact_domain_job_name}/"
  checkpoint_dir   = "s3://${module.s3_glue_job_bucket.bucket_id}/checkpoint/${local.compact_domain_job_name}/"
  spark_event_logs = "s3://${module.s3_glue_job_bucket.bucket_id}/spark-logs/${local.compact_domain_job_name}/"

  # Arguments handed to the job
  arguments = {
    "--class"                     = local.compact_job_class
    "--extra-jars"                = local.glue_jobs_latest_jar_location
    "--datalake-formats"          = "delta"
    "--dpr.maintenance.root.path" = local.domain_zone_nomis_path
    "--dpr.log.level"             = local.compact_domain_job_log_level
  }

  # Common tags plus job-specific Name / Resource_Type
  tags = merge(
    local.all_tags,
    {
      Name          = local.compact_domain_job_name
      Resource_Type = "Glue Job"
    }
  )
}

# Glue Job, Retention (vacuum) Raw zone
module "glue_retention_raw_job" {
source = "./modules/glue_job"
Expand Down Expand Up @@ -268,6 +315,47 @@ module "glue_retention_curated_job" {
"--dpr.log.level" = local.retention_curated_job_log_level
}
}
# Glue Job: retention (vacuum) of the domain zone.
# Instantiates the local glue_job module with the domain-specific names,
# worker sizing and log level, pointing the maintenance root path at the
# domain zone nomis prefix.
module "glue_retention_domain_job" {
  source = "./modules/glue_job"

  # Identity and placement
  name        = local.retention_domain_job_name
  short_name  = local.retention_domain_job_short_name
  description = "Runs the vacuum retention job on tables in the domain layer"
  project_id  = local.project
  account     = local.account_id
  region      = local.account_region

  # Creation toggles shared with the other jobs in this file
  create_job                    = local.create_job
  create_security_configuration = local.create_sec_conf

  # Job runtime settings
  command_type                 = "glueetl"
  job_language                 = "scala"
  execution_class              = "FLEX"
  worker_type                  = local.retention_domain_job_worker_type
  number_of_workers            = local.retention_domain_job_num_workers
  max_concurrent               = 1
  enable_continuous_log_filter = false
  aws_kms_key                  = local.s3_kms_arn

  # Placeholder Script Location; the job class itself is passed via "--class" below
  script_location = local.glue_placeholder_script_location

  # Per-job prefixes inside the glue job bucket
  temp_dir         = "s3://${module.s3_glue_job_bucket.bucket_id}/tmp/${local.retention_domain_job_name}/"
  checkpoint_dir   = "s3://${module.s3_glue_job_bucket.bucket_id}/checkpoint/${local.retention_domain_job_name}/"
  spark_event_logs = "s3://${module.s3_glue_job_bucket.bucket_id}/spark-logs/${local.retention_domain_job_name}/"

  # Arguments handed to the job
  arguments = {
    "--class"                     = local.retention_job_class
    "--extra-jars"                = local.glue_jobs_latest_jar_location
    "--datalake-formats"          = "delta"
    "--dpr.maintenance.root.path" = local.domain_zone_nomis_path
    "--dpr.log.level"             = local.retention_domain_job_log_level
  }

  # Common tags plus job-specific Name / Resource_Type
  tags = merge(
    local.all_tags,
    {
      Name          = local.retention_domain_job_name
      Resource_Type = "Glue Job"
    }
  )
}

# Maintenance Job Schedules (triggers)
resource "aws_glue_trigger" "compact_raw_job" {
Expand Down Expand Up @@ -300,6 +388,16 @@ resource "aws_glue_trigger" "compact_curated_job" {
}
}

# Scheduled trigger for the domain compaction Glue job; the schedule
# expression comes from compact_domain_job_schedule.
resource "aws_glue_trigger" "compact_domain_job" {
  type     = "SCHEDULED"
  name     = "${local.compact_domain_job_name}-trigger"
  schedule = local.compact_domain_job_schedule

  # Job started when the trigger fires
  actions {
    job_name = module.glue_compact_domain_job.name
  }
}

resource "aws_glue_trigger" "retention_raw_job" {
name = "${local.retention_raw_job_name}-trigger"
schedule = local.retention_raw_job_schedule
Expand Down Expand Up @@ -328,4 +426,14 @@ resource "aws_glue_trigger" "retention_curated_job" {
actions {
job_name = module.glue_retention_curated_job.name
}
}

# Scheduled trigger for the domain retention (vacuum) Glue job; the schedule
# expression comes from retention_domain_job_schedule.
resource "aws_glue_trigger" "retention_domain_job" {
  type     = "SCHEDULED"
  name     = "${local.retention_domain_job_name}-trigger"
  schedule = local.retention_domain_job_schedule

  # Job started when the trigger fires
  actions {
    job_name = module.glue_retention_domain_job.name
  }
}

0 comments on commit 6c7fe98

Please sign in to comment.