Skip to content

Commit

Permalink
DPR2-110: Update glue data hub job params. (#3509)
Browse files Browse the repository at this point in the history
* DPR2-110: Update glue data hub job params.

* DPR2-110: Remove dynamo table that previously managed offsets for streaming reporting hub glue job.
  • Loading branch information
tom-ogle-moj authored Sep 29, 2023
1 parent d570cf8 commit 55496d1
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 58 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"reporting_hub_spark_log_level": "INFO",
"reporting_hub_worker_type": "G.1X",
"reporting_hub_num_workers": 4,
"reporting_hub_kinesis_reader_batch_duration_seconds": 30,
"reporting_hub_batch_duration_seconds": 30,
"refresh_job_worker_type": "G.1X",
"refresh_job_num_workers": 2,
"refresh_job_log_level": "INFO",
Expand Down Expand Up @@ -77,7 +77,7 @@
"reporting_hub_spark_log_level": "INFO",
"reporting_hub_worker_type": "G.1X",
"reporting_hub_num_workers": 4,
"reporting_hub_kinesis_reader_batch_duration_seconds": 30,
"reporting_hub_batch_duration_seconds": 30,
"refresh_job_worker_type": "G.1X",
"refresh_job_num_workers": 2,
"refresh_job_log_level": "INFO",
Expand Down Expand Up @@ -143,7 +143,7 @@
"reporting_hub_spark_log_level": "WARN",
"reporting_hub_worker_type": "G.2X",
"reporting_hub_num_workers": 4,
"reporting_hub_kinesis_reader_batch_duration_seconds": 30,
"reporting_hub_batch_duration_seconds": 30,
"refresh_job_worker_type": "G.1X",
"refresh_job_num_workers": 2,
"refresh_job_log_level": "INFO",
Expand Down Expand Up @@ -209,7 +209,7 @@
"reporting_hub_spark_log_level": "WARN",
"reporting_hub_worker_type": "G.2X",
"reporting_hub_num_workers": 4,
"reporting_hub_kinesis_reader_batch_duration_seconds": 40,
"reporting_hub_batch_duration_seconds": 40,
"refresh_job_worker_type": "G.1X",
"refresh_job_num_workers": 2,
"refresh_job_log_level": "INFO",
Expand Down
2 changes: 1 addition & 1 deletion terraform/environments/digital-prison-reporting/locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ locals {
reporting_hub_num_workers = local.application_data.accounts[local.environment].reporting_hub_num_workers
reporting_hub_log_level = local.application_data.accounts[local.environment].reporting_hub_spark_log_level

reporting_hub_kinesis_reader_batch_duration_seconds = local.application_data.accounts[local.environment].reporting_hub_kinesis_reader_batch_duration_seconds
reporting_hub_batch_duration_seconds = local.application_data.accounts[local.environment].reporting_hub_batch_duration_seconds

# Refresh Job
refresh_job_worker_type = local.application_data.accounts[local.environment].refresh_job_worker_type
Expand Down
76 changes: 23 additions & 53 deletions terraform/environments/digital-prison-reporting/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -43,30 +43,28 @@ module "glue_reporting_hub_job" {
)

arguments = {
"--extra-jars" = local.glue_jobs_latest_jar_location
"--job-bookmark-option" = "job-bookmark-disable"
"--class" = "uk.gov.justice.digital.job.DataHubJob"
"--dpr.aws.kinesis.endpointUrl" = "https://kinesis.${local.account_region}.amazonaws.com"
"--dpr.aws.region" = local.account_region
"--dpr.curated.s3.path" = "s3://${module.s3_curated_bucket.bucket_id}/"
"--dpr.kinesis.reader.batchDurationSeconds" = local.reporting_hub_kinesis_reader_batch_duration_seconds
"--dpr.kinesis.reader.streamName" = local.kinesis_stream_ingestor
"--dpr.raw.s3.path" = "s3://${module.s3_raw_bucket.bucket_id}/"
"--dpr.structured.s3.path" = "s3://${module.s3_structured_bucket.bucket_id}/"
"--dpr.violations.s3.path" = "s3://${module.s3_violation_bucket.bucket_id}/"
"--enable-metrics" = true
"--enable-spark-ui" = false
"--enable-auto-scaling" = true
"--enable-job-insights" = true
"--dpr.aws.kinesis.endpointUrl" = "https://kinesis.${local.account_region}.amazonaws.com"
"--dpr.aws.dynamodb.endpointUrl" = "https://dynamodb.${local.account_region}.amazonaws.com"
"--dpr.contract.registryName" = trimprefix(module.glue_registry_avro.registry_name, "${local.glue_avro_registry[0]}/")
"--dpr.domain.registry" = "${local.project}-domain-registry-${local.environment}"
"--dpr.domain.target.path" = "s3://${module.s3_domain_bucket.bucket_id}"
"--dpr.domain.catalog.db" = module.glue_data_domain_database.db_name
"--dpr.redshift.secrets.name" = "${local.project}-redshift-secret-${local.environment}"
"--dpr.datamart.db.name" = "datamart"
"--dpr.log.level" = local.reporting_hub_log_level
"--extra-jars" = local.glue_jobs_latest_jar_location
"--job-bookmark-option" = "job-bookmark-disable"
"--class" = "uk.gov.justice.digital.job.DataHubJob"
"--dpr.kinesis.stream.arn" = module.kinesis_stream_ingestor.kinesis_stream_arn
"--dpr.aws.region" = local.account_region
"--dpr.curated.s3.path" = "s3://${module.s3_curated_bucket.bucket_id}/"
"--dpr.batchDurationSeconds" = local.reporting_hub_batch_duration_seconds
"--dpr.raw.s3.path" = "s3://${module.s3_raw_bucket.bucket_id}/"
"--dpr.structured.s3.path" = "s3://${module.s3_structured_bucket.bucket_id}/"
"--dpr.violations.s3.path" = "s3://${module.s3_violation_bucket.bucket_id}/"
"--enable-metrics" = true
"--enable-spark-ui" = false
"--enable-auto-scaling" = true
"--enable-job-insights" = true
"--dpr.aws.dynamodb.endpointUrl" = "https://dynamodb.${local.account_region}.amazonaws.com"
"--dpr.contract.registryName" = trimprefix(module.glue_registry_avro.registry_name, "${local.glue_avro_registry[0]}/")
"--dpr.domain.registry" = "${local.project}-domain-registry-${local.environment}"
"--dpr.domain.target.path" = "s3://${module.s3_domain_bucket.bucket_id}"
"--dpr.domain.catalog.db" = module.glue_data_domain_database.db_name
"--dpr.redshift.secrets.name" = "${local.project}-redshift-secret-${local.environment}"
"--dpr.datamart.db.name" = "datamart"
"--dpr.log.level" = local.reporting_hub_log_level
}
}

Expand Down Expand Up @@ -722,7 +720,7 @@ module "dms_nomis_ingestor" {

module "dms_fake_data_ingestor" {
source = "./modules/dms"
setup_dms_instance = local.setup_fake_data_dms_instance
setup_dms_instance = local.setup_fake_data_dms_instance
enable_replication_task = local.enable_fake_data_replication_task # Disable Replication Task
name = "${local.project}-dms-fake-data-ingestor-${local.env}"
vpc_cidr = [data.aws_vpc.shared.cidr_block]
Expand Down Expand Up @@ -824,34 +822,6 @@ module "dynamo_tab_domain_registry" {
)
}

# Dynamo Reporting HUB (DPR-340, DPR-378)
# Instantiates the local ./modules/dynamo_tables module to declare a table named
# "<project>-reporting-hub-<environment>", keyed on the single attribute "leaseKey".
# NOTE(review): the accompanying commit message describes this as the table that
# previously managed offsets for the streaming reporting hub glue job - confirm
# nothing else still reads it before removing.
module "dynamo_tab_reporting_hub" {
source = "./modules/dynamo_tables"
# Module inputs below are passed through as-is; their exact effect is defined in
# ./modules/dynamo_tables, which is not shown here.
create_table = true
autoscaling_enabled = false
name = "${local.project}-reporting-hub-${local.environment}"

hash_key = "leaseKey" # Hash key; matches the only entry in `attributes` below
range_key = "" # Sort key left as an empty string - presumably "no sort key"; verify against the module
table_class = "STANDARD"
ttl_enabled = false

# Attribute definitions: only the hash key attribute is declared.
# "S" is presumably the DynamoDB string type - TODO confirm how the module consumes it.
attributes = [
{
name = "leaseKey"
type = "S"
}
]

# Shared tags from local.all_tags, merged with a resource-specific Name
# (identical to the table name above) and a Resource_Type label.
tags = merge(
local.all_tags,
{
Name = "${local.project}-reporting-hub-${local.environment}"
Resource_Type = "Dynamo Table"
}
)
}

##########################
# Application Backend TF #
##########################
Expand Down

0 comments on commit 55496d1

Please sign in to comment.