Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dpl 1193 metadata api #3278

Merged
merged 14 commits into from
Sep 8, 2023
47 changes: 44 additions & 3 deletions terraform/environments/data-platform/api.tf
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,20 @@ resource "aws_api_gateway_deployment" "deployment" {
# resources will show a difference after the initial implementation.
# It will stabilize to only change when resources change afterwards.
redeployment = sha1(jsonencode([
aws_api_gateway_resource.upload_data.id,
aws_api_gateway_resource.upload_data,
LavMatt marked this conversation as resolved.
Show resolved Hide resolved
aws_api_gateway_resource.get_glue_metadata,
aws_api_gateway_method.upload_data_get.id,
aws_api_gateway_resource.docs,
aws_api_gateway_resource.create_data_product_metadata,
aws_api_gateway_method.upload_data_get,
aws_api_gateway_method.docs,
aws_api_gateway_method.get_glue_metadata,
aws_api_gateway_method.create_data_product_metadata_post,
aws_api_gateway_integration.docs_to_lambda,
aws_api_gateway_integration.upload_data_to_lambda,
aws_api_gateway_integration.proxy_to_lambda,
aws_api_gateway_integration.docs_lambda_root,
aws_api_gateway_integration.get_glue_metadata,
aws_api_gateway_integration.create_data_product_metadata_to_lambda
]))
}

Expand Down Expand Up @@ -186,4 +192,39 @@ resource "aws_api_gateway_integration" "get_glue_metadata" {
"integration.request.querystring.database" = "method.request.querystring.database",
"integration.request.querystring.table" = "method.request.querystring.table"
}
}
}


# create data product metadata API endpoint

# Path segment "register_data_product", attached directly to the REST API root.
resource "aws_api_gateway_resource" "create_data_product_metadata" {
  rest_api_id = aws_api_gateway_rest_api.data_platform.id
  parent_id   = aws_api_gateway_rest_api.data_platform.root_resource_id
  path_part   = "register_data_product"
}

# POST method on the create_data_product_metadata resource, guarded by the
# custom authorizer.
resource "aws_api_gateway_method" "create_data_product_metadata_post" {
  rest_api_id = aws_api_gateway_rest_api.data_platform.id
  resource_id = aws_api_gateway_resource.create_data_product_metadata.id
  http_method = "POST"

  authorization = "CUSTOM"
  authorizer_id = aws_api_gateway_authorizer.authorizer.id

  # Both the Authorization header and the "metadata" query string parameter
  # are flagged as required (true).
  # NOTE(review): no request_validator_id is set on this method; confirm
  # whether the "required" flags are expected to be enforced by API Gateway.
  request_parameters = {
    "method.request.header.Authorization" = true
    "method.request.querystring.metadata" = true
  }
}

# Wires the POST method on the create_data_product_metadata resource to the
# create-metadata lambda using a proxy (AWS_PROXY) integration.
resource "aws_api_gateway_integration" "create_data_product_metadata_to_lambda" {
  rest_api_id = aws_api_gateway_rest_api.data_platform.id
  resource_id = aws_api_gateway_resource.create_data_product_metadata.id
  http_method = aws_api_gateway_method.create_data_product_metadata_post.http_method

  type                    = "AWS_PROXY"
  integration_http_method = "POST"
  uri                     = module.data_product_create_metadata_lambda.lambda_function_invoke_arn

  # Maps the method's "metadata" query string parameter onto the integration request.
  # NOTE(review): with an AWS_PROXY integration this mapping may be redundant — confirm.
  request_parameters = {
    "integration.request.querystring.metadata" = "method.request.querystring.metadata"
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,11 @@
"test": "1.1.1",
"preproduction": "1.1.1",
"production": "1.1.1"
},
"create_metadata_versions": {
"development": "1.0.3",
"test": "1.0.3",
"preproduction": "1.0.3",
"production": "1.0.3"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"type": "object",
"title": "Data Product Metadata",
"description": "Specification for MoJ Data Platform Data Product metadata",
"required": [
"name",
"description",
"domain",
"dataProductOwner",
"dataProductOwnerDisplayName",
"email",
"status",
"dpiaRequired"
],
"properties": {
"name": {
"type": "string",
"description": "The name of the Data Product. Must contain only lowercase letters, numbers, and the underscore character.",
"pattern": "^[a-z0-9_]+$",
"example": "my_data_product"
},
"description": {
"type": "string",
"description": "Detailed description about what functional area this Data Product is representing, what purpose it has and business related information.",
"example": "this data product holds lots of useful information I want to share with those who may have use for it."
},
"domain": {
"type": "string",
"description": "The identifier of the domain this Data Product belongs to. Should be one of HQ, HMPPS, OPG, LAA, HMCTS, CICA, or Platforms",
"example": "HMPPS"
},
"dataProductOwner": {
"type": "string",
"description": "Data Product owner, the unique identifier of the actual user that owns, manages, and receives notifications about the Data Product. To make it technology independent it is usually the email address of the owner.",
"example": "jane.doe@example.com"
},
"dataProductOwnerDisplayName": {
"type": "string",
"description": "The human-readable version of dataProductOwner",
"example": "Jane Doe"
},
"dataProductMaintainer": {
"type": "string",
"description": "Secondary party who is able to approve DPIA access requests, but who may or may not be legally responsible for the data",
"example": "jonny.data@example.com"
},
"dataProductMaintainerDisplayName": {
"type": "string",
"description": "The human-readable version of dataProductMaintainer",
"example": "Jonny Data"
},
"email": {
"type": "string",
"description": "point of contact between consumers and maintainers of the Data Product. It could be the owner or a distribution list, but must be reliable and responsive.",
"example": "data-product-contact@example.com"
},
"status": {
"type": "string",
"description": "An enum representing the status of this version of the Data Product. Allowed values are: [draft|published|retired]. This metadata communicates the overall status of the Data Product; it does not reflect the actual deployment status.",
"enum": ["draft", "published", "retired"]
},
"dpiaRequired": {
"type": "boolean",
"description": "Whether a Data Privacy Impact Assessment (DPIA) is required to access this data product.",
"example": true
},
"dpiaLocation": {
"type": "string",
"description": "Data Privacy Impact Assessment (DPIA) file s3 location for this data product. Generated by data platform."
},
"retentionPeriod": {
"type": "integer",
"description": "Retention period of the data in this data product in days.",
"example": 3650
},
"tags": {
"type": "object",
"description": "Additional tags to add.",
"example": {"sandbox" : true}
},
"version": {
"type": "string",
"description": "Data product version of form [major].[minor]. Generated by data platform."
},
"id": {
"type": "string",
"description": "Data product unique id. Generated by data platform.",
"example": "dp:civil-courts-data:v1.1"
},
"lastUpdated": {
"type": "string",
"description": "Last data upload date to this data product. Generated by data platform."
},
"creationDate": {
"type": "string",
"description": "Creation date of the data product. Generated by data platform."
},
"s3Location": {
"type": "string",
"description": "S3 path to data in this data product. Generated by data platform."
},
"rowCount": {
"type": "object",
"description": "Total row count of all tables in the data product, as a heuristic. Generated by data platform."
}
},
"additionalProperties": false
}
51 changes: 51 additions & 0 deletions terraform/environments/data-platform/iam.tf
Original file line number Diff line number Diff line change
Expand Up @@ -227,3 +227,54 @@ data "aws_iam_policy_document" "data_platform_product_bucket_policy_document" {
}

}

# api gateway create data product metadata permissions
#
# Policy document attached to the create-metadata lambda role: S3 access to the
# data bucket plus CloudWatch Logs permissions.
data "aws_iam_policy_document" "iam_policy_document_for_create_metadata_lambda" {
  # Read and write objects under the metadata and metadata-spec prefixes.
  statement {
    sid     = "GetPutDataObject"
    effect  = "Allow"
    actions = ["s3:GetObject", "s3:PutObject"]
    resources = [
      "${module.s3-bucket.bucket.arn}/metadata/*",
      "${module.s3-bucket.bucket.arn}/data_product_metadata_spec/*",
    ]
  }

  # Write-only access to the logs prefix.
  statement {
    sid       = "PutDataObject"
    effect    = "Allow"
    actions   = ["s3:PutObject"]
    resources = ["${module.s3-bucket.bucket.arn}/logs/*"]
  }

  # Listing is granted on the bucket ARN and on every object ARN beneath it.
  statement {
    sid     = "ListBucket"
    effect  = "Allow"
    actions = ["s3:ListBucket"]
    resources = [
      module.s3-bucket.bucket.arn,
      "${module.s3-bucket.bucket.arn}/*",
    ]
  }

  # CloudWatch Logs: log group creation, scoped to this account in eu-west-2.
  statement {
    sid       = "AllowLambdaToCreateLogGroup"
    effect    = "Allow"
    actions   = ["logs:CreateLogGroup"]
    resources = ["arn:aws:logs:eu-west-2:${data.aws_caller_identity.current.account_id}:*"]
  }

  # CloudWatch Logs: stream creation and event writes, same scope as above.
  statement {
    sid    = "AllowLambdaToWriteLogsToGroup"
    effect = "Allow"
    actions = [
      "logs:CreateLogStream",
      "logs:PutLogEvents",
    ]
    resources = ["arn:aws:logs:eu-west-2:${data.aws_caller_identity.current.account_id}:*"]
  }
}
36 changes: 35 additions & 1 deletion terraform/environments/data-platform/lambda.tf
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ module "data_product_athena_load_lambda" {
policy_json = data.aws_iam_policy_document.athena_load_lambda_function_policy.json
function_name = "data_product_athena_load_${local.environment}"
create_role = true
reserved_concurrent_executions = 1
reserved_concurrent_executions = 100

image_uri = "374269020027.dkr.ecr.eu-west-2.amazonaws.com/data-platform-athena-load-lambda-ecr-repo:${local.athena_load_version}"
timeout = 600
Expand All @@ -151,3 +151,37 @@ module "data_product_athena_load_lambda" {
}

}


# Container-image lambda that creates the first version of a data product's
# JSON metadata file. It is invoked by API Gateway.
module "data_product_create_metadata_lambda" {
  source = "github.com/ministryofjustice/modernisation-platform-terraform-lambda-function?ref=v2.0.1"

  # Identity and naming
  application_name = "data_product_create_metadata"
  function_name    = "data_product_create_metadata_${local.environment}"
  description      = "Lambda to create the first version of a json metadata file for a data product"
  tags             = local.tags

  # Execution role: created by the module from the policy document below
  create_role = true
  role_name   = "data_product_metadata_lambda_role_${local.environment}"
  policy_json = data.aws_iam_policy_document.iam_policy_document_for_create_metadata_lambda.json

  # Runtime settings; the image tag is selected per environment
  image_uri                      = "374269020027.dkr.ecr.eu-west-2.amazonaws.com/data-platform-create-metadata-lambda-ecr-repo:${local.create_metadata_version}"
  memory_size                    = 128
  timeout                        = 600
  reserved_concurrent_executions = 1
  tracing_mode                   = "Active"

  environment_variables = {
    ENVIRONMENT = local.environment
    BUCKET_NAME = module.s3-bucket.bucket.id
  }

  # Allow API Gateway to invoke the function. The source ARN is limited to the
  # create_data_product_metadata method and resource path; the "*" segment
  # matches any stage.
  allowed_triggers = {
    AllowExecutionFromAPIGateway = {
      action        = "lambda:InvokeFunction"
      function_name = "data_product_create_metadata_${local.environment}"
      principal     = "apigateway.amazonaws.com"
      source_arn    = "arn:aws:execute-api:${local.region}:${local.account_id}:${aws_api_gateway_rest_api.data_platform.id}/*/${aws_api_gateway_method.create_data_product_metadata_post.http_method}${aws_api_gateway_resource.create_data_product_metadata.path}"
    }
  }
}
1 change: 1 addition & 0 deletions terraform/environments/data-platform/locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,5 @@ locals {
get_glue_metadata_version = lookup(var.get_glue_metadata_versions, local.environment)
presigned_url_version = lookup(var.presigned_url_versions, local.environment)
athena_load_version = lookup(var.athena_load_versions, local.environment)
create_metadata_version = lookup(var.create_metadata_versions, local.environment)
}
12 changes: 11 additions & 1 deletion terraform/environments/data-platform/s3.tf
Original file line number Diff line number Diff line change
Expand Up @@ -121,4 +121,14 @@ module "s3_athena_query_results_bucket" { #tfsec:ignore:aws-s3-enable-versioning
resource "aws_s3_bucket_notification" "bucket_notification" {
bucket = module.s3-bucket.bucket.id
eventbridge = true
}
}

# load the json schema for data product metadata
#
# The local file path is anchored on path.module so it resolves against this
# module's directory rather than whatever directory Terraform is invoked from.
resource "aws_s3_object" "object" {
  bucket = module.s3-bucket.bucket.id
  key    = "data_product_metadata_spec/v1.0.0/moj_data_product_metadata_spec.json"
  source = "${path.module}/data-product-metadata-json-schema/v1.0.0/moj_data_product_metadata_spec.json"

  # MD5 of the local file, so a change to the schema content triggers a re-upload.
  etag = filemd5("${path.module}/data-product-metadata-json-schema/v1.0.0/moj_data_product_metadata_spec.json")

  acl                    = "bucket-owner-full-control"
  server_side_encryption = "AES256"
}
6 changes: 5 additions & 1 deletion terraform/environments/data-platform/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,8 @@ variable "presigned_url_versions" {

variable "athena_load_versions" {
type = map(any)
}
}

# Looked up by local.environment to pick the create-metadata lambda image tag.
variable "create_metadata_versions" {
  description = "Map of environment name to the create-metadata lambda image version to deploy"
  type        = map(any)
}
Loading