diff --git a/doc/howto/configuration.md b/doc/howto/configuration.md index 5fd5cd61..112b63a7 100644 --- a/doc/howto/configuration.md +++ b/doc/howto/configuration.md @@ -48,7 +48,8 @@ An example manifest entry may look like "nextflow": { "enabled": true, "job-image-whitelist": [ - "quay.io/cdis/*:*" + "quay.io/cdis/*:*", + "1234.ecr.aws/nextflow-approved/{{username}}:*" ], "s3-bucket-whitelist": [ "ngi-igenomes" @@ -98,6 +99,10 @@ An example manifest entry may look like * `authz` describes access rules for this container. See the [Authorization documentation](/doc/explanation/authorization.md) for more details. * `nextflow` is for configuration specific to Nextflow containers. See the [Nextflow workspaces documentation](/doc/explanation/nextflow.md) for more details. * `enabled` is false by default; if true, automatically create AWS resources required to run Nextflow workflows in AWS Batch. - * `job-image-whitelist` are the only images that are allowed as Nextflow workflow containers. It supports wildcards `?` for a single character and `*` for multiple characters. Warning: setting the whitelist as an empty list allows all images! + * `job-image-whitelist` are the only images that are allowed as Nextflow workflow containers. + * Supports wildcards `?` for a single character and `*` for multiple characters. + * `{{username}}` can be used as a placeholder for the user's actual (escaped) username. + * **Warning:** setting the whitelist as an empty list allows all images! + * **Warning:** on the ECR side, tags are ignored and users are allowed access to the whole repo. * `s3-bucket-whitelist` are public buckets that Nextflow jobs are allowed to get data objects from. Access to actions "s3:GetObject" and "s3:ListBucket" for `arn:aws:s3:::` and `arn:aws:s3:::/*` will be granted. * `compute-environment-type` ("EC2", "SPOT", "FARGATE" or "FARGATE_SPOT"), `instance-ami`, `instance-type` ("optimal", "g4dn.xlarge"...), `instance-min-vcpus` and `instance-max-vcpus` are AWS Batch Compute Environment settings. diff --git a/hatchery/hatchery.go b/hatchery/hatchery.go index a76f438a..7e49261f 100644 --- a/hatchery/hatchery.go +++ b/hatchery/hatchery.go @@ -63,7 +63,7 @@ func home(w http.ResponseWriter, r *http.Request) { func getCurrentUserName(r *http.Request) (userName string) { user := r.Header.Get("REMOTE_USER") if user == "" { - Config.Logger.Printf("Warning: No username in header REMOTE_USER!") + Config.Logger.Print("Warning: No username in header REMOTE_USER!") } // escape username to sanitize input from http header diff --git a/hatchery/nextflow.go b/hatchery/nextflow.go index 10d4f5d9..f4afbd84 100644 --- a/hatchery/nextflow.go +++ b/hatchery/nextflow.go @@ -250,7 +250,7 @@ func createNextflowResources(userName string, nextflowConfig NextflowConfig) (st policyName = fmt.Sprintf("%s-nf-%s", hostname, userName) jobImageWhitelistCondition := "" // if not configured, all images are allowed if len(nextflowConfig.JobImageWhitelist) > 0 { - jobImageWhitelist := fmt.Sprintf(`"%v"`, strings.Join(nextflowConfig.JobImageWhitelist, "\", \"")) + jobImageWhitelist := fmt.Sprintf(`"%v"`, strings.Join(replaceAllUsernamePlaceholders(nextflowConfig.JobImageWhitelist, userName), "\", \"")) jobImageWhitelistCondition = fmt.Sprintf(`, "Condition": { "StringLike": { @@ -528,13 +528,10 @@ func setupVpcAndSquid(ec2Svc *ec2.EC2, userName string, hostname string) (*strin // Function to make sure launch template is created, and configured correctly // We need a launch template since we need a user data script to authenticate with private ECR repositories -func ensureLaunchTemplate(ec2Svc *ec2.EC2, userName string, hostname string) (*string, error) { +func ensureLaunchTemplate(ec2Svc *ec2.EC2, userName string, hostname string, jobImageWhitelist []string) (*string, error) { // user data script to authenticate with private ECR repositories - userData, err := generateUserData(userName) - if err != nil { - return nil, err - } + userData := generateEcrLoginUserData(jobImageWhitelist, userName) launchTemplateName := fmt.Sprintf("%s-nf-%s", hostname, userName) @@ -585,7 +582,7 @@ func createBatchComputeEnvironment(userName string, hostname string, tagsMap map } // the launch template for the compute envrionment must be user-specific as well - launchTemplateName, err := ensureLaunchTemplate(ec2Svc, userName, hostname) + launchTemplateName, err := ensureLaunchTemplate(ec2Svc, userName, hostname, nextflowConfig.JobImageWhitelist) if err != nil { return "", err } @@ -1553,13 +1550,33 @@ workDir = '%s'`, return configContents, nil } +func replaceAllUsernamePlaceholders(strArray []string, userName string) []string { + var result []string + for _, str := range strArray { + result = append(result, strings.Replace(str, "{{username}}", userName, -1)) + } + return result +} + // function to generate user data -func generateUserData(userName string) (string, error) { - // TODO: read repo from config - approvedRepo := "143731057154.dkr.ecr.us-east-1.amazonaws.com/nextflow-approved" +func generateEcrLoginUserData(jobImageWhitelist []string, userName string) string { + var ecrRepos []string + for _, image := range replaceAllUsernamePlaceholders(jobImageWhitelist, userName) { + if strings.Contains(image, ".ecr.") { + // NOTE: on the ECR side, tags are ignored and users are allowed access to the whole repo. + repo := strings.Split(image, ":")[0] + ecrRepos = append(ecrRepos, repo) + } + } // TODO: read region from config - userData := base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf(`MIME-Version: 1.0 + runCmd := "" + for _, approvedRepo := range ecrRepos { + runCmd += fmt.Sprintf(` +- aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin %s`, approvedRepo) + } + + userData := fmt.Sprintf(`MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="==MYBOUNDARY==" --==MYBOUNDARY== @@ -1567,8 +1584,8 @@ Content-Type: text/cloud-config; charset="us-ascii" packages: - aws-cli -runcmd: -- aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin %s/%s ---==MYBOUNDARY==--`, approvedRepo, userName))) - return userData, nil +runcmd:%s +--==MYBOUNDARY==--`, runCmd) + + return base64.StdEncoding.EncodeToString([]byte(userData)) } diff --git a/hatchery/nextflow_test.go b/hatchery/nextflow_test.go new file mode 100644 index 00000000..1c824f5e --- /dev/null +++ b/hatchery/nextflow_test.go @@ -0,0 +1,51 @@ +package hatchery + +import ( + "encoding/base64" + "fmt" + "testing" +) + +func TestReplaceAllUsernamePlaceholders(t *testing.T) { + defer SetupAndTeardownTest()() + + initialArray := []string{"quay.io/cdis/*:*", "1234.ecr.aws/nextflow-repo/{{username}}"} + userName := "test-escaped-username" + replacedArray := replaceAllUsernamePlaceholders(initialArray, userName) + expectedOutput := []string{"quay.io/cdis/*:*", fmt.Sprintf("1234.ecr.aws/nextflow-repo/%s", userName)} + + errMsg := fmt.Sprintf("The 'replaceUsernamePlaceholder' function should have returned the expected output '%v', but it returned: '%v'", expectedOutput, replacedArray) + if len(replacedArray) != len(expectedOutput) { + t.Error(errMsg) + } + for i := range replacedArray { + if replacedArray[i] != expectedOutput[i] { + t.Error(errMsg) + } + } +} + +func TestGenerateEcrLoginUserData(t *testing.T) { + defer SetupAndTeardownTest()() + + jobImageWhitelist := []string{"1234.ecr.aws/repo1:tagA", "1234.ecr.aws/repo/without/tag", "quay.io/cdis/*:*", "1234.ecr.aws/nextflow-repo/{{username}}:tagB"} + userName := "test-escaped-username" + userData := generateEcrLoginUserData(jobImageWhitelist, userName) + expectedOutput := `MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary="==MYBOUNDARY==" + +--==MYBOUNDARY== +Content-Type: text/cloud-config; charset="us-ascii" + +packages: +- aws-cli +runcmd: +- aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 1234.ecr.aws/repo1 +- aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 1234.ecr.aws/repo/without/tag +- aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 1234.ecr.aws/nextflow-repo/test-escaped-username +--==MYBOUNDARY==--` + + if userData != base64.StdEncoding.EncodeToString([]byte(expectedOutput)) { + t.Errorf("The 'generateEcrLoginUserData' function should have returned the expected output '%v', but it returned: '%v'", expectedOutput, userData) + } +}