Skip to content

Commit

Permalink
Support for username placeholder in whitelist (#93)
Browse files Browse the repository at this point in the history
  • Loading branch information
paulineribeyre authored Feb 1, 2024
1 parent 9a19c1b commit b9ac428
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 18 deletions.
9 changes: 7 additions & 2 deletions doc/howto/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ An example manifest entry may look like
"nextflow": {
"enabled": true,
"job-image-whitelist": [
"quay.io/cdis/*:*"
"quay.io/cdis/*:*",
"1234.ecr.aws/nextflow-approved/{{username}}:*"
],
"s3-bucket-whitelist": [
"ngi-igenomes"
Expand Down Expand Up @@ -98,6 +99,10 @@ An example manifest entry may look like
* `authz` describes access rules for this container. See the [Authorization documentation](/doc/explanation/authorization.md) for more details.
* `nextflow` is for configuration specific to Nextflow containers. See the [Nextflow workspaces documentation](/doc/explanation/nextflow.md) for more details.
* `enabled` is false by default; if true, automatically create AWS resources required to run Nextflow workflows in AWS Batch.
* `job-image-whitelist` are the only images that are allowed as Nextflow workflow containers. It supports wildcards `?` for a single character and `*` for multiple characters. Warning: setting the whitelist as an empty list allows all images!
* `job-image-whitelist` are the only images that are allowed as Nextflow workflow containers.
* Supports wildcards `?` for a single character and `*` for multiple characters.
* `{{username}}` can be used as a placeholder for the user's actual (escaped) username.
* **Warning:** setting the whitelist as an empty list allows all images!
* **Warning:** on the ECR side, tags are ignored and users are allowed access to the whole repo.
* `s3-bucket-whitelist` are public buckets that Nextflow jobs are allowed to get data objects from. Access to actions "s3:GetObject" and "s3:ListBucket" for `arn:aws:s3:::<bucket>` and `arn:aws:s3:::<bucket>/*` will be granted.
* `compute-environment-type` ("EC2", "SPOT", "FARGATE" or "FARGATE_SPOT"), `instance-ami`, `instance-type` ("optimal", "g4dn.xlarge"...), `instance-min-vcpus` and `instance-max-vcpus` are AWS Batch Compute Environment settings.
2 changes: 1 addition & 1 deletion hatchery/hatchery.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ func home(w http.ResponseWriter, r *http.Request) {
func getCurrentUserName(r *http.Request) (userName string) {
user := r.Header.Get("REMOTE_USER")
if user == "" {
Config.Logger.Printf("Warning: No username in header REMOTE_USER!")
Config.Logger.Print("Warning: No username in header REMOTE_USER!")
}

// escape username to sanitize input from http header
Expand Down
47 changes: 32 additions & 15 deletions hatchery/nextflow.go
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ func createNextflowResources(userName string, nextflowConfig NextflowConfig) (st
policyName = fmt.Sprintf("%s-nf-%s", hostname, userName)
jobImageWhitelistCondition := "" // if not configured, all images are allowed
if len(nextflowConfig.JobImageWhitelist) > 0 {
jobImageWhitelist := fmt.Sprintf(`"%v"`, strings.Join(nextflowConfig.JobImageWhitelist, "\", \""))
jobImageWhitelist := fmt.Sprintf(`"%v"`, strings.Join(replaceAllUsernamePlaceholders(nextflowConfig.JobImageWhitelist, userName), "\", \""))
jobImageWhitelistCondition = fmt.Sprintf(`,
"Condition": {
"StringLike": {
Expand Down Expand Up @@ -528,13 +528,10 @@ func setupVpcAndSquid(ec2Svc *ec2.EC2, userName string, hostname string) (*strin

// Function to make sure launch template is created, and configured correctly
// We need a launch template since we need a user data script to authenticate with private ECR repositories
func ensureLaunchTemplate(ec2Svc *ec2.EC2, userName string, hostname string) (*string, error) {
func ensureLaunchTemplate(ec2Svc *ec2.EC2, userName string, hostname string, jobImageWhitelist []string) (*string, error) {

// user data script to authenticate with private ECR repositories
userData, err := generateUserData(userName)
if err != nil {
return nil, err
}
userData := generateEcrLoginUserData(jobImageWhitelist, userName)

launchTemplateName := fmt.Sprintf("%s-nf-%s", hostname, userName)

Expand Down Expand Up @@ -585,7 +582,7 @@ func createBatchComputeEnvironment(userName string, hostname string, tagsMap map
}

// the launch template for the compute envrionment must be user-specific as well
launchTemplateName, err := ensureLaunchTemplate(ec2Svc, userName, hostname)
launchTemplateName, err := ensureLaunchTemplate(ec2Svc, userName, hostname, nextflowConfig.JobImageWhitelist)
if err != nil {
return "", err
}
Expand Down Expand Up @@ -1553,22 +1550,42 @@ workDir = '%s'`,
return configContents, nil
}

func replaceAllUsernamePlaceholders(strArray []string, userName string) []string {
var result []string
for _, str := range strArray {
result = append(result, strings.Replace(str, "{{username}}", userName, -1))
}
return result
}

// function to generate user data
func generateUserData(userName string) (string, error) {
// TODO: read repo from config
approvedRepo := "143731057154.dkr.ecr.us-east-1.amazonaws.com/nextflow-approved"
func generateEcrLoginUserData(jobImageWhitelist []string, userName string) string {
var ecrRepos []string
for _, image := range replaceAllUsernamePlaceholders(jobImageWhitelist, userName) {
if strings.Contains(image, ".ecr.") {
// NOTE: on the ECR side, tags are ignored and users are allowed access to the whole repo.
repo := strings.Split(image, ":")[0]
ecrRepos = append(ecrRepos, repo)
}
}

// TODO: read region from config
userData := base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf(`MIME-Version: 1.0
runCmd := ""
for _, approvedRepo := range ecrRepos {
runCmd += fmt.Sprintf(`
- aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin %s`, approvedRepo)
}

userData := fmt.Sprintf(`MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="==MYBOUNDARY=="
--==MYBOUNDARY==
Content-Type: text/cloud-config; charset="us-ascii"
packages:
- aws-cli
runcmd:
- aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin %s/%s
--==MYBOUNDARY==--`, approvedRepo, userName)))
return userData, nil
runcmd:%s
--==MYBOUNDARY==--`, runCmd)

return base64.StdEncoding.EncodeToString([]byte(userData))
}
51 changes: 51 additions & 0 deletions hatchery/nextflow_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package hatchery

import (
"encoding/base64"
"fmt"
"testing"
)

func TestReplaceAllUsernamePlaceholders(t *testing.T) {
defer SetupAndTeardownTest()()

initialArray := []string{"quay.io/cdis/*:*", "1234.ecr.aws/nextflow-repo/{{username}}"}
userName := "test-escaped-username"
replacedArray := replaceAllUsernamePlaceholders(initialArray, userName)
expectedOutput := []string{"quay.io/cdis/*:*", fmt.Sprintf("1234.ecr.aws/nextflow-repo/%s", userName)}

errMsg := fmt.Sprintf("The 'replaceUsernamePlaceholder' function should have returned the expected output '%v', but it returned: '%v'", expectedOutput, replacedArray)
if len(replacedArray) != len(expectedOutput) {
t.Error(errMsg)
}
for i := range replacedArray {
if replacedArray[i] != expectedOutput[i] {
t.Error(errMsg)
}
}
}

func TestGenerateEcrLoginUserData(t *testing.T) {
defer SetupAndTeardownTest()()

jobImageWhitelist := []string{"1234.ecr.aws/repo1:tagA", "1234.ecr.aws/repo/without/tag", "quay.io/cdis/*:*", "1234.ecr.aws/nextflow-repo/{{username}}:tagB"}
userName := "test-escaped-username"
userData := generateEcrLoginUserData(jobImageWhitelist, userName)
expectedOutput := `MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="==MYBOUNDARY=="
--==MYBOUNDARY==
Content-Type: text/cloud-config; charset="us-ascii"
packages:
- aws-cli
runcmd:
- aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 1234.ecr.aws/repo1
- aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 1234.ecr.aws/repo/without/tag
- aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 1234.ecr.aws/nextflow-repo/test-escaped-username
--==MYBOUNDARY==--`

if userData != base64.StdEncoding.EncodeToString([]byte(expectedOutput)) {
t.Errorf("The 'generateEcrLoginUserData' function should have returned the expected output '%v', but it returned: '%v'", expectedOutput, userData)
}
}

0 comments on commit b9ac428

Please sign in to comment.