Skip to content

Commit

Permalink
[Integ-test]Fix test_dynamic_file_systems_update sporadic failures
Browse files Browse the repository at this point in the history
The failure was produced from CLI validation, like: "There is no existing Mount Target for EFS \'fs-0901ff973c69b0469\' in these Availability Zones: \'[\'eu-west-2a\']\'. Please create an EFS Mount Target for those availability zones.".
The root cause was the EFS created by integration tests does not have mount targets in all availability zones.

The fix is to create mount targets in all availability zones. To achieve this, `storage-stack.yaml` becomes a Jinja template on top of CloudFormation template.

Signed-off-by: Hanwen <[email protected]>
  • Loading branch information
hanwen-cluster committed Feb 22, 2023
1 parent 00153e4 commit d6758d5
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 55 deletions.
79 changes: 39 additions & 40 deletions tests/integration-tests/tests/update/test_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import os.path
import re
import time
from collections import defaultdict

import boto3
import pytest
Expand Down Expand Up @@ -841,46 +842,44 @@ def _test_update_queue_strategy_with_running_job(
def external_shared_storage_stack(request, test_datadir, region, vpc_stack: CfnVpcStack, cfn_stacks_factory):
def create_stack(bucket_name):
template_path = os.path.join(str(test_datadir), "storage-stack.yaml")
with open(template_path, encoding="utf-8") as template_file:
template = template_file.read()
option = "external_shared_storage_stack_name"
if request.config.getoption(option):
stack = CfnStack(name=request.config.getoption(option), region=region, template=template)
else:
# Choose subnets from different availability zones
subnet_ids = vpc_stack.get_all_public_subnets() + vpc_stack.get_all_private_subnets()
subnets = boto3.client("ec2").describe_subnets(SubnetIds=subnet_ids)["Subnets"]
available_subnet_ids = [subnets[0]["SubnetId"]]
for subnet in subnets:
if subnet["AvailabilityZone"] != subnets[0]["AvailabilityZone"]:
available_subnet_ids.append(subnet["SubnetId"])
break

vpc = vpc_stack.cfn_outputs["VpcId"]
public_subnet_id = vpc_stack.get_public_subnet()
subnet_id0 = available_subnet_ids[0]
subnet_id1 = available_subnet_ids[1]
import_path = "s3://{0}".format(bucket_name)
export_path = "s3://{0}/export_dir".format(bucket_name)
params = [
{"ParameterKey": "vpc", "ParameterValue": vpc},
{"ParameterKey": "PublicSubnetId", "ParameterValue": public_subnet_id},
{"ParameterKey": "SubnetId0", "ParameterValue": subnet_id0},
{"ParameterKey": "SubnetId1", "ParameterValue": subnet_id1},
{"ParameterKey": "ImportPathParam", "ParameterValue": import_path},
{"ParameterKey": "ExportPathParam", "ParameterValue": export_path},
]
stack = CfnStack(
name=utils.generate_stack_name(
"integ-tests-external-shared-storage", request.config.getoption("stackname_suffix")
),
region=region,
parameters=params,
template=template,
capabilities=["CAPABILITY_IAM"],
)
cfn_stacks_factory.create_stack(stack)
return stack
option = "external_shared_storage_stack_name"
if request.config.getoption(option):
stack = CfnStack(name=request.config.getoption(option), region=region, template=None)
else:
# Choose subnets from different availability zones
subnet_ids = vpc_stack.get_all_public_subnets() + vpc_stack.get_all_private_subnets()
subnets = boto3.client("ec2").describe_subnets(SubnetIds=subnet_ids)["Subnets"]
subnets_by_az = defaultdict(list)
for subnet in subnets:
subnets_by_az[subnet["AvailabilityZone"]].append(subnet["SubnetId"])

utils.render_jinja_template(
template_path, one_subnet_per_az=[subnets[0] for subnets in subnets_by_az.values()]
)

vpc = vpc_stack.cfn_outputs["VpcId"]
public_subnet_id = vpc_stack.get_public_subnet()
import_path = "s3://{0}".format(bucket_name)
export_path = "s3://{0}/export_dir".format(bucket_name)
params = [
{"ParameterKey": "vpc", "ParameterValue": vpc},
{"ParameterKey": "PublicSubnetId", "ParameterValue": public_subnet_id},
{"ParameterKey": "ImportPathParam", "ParameterValue": import_path},
{"ParameterKey": "ExportPathParam", "ParameterValue": export_path},
]
with open(template_path, encoding="utf-8") as template_file:
template = template_file.read()
stack = CfnStack(
name=utils.generate_stack_name(
"integ-tests-external-shared-storage", request.config.getoption("stackname_suffix")
),
region=region,
parameters=params,
template=template,
capabilities=["CAPABILITY_IAM"],
)
cfn_stacks_factory.create_stack(stack)
return stack

yield create_stack

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,6 @@ AWSTemplateFormatVersion: 2010-09-09
Description: >-
This template creates EFS, FSX Lustre, FSX Ontap, FSX Open ZFS storage that can be attached to a cluster.
Parameters:
SubnetId0:
Type: String
Default: ''
SubnetId1:
Type: String
Default: ''
PublicSubnetId:
Type: String
Default: ''
Expand All @@ -23,20 +17,15 @@ Parameters:
Resources:
FileSystemResource0:
Type: 'AWS::EFS::FileSystem'
MountTargetResourceEfs0Subnet0:
Properties:
FileSystemId: !Ref FileSystemResource0
SecurityGroups:
- !Ref SecurityGroupResource
SubnetId: !Ref SubnetId0
Type: 'AWS::EFS::MountTarget'
MountTargetResourceEfs0Subnet1:
{% for subnet in one_subnet_per_az %}
MountTargetResourceEfs0Subnet{{loop.index}}:
Properties:
FileSystemId: !Ref FileSystemResource0
SecurityGroups:
- !Ref SecurityGroupResource
SubnetId: !Ref SubnetId1
SubnetId: {{subnet}}
Type: 'AWS::EFS::MountTarget'
{% endfor %}
SecurityGroupIngressResource0:
Properties:
CidrIp: 192.168.0.0/17
Expand Down

0 comments on commit d6758d5

Please sign in to comment.