Skip to content

Commit

Permalink
[Login Nodes] Add lifecycle hook for node launch in login nodes ASG
Browse files: browse the repository at this point in the history
Signed-off-by: Giacomo Marciani <[email protected]>
  • Loading branch information
hehe7318 authored and gmarciani committed Jul 21, 2023
1 parent 8ab2c74 commit 30ff88b
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 20 deletions.
4 changes: 4 additions & 0 deletions cli/src/pcluster/resources/login_node/user_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -216,5 +216,9 @@ else
timeout ${Timeout} /tmp/bootstrap.sh || error_exit
fi

# Notify the AutoScalingGroup about the successful bootstrap by completing the launching lifecycle action.
# NOTE(review): placeholders such as ${AWS::Region} are not valid shell and are presumably substituted by the
# template before this script runs; plain shell variables are therefore referenced without braces.
# curl is run with -s and without -v so that neither the progress meter nor the request headers
# (which carry the IMDS token) are written to the bootstrap output.
IMDS_TOKEN=$(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 300")
INSTANCE_ID=$(curl -s -H "X-aws-ec2-metadata-token: $IMDS_TOKEN" http://169.254.169.254/latest/meta-data/instance-id)
aws autoscaling complete-lifecycle-action --auto-scaling-group-name "${AutoScalingGroupName}" --lifecycle-hook-name "${LaunchingLifecycleHookName}" --instance-id "$INSTANCE_ID" --lifecycle-action-result CONTINUE --region "${AWS::Region}"
# End of file
--==BOUNDARY==
15 changes: 15 additions & 0 deletions cli/src/pcluster/templates/cdk_builder_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -848,7 +848,9 @@ def __init__(
node: Union[HeadNode, BaseQueue, LoginNodesPool],
shared_storage_infos: dict,
name: str,
auto_scaling_group_name: str,
):
self._auto_scaling_group_name = auto_scaling_group_name
super().__init__(scope, id, config, node, shared_storage_infos, name)

def _build_policy(self) -> List[iam.PolicyStatement]:
Expand All @@ -872,6 +874,19 @@ def _build_policy(self) -> List[iam.PolicyStatement]:
)
],
),
iam.PolicyStatement(
sid="Autoscaling",
actions=[
"autoscaling:CompleteLifecycleAction",
],
effect=iam.Effect.ALLOW,
resources=[
self._format_arn(
service="autoscaling",
resource=f"autoScalingGroupName/{self._auto_scaling_group_name}",
)
],
),
]


Expand Down
28 changes: 23 additions & 5 deletions cli/src/pcluster/templates/login_nodes_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def _add_login_node_iam_resources(self):
self._pool,
self._shared_storage_infos,
self._pool.name,
f"{self._login_nodes_stack_id}-AutoScalingGroup",
)
self._instance_profile = self._iam_resource.instance_profile
self._instance_role = self._iam_resource.instance_role
Expand Down Expand Up @@ -193,6 +194,10 @@ def _add_login_nodes_pool_launch_template(self):
NODE_BOOTSTRAP_TIMEOUT,
)
),
"AutoScalingGroupName": f"{self._login_nodes_stack_id}-AutoScalingGroup",
"LaunchingLifecycleHookName": (
f"{self._login_nodes_stack_id}-LoginNodesLaunchingLifecycleHook"
),
},
**get_common_user_data_env(self._pool, self._config),
},
Expand Down Expand Up @@ -225,27 +230,40 @@ def _add_login_nodes_pool_auto_scaling_group(self):
auto_scaling_group = autoscaling.CfnAutoScalingGroup(
self,
f"{self._login_nodes_stack_id}-AutoScalingGroup",
auto_scaling_group_name=f"{self._login_nodes_stack_id}-AutoScalingGroup",
launch_template=launch_template_specification,
min_size=str(self._pool.count),
max_size=str(self._pool.count),
desired_capacity=str(self._pool.count),
target_group_arns=[self._login_nodes_pool_target_group.node.default_child.ref],
vpc_zone_identifier=self._pool.networking.subnet_ids,
)

self._add_lifecycle_hook(auto_scaling_group)
self.terminating_lifecycle_hook = self._add_terminating_lifecycle_hook(auto_scaling_group)
self.launching_lifecycle_hook = self._add_launching_lifecycle_hook(auto_scaling_group)

return auto_scaling_group

def _add_lifecycle_hook(self, auto_scaling_group):
def _add_terminating_lifecycle_hook(self, auto_scaling_group):
return autoscaling.CfnLifecycleHook(
self,
"LoginNodesASGLifecycleHook",
"LoginNodesASGLifecycleHookTerminating",
auto_scaling_group_name=auto_scaling_group.ref,
lifecycle_transition="autoscaling:EC2_INSTANCE_TERMINATING",
lifecycle_hook_name=f"{self._login_nodes_stack_id}-LoginNodesTerminatingLifecycleHook",
heartbeat_timeout=self._pool.gracetime_period * 60,
)

def _add_launching_lifecycle_hook(self, auto_scaling_group):
    """
    Create the lifecycle hook for the EC2_INSTANCE_LAUNCHING transition of the given Auto Scaling group.

    The hook is named "<login nodes stack id>-LoginNodesLaunchingLifecycleHook" and is attached to the group
    through ``auto_scaling_group.ref``. Its default result is ABANDON and its heartbeat timeout is 600.

    :param auto_scaling_group: the Auto Scaling group construct the hook is attached to
    :return: the created CfnLifecycleHook
    """
    hook_properties = {
        "auto_scaling_group_name": auto_scaling_group.ref,
        "lifecycle_hook_name": f"{self._login_nodes_stack_id}-LoginNodesLaunchingLifecycleHook",
        "lifecycle_transition": "autoscaling:EC2_INSTANCE_LAUNCHING",
        "default_result": "ABANDON",
        "heartbeat_timeout": 600,
    }
    launching_hook = autoscaling.CfnLifecycleHook(self, "LoginNodesASGLifecycleHookLaunching", **hook_properties)
    return launching_hook

def _add_login_nodes_pool_target_group(self):
return elbv2.NetworkTargetGroup(
self,
Expand Down Expand Up @@ -321,7 +339,7 @@ def _add_resources(self):
for pool in self._login_nodes.pools:
pool_construct = Pool(
self,
f"Pool{pool.name}",
f"{self._config.cluster_name}-{pool.name}",
pool,
self._config,
self._log_group,
Expand Down
66 changes: 51 additions & 15 deletions cli/tests/pcluster/templates/test_cluster_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,8 @@ def assert_lifecycle_hook_properties(self, template, resource_name: str):
assert resource["Type"] == "AWS::AutoScaling::LifecycleHook"
properties = resource["Properties"]
assert properties["LifecycleTransition"] == self.expected_lifecycle_transition
assert properties["HeartbeatTimeout"] == self.expected_heartbeat_timeout
if "HeartbeatTimeout" in properties:
assert properties["HeartbeatTimeout"] == self.expected_heartbeat_timeout


class IamRoleAssertion:
Expand Down Expand Up @@ -570,6 +571,10 @@ def assert_iam_policy_properties(self, template, resource_name: str):
expected_lifecycle_transition="autoscaling:EC2_INSTANCE_TERMINATING",
expected_heartbeat_timeout=7200,
),
LifecycleHookAssertion(
expected_lifecycle_transition="autoscaling:EC2_INSTANCE_LAUNCHING",
expected_heartbeat_timeout=600,
),
IamRoleAssertion(expected_managed_policy_arn="arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy"),
IamPolicyAssertion(
expected_statements=[
Expand All @@ -596,6 +601,25 @@ def assert_iam_policy_properties(self, template, resource_name: str):
},
"Sid": "S3GetObj",
},
{
"Action": "autoscaling:CompleteLifecycleAction",
"Effect": "Allow",
"Resource": {
"Fn::Join": [
"",
[
"arn:",
{"Ref": "AWS::Partition"},
":autoscaling:",
{"Ref": "AWS::Region"},
":",
{"Ref": "AWS::AccountId"},
":autoScalingGroupName/clustername-testloginnodespool1-AutoScalingGroup",
],
]
},
"Sid": "Autoscaling",
},
]
),
],
Expand All @@ -615,21 +639,25 @@ def test_login_nodes_traffic_management_resources_values_properties(
)

asset_content_asg = get_asset_content_with_resource_name(
cdk_assets, "Pooltestloginnodespool1Pooltestloginnodespool1AutoScalingGroup41053D91"
cdk_assets, "clusternametestloginnodespool1clusternametestloginnodespool1AutoScalingGroup5EBA3937"
)
asset_content_nlb = get_asset_content_with_resource_name(
cdk_assets, "Pooltestloginnodespool1testloginnodespool1LoadBalancer18C3DA82"
cdk_assets, "clusternametestloginnodespool1testloginnodespool1LoadBalancerE1D4FCC7"
)
asset_content_target_group = get_asset_content_with_resource_name(
cdk_assets, "Pooltestloginnodespool1testloginnodespool1TargetGroupD150DBF2"
cdk_assets, "clusternametestloginnodespool1testloginnodespool1TargetGroup713F5EC5"
)
asset_content_nlb_listener = get_asset_content_with_resource_name(
cdk_assets,
"Pooltestloginnodespool1testloginnodespool1LoadBalancerLoginNodesListenertestloginnodespool1727E619B",
"clusternametestloginnodespool1testloginnodespool1LoadBalancerLoginNodesListenertestloginnodespool165B4D3DC",
)
asset_content_lifecycle_hook_terminating = get_asset_content_with_resource_name(
cdk_assets,
"clusternametestloginnodespool1LoginNodesASGLifecycleHookTerminating51CA6203",
)
asset_content_lifecycle_hook = get_asset_content_with_resource_name(
asset_content_lifecycle_hook_launching = get_asset_content_with_resource_name(
cdk_assets,
"Pooltestloginnodespool1LoginNodesASGLifecycleHookE54B2467",
"clusternametestloginnodespool1LoginNodesASGLifecycleHookLaunching879DBA56",
)
asset_content_iam_role = get_asset_content_with_resource_name(
cdk_assets,
Expand All @@ -639,29 +667,37 @@ def test_login_nodes_traffic_management_resources_values_properties(
cdk_assets,
"ParallelClusterPoliciesA50bdea9651dc48c",
)
print(cdk_assets)
for lt_assertion in lt_assertions:
if isinstance(lt_assertion, AutoScalingGroupAssertion):
lt_assertion.assert_asg_properties(
asset_content_asg, "Pooltestloginnodespool1Pooltestloginnodespool1AutoScalingGroup41053D91"
asset_content_asg,
"clusternametestloginnodespool1clusternametestloginnodespool1AutoScalingGroup5EBA3937",
)
elif isinstance(lt_assertion, NetworkLoadBalancerAssertion):
lt_assertion.assert_nlb_properties(
asset_content_nlb, "Pooltestloginnodespool1testloginnodespool1LoadBalancer18C3DA82"
asset_content_nlb, "clusternametestloginnodespool1testloginnodespool1LoadBalancerE1D4FCC7"
)
elif isinstance(lt_assertion, TargetGroupAssertion):
lt_assertion.assert_tg_properties(
asset_content_target_group, "Pooltestloginnodespool1testloginnodespool1TargetGroupD150DBF2"
asset_content_target_group, "clusternametestloginnodespool1testloginnodespool1TargetGroup713F5EC5"
)
elif isinstance(lt_assertion, NetworkLoadBalancerListenerAssertion):
lt_assertion.assert_nlb_listener_properties(
asset_content_nlb_listener,
"Pooltestloginnodespool1testloginnodespool1LoadBalancerLoginNodesListenertestloginnodespool1727E619B",
"clusternametestloginnodespool1testloginnodespool1"
"LoadBalancerLoginNodesListenertestloginnodespool165B4D3DC",
)
elif isinstance(lt_assertion, LifecycleHookAssertion):
lt_assertion.assert_lifecycle_hook_properties(
asset_content_lifecycle_hook, "Pooltestloginnodespool1LoginNodesASGLifecycleHookE54B2467"
)
if lt_assertion.expected_lifecycle_transition == "autoscaling:EC2_INSTANCE_TERMINATING":
lt_assertion.assert_lifecycle_hook_properties(
asset_content_lifecycle_hook_terminating,
"clusternametestloginnodespool1LoginNodesASGLifecycleHookTerminating51CA6203",
)
else:
lt_assertion.assert_lifecycle_hook_properties(
asset_content_lifecycle_hook_launching,
"clusternametestloginnodespool1LoginNodesASGLifecycleHookLaunching879DBA56",
)
elif isinstance(lt_assertion, IamRoleAssertion):
lt_assertion.assert_iam_role_properties(asset_content_iam_role, "RoleA50bdea9651dc48c")
elif isinstance(lt_assertion, IamPolicyAssertion):
Expand Down

0 comments on commit 30ff88b

Please sign in to comment.