Skip to content

Commit

Permalink
Add options to install FSx Lustre and Nvidia software
Browse files Browse the repository at this point in the history
Signed-off-by: Hanwen <[email protected]>
  • Loading branch information
hanwen-cluster committed Nov 11, 2024
1 parent c389847 commit 284fa43
Show file tree
Hide file tree
Showing 7 changed files with 172 additions and 49 deletions.
50 changes: 45 additions & 5 deletions cli/src/pcluster/config/imagebuilder_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,41 @@ def __init__(
self.enabled = enabled


class LustreClient(Resource):
    """Hold the Lustre client installation toggle of the ImageBuilder build."""

    def __init__(self, enabled: bool = None):
        """
        Store the ``enabled`` flag.

        :param enabled: whether the Lustre client should be installed; it is passed to
            ``Resource.init_param`` together with a default of ``True``.
        """
        super().__init__()
        self.enabled = Resource.init_param(enabled, default=True)


class NvidiaSoftware(Resource):
    """Hold the Nvidia software installation toggle of the ImageBuilder build."""

    def __init__(self, enabled: bool = None):
        """
        Store the ``enabled`` flag.

        :param enabled: whether the Nvidia software should be installed; it is passed to
            ``Resource.init_param`` together with a default of ``False``.
        """
        super().__init__()
        self.enabled = Resource.init_param(enabled, default=False)


class Installation(Resource):
    """Group the optional software installation settings of the ImageBuilder build."""

    def __init__(self, lustre_client: LustreClient = None, nvidia_software: NvidiaSoftware = None):
        """
        Keep the given sub-sections, creating default ones for those not supplied.

        :param lustre_client: Lustre client settings; a ``LustreClient()`` is created when falsy.
        :param nvidia_software: Nvidia software settings; a ``NvidiaSoftware()`` is created when falsy.
        """
        super().__init__()
        # Every section is always populated, so callers can read ``.enabled`` without None checks.
        self.lustre_client = lustre_client if lustre_client else LustreClient()
        self.nvidia_software = nvidia_software if nvidia_software else NvidiaSoftware()


class Build(Resource):
"""Represent the build configuration for the ImageBuilder."""

Expand All @@ -157,6 +192,7 @@ def __init__(
components: List[Component] = None,
update_os_packages: UpdateOsPackages = None,
imds: Imds = None,
installation: Installation = None,
):
super().__init__()
self.instance_type = Resource.init_param(instance_type)
Expand All @@ -168,6 +204,7 @@ def __init__(
self.components = components
self.update_os_packages = update_os_packages
self.imds = imds or Imds(implied="v2.0")
self.installation = installation or Installation()

def _register_validators(self, context: ValidatorContext = None): # noqa: D102 #pylint: disable=unused-argument
self._register_validator(
Expand Down Expand Up @@ -282,21 +319,24 @@ def lambda_functions_vpc_config(self):
class ImageBuilderExtraChefAttributes(ExtraChefAttributes):
"""Extra Attributes for ImageBuilder Chef Client."""

def __init__(self, dev_settings: ImagebuilderDevSettings):
super().__init__(dev_settings)
def __init__(self, config: ImageBuilderConfig):
super().__init__(config.dev_settings)
self.region = None
self.nvidia = None
self.lustre = None
self.is_official_ami_build = None
self.custom_node_package = None
self.custom_awsbatchcli_package = None
self.base_os = None
self.disable_kernel_update = None
self.slurm_patches_s3_archive = None
self._set_default(dev_settings)
self._set_default(config)

def _set_default(self, dev_settings: ImagebuilderDevSettings):
def _set_default(self, config: ImageBuilderConfig):
dev_settings = config.dev_settings
self.region = "{{ build.AWSRegion.outputs.stdout }}"
self.nvidia = {"enabled": "no"}
self.nvidia = {"enabled": "yes"} if config.build.installation.nvidia_software.enabled else {"enabled": "no"}
self.lustre = {"enabled": "yes"} if config.build.installation.lustre_client.enabled else {"enabled": "no"}
self.is_official_ami_build = "false"
self.custom_node_package = dev_settings.node_package if dev_settings and dev_settings.node_package else ""
self.custom_awsbatchcli_package = (
Expand Down
38 changes: 38 additions & 0 deletions cli/src/pcluster/schemas/imagebuilder_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@
ImageBuilderConfig,
ImagebuilderDeploymentSettings,
ImagebuilderDevSettings,
Installation,
LustreClient,
NvidiaSoftware,
UpdateOsPackages,
Volume,
)
Expand Down Expand Up @@ -167,6 +170,40 @@ def make_resource(self, data, **kwargs):
return UpdateOsPackages(**data)


class LustreClientSchema(BaseSchema):
    """Represent the schema of the ImageBuilder LustreClient."""

    # Boolean flag forwarded to LustreClient as the ``enabled`` keyword argument.
    enabled = fields.Bool()

    @post_load
    def make_resource(self, data, **kwargs):
        """
        Generate resource.

        :param data: loaded field values, expanded as keyword arguments for ``LustreClient``.
        :param kwargs: extra arguments supplied by the ``post_load`` hook; not used here.
        :return: the ``LustreClient`` built from ``data``.
        """
        return LustreClient(**data)


class NvidiaSoftwareSchema(BaseSchema):
    """Schema describing the NvidiaSoftware section of the ImageBuilder configuration."""

    # Boolean flag forwarded to NvidiaSoftware as the ``enabled`` keyword argument.
    enabled = fields.Bool()

    @post_load
    def make_resource(self, data, **kwargs):
        """Build the NvidiaSoftware resource from the loaded field values."""
        resource = NvidiaSoftware(**data)
        return resource


class InstallationSchema(BaseSchema):
    """Schema describing the Installation section of the ImageBuilder configuration."""

    # Each nested section is loaded through its own schema before Installation is built.
    lustre_client = fields.Nested(LustreClientSchema)
    nvidia_software = fields.Nested(NvidiaSoftwareSchema)

    @post_load
    def make_resource(self, data, **kwargs):
        """Build the Installation resource from the loaded nested sections."""
        resource = Installation(**data)
        return resource


class BuildSchema(BaseSchema):
"""Represent the schema of the ImageBuilder Build."""

Expand All @@ -179,6 +216,7 @@ class BuildSchema(BaseSchema):
subnet_id = fields.Str(validate=get_field_validator("subnet_id"))
update_os_packages = fields.Nested(UpdateOsPackagesSchema)
imds = fields.Nested(ImdsSchema)
installation = fields.Nested(InstallationSchema)

@post_load
def make_resource(self, data, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion cli/src/pcluster/templates/imagebuilder_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def _add_cfn_parameters(self):
self,
"CfnParamChefDnaJson",
type="String",
default=ImageBuilderExtraChefAttributes(self.config.dev_settings).dump_json(),
default=ImageBuilderExtraChefAttributes(self.config).dump_json(),
description="ChefAttributes",
)
CfnParameter(
Expand Down
6 changes: 6 additions & 0 deletions cli/tests/pcluster/config/dummy_imagebuilder_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
ImageBuilderConfig,
ImagebuilderDeploymentSettings,
ImagebuilderDevSettings,
Installation,
LustreClient,
NvidiaSoftware,
UpdateOsPackages,
Volume,
)
Expand All @@ -39,6 +42,9 @@
"additional_iam_policies": AdditionalIamPolicy,
"update_os_packages": UpdateOsPackages,
"imds": Imds,
"installation": Installation,
"lustre_client": LustreClient,
"nvidia_software": NvidiaSoftware,
}


Expand Down
113 changes: 74 additions & 39 deletions cli/tests/pcluster/models/test_imagebuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,16 +157,18 @@ def test_imagebuilder_url_validator(
[
(
{
"build": {
"parent_image": "ami-0185634c5a8a37250",
"instance_type": "c5.xlarge",
"update_os_packages": {"enabled": True},
},
"dev_settings": {
"node_package": "s3://test/aws-parallelcluster-node-3.0.tgz",
"aws_batch_cli_package": "https://test/aws-parallelcluster-3.0.tgz",
"disable_kernel_update": "true",
},
"imagebuilder": {
"build": {
"parent_image": "ami-0185634c5a8a37250",
"instance_type": "c5.xlarge",
"update_os_packages": {"enabled": True},
},
"dev_settings": {
"node_package": "s3://test/aws-parallelcluster-node-3.0.tgz",
"aws_batch_cli_package": "https://test/aws-parallelcluster-3.0.tgz",
"disable_kernel_update": "true",
},
}
},
{
"cluster": {
Expand All @@ -176,20 +178,28 @@ def test_imagebuilder_url_validator(
"disable_kernel_update": "true",
"is_official_ami_build": "false",
"nvidia": {"enabled": "no"},
"lustre": {"enabled": "yes"},
"region": "{{ build.AWSRegion.outputs.stdout }}",
"slurm_patches_s3_archive": "",
}
},
),
(
{
"dev_settings": {
"cookbook": {
"chef_cookbook": "https://test/aws-parallelcluster-cookbook-3.0.tgz",
"extra_chef_attributes": '{"cluster": {"nvidia": { "enabled" : "yes" }, "dcv" :"no"}}',
"imagebuilder": {
"build": {
"parent_image": "ami-0185634c5a8a37250",
"instance_type": "c5.xlarge",
"installation": {"lustre_client": {"enabled": "False"}},
},
"node_package": "s3://test/aws-parallelcluster-node-3.0.tgz",
},
"dev_settings": {
"cookbook": {
"chef_cookbook": "https://test/aws-parallelcluster-cookbook-3.0.tgz",
"extra_chef_attributes": '{"cluster": {"nvidia": { "enabled" : "yes" }, "dcv" :"no"}}',
},
"node_package": "s3://test/aws-parallelcluster-node-3.0.tgz",
},
}
},
{
"cluster": {
Expand All @@ -199,6 +209,7 @@ def test_imagebuilder_url_validator(
"dcv": "no",
"disable_kernel_update": "false",
"is_official_ami_build": "false",
"lustre": {"enabled": "no"},
"nvidia": {"enabled": "yes"},
"region": "{{ build.AWSRegion.outputs.stdout }}",
"slurm_patches_s3_archive": "",
Expand All @@ -207,13 +218,20 @@ def test_imagebuilder_url_validator(
),
(
{
"dev_settings": {
"cookbook": {
"chef_cookbook": "https://test/aws-parallelcluster-cookbook-3.0.tgz",
"extra_chef_attributes": '{"cluster": {"nvidia": { "enabled" : "yes" }, "dcv" :"no"}, '
'"nfs": "true"}',
"imagebuilder": {
"build": {
"parent_image": "ami-0185634c5a8a37250",
"instance_type": "c5.xlarge",
"installation": {"lustre_client": {"enabled": "True"}},
},
"dev_settings": {
"cookbook": {
"chef_cookbook": "https://test/aws-parallelcluster-cookbook-3.0.tgz",
"extra_chef_attributes": '{"cluster": {"nvidia": { "enabled" : "yes" }, "dcv" :"no"}, '
'"nfs": "true"}',
},
"aws_batch_cli_package": "https://test/aws-parallelcluster-3.0.tgz",
},
"aws_batch_cli_package": "https://test/aws-parallelcluster-3.0.tgz",
},
},
{
Expand All @@ -225,6 +243,7 @@ def test_imagebuilder_url_validator(
"disable_kernel_update": "false",
"is_official_ami_build": "false",
"nvidia": {"enabled": "yes"},
"lustre": {"enabled": "yes"},
"region": "{{ build.AWSRegion.outputs.stdout }}",
"slurm_patches_s3_archive": "",
},
Expand All @@ -233,12 +252,19 @@ def test_imagebuilder_url_validator(
),
(
{
"dev_settings": {
"cookbook": {
"chef_cookbook": "https://test/aws-parallelcluster-cookbook-3.0.tgz",
"extra_chef_attributes": '{"cluster": {"is_official_ami_build": "true"},"nfs": "true"}',
"imagebuilder": {
"build": {
"parent_image": "ami-0185634c5a8a37250",
"instance_type": "c5.xlarge",
"installation": {"nvidia_software": {"enabled": True}},
},
"dev_settings": {
"cookbook": {
"chef_cookbook": "https://test/aws-parallelcluster-cookbook-3.0.tgz",
"extra_chef_attributes": '{"cluster": {"is_official_ami_build": "true"},"nfs": "true"}',
},
"aws_batch_cli_package": "https://test/aws-parallelcluster-3.0.tgz",
},
"aws_batch_cli_package": "https://test/aws-parallelcluster-3.0.tgz",
},
},
{
Expand All @@ -248,7 +274,8 @@ def test_imagebuilder_url_validator(
"custom_node_package": "",
"disable_kernel_update": "false",
"is_official_ami_build": "true",
"nvidia": {"enabled": "no"},
"nvidia": {"enabled": "yes"},
"lustre": {"enabled": "yes"},
"region": "{{ build.AWSRegion.outputs.stdout }}",
"slurm_patches_s3_archive": "",
},
Expand All @@ -258,15 +285,22 @@ def test_imagebuilder_url_validator(
# Test case with URL for Slurm patches from S3
(
{
"dev_settings": {
"cookbook": {
"extra_chef_attributes": "{"
'"cluster": {'
'"slurm_patches_s3_archive": "s3://example-s3-bucket/example-archive.tgz"'
"}"
"}"
"imagebuilder": {
"build": {
"parent_image": "ami-0185634c5a8a37250",
"instance_type": "c5.xlarge",
"installation": {"nvidia_software": {"enabled": False}},
},
},
"dev_settings": {
"cookbook": {
"extra_chef_attributes": "{"
'"cluster": {'
'"slurm_patches_s3_archive": "s3://example-s3-bucket/example-archive.tgz"'
"}"
"}"
},
},
}
},
{
"cluster": {
Expand All @@ -276,6 +310,7 @@ def test_imagebuilder_url_validator(
"disable_kernel_update": "false",
"is_official_ami_build": "false",
"nvidia": {"enabled": "no"},
"lustre": {"enabled": "yes"},
"region": "{{ build.AWSRegion.outputs.stdout }}",
"slurm_patches_s3_archive": "s3://example-s3-bucket/example-archive.tgz",
}
Expand All @@ -284,9 +319,9 @@ def test_imagebuilder_url_validator(
],
)
def test_imagebuilder_extra_chef_attributes(resource, dna_json):
dev_settings = imagebuilder_factory(resource).get("dev_settings")
chef_attributes = ImageBuilderExtraChefAttributes(dev_settings).dump_json()
assert_that(chef_attributes).is_equal_to(json.dumps(dna_json))
config = imagebuilder_factory(resource).get("imagebuilder")
chef_attributes = ImageBuilderExtraChefAttributes(config).dump_json()
assert_that(json.loads(chef_attributes)).is_equal_to(dna_json)


def _test_imagebuilder(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ Build:
SubnetId: subnet-0d03dc52
UpdateOsPackages:
Enabled: true
Installation:
NvidiaSoftware:
Enabled: true
LustreClient:
Enabled: true

DevSettings:
DisablePclusterComponent: False
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,9 @@ Build:
{% endif %}
{% if os in ["ubuntu2204", "rhel9", "rocky9"] %}
# Disable Lustre installation because these newer operating systems release new kernels more often, and Lustre usually does not support the latest kernels.
DevSettings:
Cookbook:
ExtraChefAttributes: |
{"cluster": {"lustre": {"enabled": "no" }}}
Installation:
LustreClient:
Enabled: false
{% endif %}

CustomS3Bucket: {{ bucket_name }}
Expand Down

0 comments on commit 284fa43

Please sign in to comment.