Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[integ-tests] Improve createami integration tests #6605

Merged
merged 3 commits into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions tests/integration-tests/configs/develop.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -149,18 +149,18 @@ test-suites:
oss: [{{ OS_X86_3 }}]
test_createami.py::test_build_image:
dimensions:
- regions: ["eu-west-3"]
instances: {{ common.INSTANCES_DEFAULT_X86 }}
- regions: ["euw3-az1"]
instances: ["g4dn.2xlarge"]
schedulers: [ "slurm" ]
oss: {{ common.OSS_COMMERCIAL_X86 }}
- regions: ["cn-north-1"]
instances: {{ common.INSTANCES_DEFAULT_X86 }}
- regions: ["cnn1-az1"]
instances: ["g4dn.2xlarge"]
schedulers: ["slurm"]
oss: [{{ OS_X86_5 }}]
oss: ["alinux2023"]
- regions: ["us-gov-west-1"]
instances: {{ common.INSTANCES_DEFAULT_X86 }}
instances: ["g4dn.2xlarge"]
schedulers: ["slurm"]
oss: [{{ OS_X86_7 }}]
oss: ["alinux2023"]
test_createami.py::test_build_image_custom_components:
# Test arn custom component with combination (eu-west-1, m6g.xlarge, alinux2)
# Test script custom component with combination (ap-southeast-2, c5.xlarge, ubuntu2004)
Expand Down
12 changes: 8 additions & 4 deletions tests/integration-tests/tests/createami/test_createami.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from retrying import retry
from time_utils import minutes, seconds
from troposphere import Template, iam
from utils import generate_stack_name, get_arn_partition
from utils import generate_stack_name, get_arn_partition, get_gpu_count

from tests.common.assertions import (
assert_head_node_is_running,
Expand Down Expand Up @@ -94,10 +94,11 @@ def test_invalid_config(
assert_that(suppressed.message).contains("Request would have succeeded")


@pytest.mark.usefixtures("instance", "scheduler")
@pytest.mark.usefixtures("scheduler")
def test_build_image(
region,
os,
instance,
pcluster_config_reader,
architecture,
s3_bucket_factory,
Expand Down Expand Up @@ -138,9 +139,12 @@ def test_build_image(
else:
# Test vanilla AMIs.
base_ami = retrieve_latest_ami(region, os, ami_type="official", architecture=architecture)

image_config = pcluster_config_reader(
config_file="image.config.yaml", parent_image=base_ami, instance_role=instance_role, bucket_name=bucket_name
config_file="image.config.yaml",
parent_image=base_ami,
instance_role=instance_role,
bucket_name=bucket_name,
gpu_count=get_gpu_count(instance),
)

image = images_factory(image_id, image_config, region)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@ Build:
UpdateOsPackages:
Enabled: true
{% endif %}
{% if os in ["ubuntu2204", "rhel9", "rocky9"] %}
# Disable Lustre installation because these newer operating systems release new kernels more often. Lustre usually does not support the latest kernels
Installation:
LustreClient:
Enabled: false
{% endif %}
# Disable Lustre installation on the newer operating systems (ubuntu2204, rhel9, rocky9): they release new kernels more often, and Lustre usually does not support the latest kernels.
Enabled: {% if os in ["ubuntu2204", "rhel9", "rocky9"] %} false {% else %} true {% endif %}
NvidiaSoftware:
Enabled: {% if gpu_count > 0 %} true {% else %} false {% endif %}

CustomS3Bucket: {{ bucket_name }}

Expand Down
12 changes: 12 additions & 0 deletions tests/integration-tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,18 @@ def get_network_interfaces_count(instance_type, region_name=None):
return get_instance_info(instance_type, region_name).get("NetworkInfo").get("MaximumNetworkCards", 1)


def get_gpu_count(instance_type, region_name=None):
    """
    Return the total number of NVIDIA GPUs for the provided instance type.

    The instance description is obtained through get_instance_info and its "GpuInfo" section is inspected.
    Only entries of the "Gpus" list whose "Manufacturer" equals "NVIDIA" (case-insensitive) contribute
    their "Count" to the total; entries from any other manufacturer are ignored.

    :param instance_type: instance type to inspect
    :param region_name: optional region, forwarded as-is to get_instance_info
    :return: the summed "Count" of the NVIDIA GPU entries, 0 when "GpuInfo" is missing or empty
    """
    instance_info = get_instance_info(instance_type, region_name)
    gpu_section = instance_info.get("GpuInfo", None)

    # Instance types without a (non-empty) GpuInfo section have no GPUs at all.
    if not gpu_section:
        return 0

    return sum(
        gpu_entry.get("Count", 0)
        for gpu_entry in gpu_section.get("Gpus", [])
        if gpu_entry.get("Manufacturer", "").upper() == "NVIDIA"
    )


def get_root_volume_id(instance_id, region, os):
"""Return the root EBS volume's ID for the given EC2 instance."""
logging.info("Getting root volume for instance %s", instance_id)
Expand Down
Loading