Merge branch 'develop' into developoct3
hanwen-pcluste authored Oct 21, 2024
2 parents e2570c3 + 0d18c16 commit 7b637fb
Showing 27 changed files with 317 additions and 42 deletions.
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
@@ -202,6 +202,7 @@ jobs:
python-version: '3.12'
- name: Install AWS CDK
run: |
pip install typeguard~=2.13
npm install -g aws-cdk
pip install -r cloudformation/external-slurmdbd/requirements.txt
- working-directory: cloudformation/external-slurmdbd
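The new `pip install typeguard~=2.13` line uses pip's compatible-release operator, which keeps typeguard on the 2.x series. A minimal sketch (not part of the commit; assumes the `packaging` library is installed) of which versions the `~=2.13` specifier accepts:

```python
# Sketch only: show what the "typeguard~=2.13" pin resolves to.
# "~=2.13" means ">=2.13, ==2.*": any 2.x release at or above 2.13, never 3.0+.
from packaging.specifiers import SpecifierSet

spec = SpecifierSet("~=2.13")
for candidate in ["2.13.0", "2.13.3", "3.0.0"]:
    print(candidate, candidate in spec)
# 2.13.0 True
# 2.13.3 True
# 3.0.0 False
```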
24 changes: 24 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,29 @@
CHANGELOG
=========
3.12.0
------

**CHANGES**

**BUG FIXES**
- When mounting an external OpenZFS file system, it is no longer required to set outbound rules for ports 111, 2049, 20001, 20002, and 20003.

3.11.1
------

**CHANGES**
- Pyxis is now disabled by default, so it must be manually enabled as documented in the product documentation.
- Upgrade Python runtime to version 3.12 in ParallelCluster Lambda Layer.
- Remove the pinning of setuptools to versions prior to 70.0.0.
- Upgrade libjwt to version 1.17.0.

**BUG FIXES**
- Fix an issue in the way we configure the Pyxis Slurm plugin in ParallelCluster that can lead to job submission failures.
https://github.com/aws/aws-parallelcluster/issues/6459
- Add missing permissions required by login nodes to the public template of policies.

3.11.0
------
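The OpenZFS bug fix above means the storage validator now only requires inbound rules on the NFS-related ports for an external OpenZFS file system. As a rough illustration (a hypothetical helper and placeholder security group ID, not code from this repository), an inbound-only check of those ports with boto3 could look like this:

```python
# Sketch: inspect a security group's ingress rules for the OpenZFS mount ports.
# Simplified: it ignores the protocol of each rule and referenced security groups.
import boto3

OPENZFS_PORTS = [111, 2049, 20001, 20002, 20003]


def inbound_port_open(ec2_client, security_group_id, port):
    """Return True if any ingress rule of the security group covers the given port."""
    response = ec2_client.describe_security_groups(GroupIds=[security_group_id])
    for permission in response["SecurityGroups"][0]["IpPermissions"]:
        if permission["IpProtocol"] == "-1":  # "-1" means all protocols and ports
            return True
        if permission.get("FromPort", -1) <= port <= permission.get("ToPort", -1):
            return True
    return False


if __name__ == "__main__":
    ec2 = boto3.client("ec2")
    for p in OPENZFS_PORTS:
        # "sg-0123456789abcdef0" is a placeholder, not a real security group.
        print(p, inbound_port_open(ec2, "sg-0123456789abcdef0", p))
```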
4 changes: 2 additions & 2 deletions api/infrastructure/parallelcluster-api.yaml
@@ -200,7 +200,7 @@ Resources:
- parallelcluster/${Version}/layers/aws-parallelcluster/lambda-layer.zip
- { Version: !FindInMap [ParallelCluster, Constants, Version]}
CompatibleRuntimes:
- python3.9
- python3.12

# We need to define three AWS::Serverless::Api due to an issue with the handling of AWS::NoValue
# See related GitHub issue: https://github.com/aws/serverless-application-model/issues/1435
@@ -294,7 +294,7 @@ Resources:
Value: api
- Key: 'parallelcluster:version'
Value: !FindInMap [ParallelCluster, Constants, Version]
Runtime: python3.9
Runtime: python3.12
Handler: pcluster.api.awslambda.entrypoint.lambda_handler
Layers:
- !Ref PclusterLayer
2 changes: 1 addition & 1 deletion awsbatch-cli/setup.py
@@ -22,7 +22,7 @@ def readme():

VERSION = "1.4.0"
REQUIRES = [
"setuptools<70.0.0",
"setuptools",
"boto3>=1.16.14",
"tabulate>=0.8.8,<=0.8.10",
]
2 changes: 1 addition & 1 deletion awsbatch-cli/src/awsbatch/awsbhosts.py
@@ -66,7 +66,7 @@ def __init__(
mem_registered,
cpu_avail,
mem_avail,
):
): # pylint: disable=too-many-positional-arguments
"""Initialize the object."""
self.container_instance_arn = container_instance_arn
self.status = status
8 changes: 6 additions & 2 deletions awsbatch-cli/src/awsbatch/awsbout.py
@@ -81,7 +81,9 @@ def __init__(self, log, boto3_factory):
self.log = log
self.boto3_factory = boto3_factory

def run(self, job_id, head=None, tail=None, stream=None, stream_period=None):
def run(
self, job_id, head=None, tail=None, stream=None, stream_period=None
): # pylint: disable=too-many-positional-arguments
"""Print job output."""
log_stream = self.__get_log_stream(job_id)
if log_stream:
@@ -124,7 +126,9 @@ def __get_log_stream(self, job_id):
fail("Error listing jobs from AWS Batch. Failed with exception: %s" % e)
return log_stream

def __print_log_stream(self, log_stream, head=None, tail=None, stream=None, stream_period=None): # noqa: C901 FIXME
def __print_log_stream( # noqa: C901 FIXME
self, log_stream, head=None, tail=None, stream=None, stream_period=None
): # pylint:disable=too-many-positional-arguments
"""
Ask for log stream and print it.
2 changes: 1 addition & 1 deletion awsbatch-cli/src/awsbatch/awsbqueues.py
@@ -50,7 +50,7 @@ def _get_parser():
class Queue:
"""Generic queue object."""

def __init__(self, arn, name, priority, status, status_reason):
def __init__(self, arn, name, priority, status, status_reason): # pylint: disable=too-many-positional-arguments
"""Initialize the object."""
self.arn = arn
self.name = name
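The `# pylint: disable=too-many-positional-arguments` comments added throughout the awsbatch-cli modules suppress pylint's too-many-positional-arguments check on constructors and run() methods that take many positional parameters. A hypothetical alternative (not what this commit does) is to make the trailing parameters keyword-only, which keeps the positional count low and makes call sites self-documenting:

```python
# Hypothetical refactor of the Queue constructor shown above (not part of the
# commit): the bare "*" forces every parameter after "arn" to be passed by
# keyword, so the too-many-positional-arguments warning never triggers.
class Queue:
    """Generic queue object."""

    def __init__(self, arn, *, name, priority, status, status_reason):
        self.arn = arn
        self.name = name
        self.priority = priority
        self.status = status
        self.status_reason = status_reason


# Call sites must then name each argument after "arn", for example:
# Queue("arn:aws:batch:...", name="queue1", priority=1, status="VALID", status_reason="ok")
```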
6 changes: 4 additions & 2 deletions awsbatch-cli/src/awsbatch/awsbstat.py
@@ -94,7 +94,7 @@ def __init__(
log_stream,
log_stream_url,
s3_folder_url,
):
): # pylint: disable=too-many-positional-arguments
"""Initialize the object."""
self.id = job_id
self.name = name
@@ -282,7 +282,9 @@ def __init__(self, log, boto3_factory):
self.boto3_factory = boto3_factory
self.batch_client = boto3_factory.get_client("batch")

def run(self, job_status, expand_children, job_queue=None, job_ids=None, show_details=False):
def run(
self, job_status, expand_children, job_queue=None, job_ids=None, show_details=False
): # pylint: disable=too-many-positional-arguments
"""Print list of jobs, by filtering by queue or by ids."""
if job_ids:
self.__populate_output_by_job_ids(job_ids, show_details or len(job_ids) == 1, include_parents=True)
2 changes: 1 addition & 1 deletion awsbatch-cli/src/awsbatch/awsbsub.py
@@ -444,7 +444,7 @@ def run( # noqa: C901 FIXME
timeout=None,
dependencies=None,
env=None,
):
): # pylint: disable=too-many-positional-arguments
"""Submit the job."""
try:
# array properties
2 changes: 1 addition & 1 deletion cli/setup.py
@@ -23,7 +23,7 @@ def readme():
VERSION = "3.12.0"
CDK_VERSION = "1.164"
REQUIRES = [
"setuptools<70.0.0",
"setuptools",
"boto3>=1.16.14",
"tabulate>=0.8.8,<=0.8.10",
"PyYAML>=5.3.1,!=5.4",
28 changes: 23 additions & 5 deletions cli/src/pcluster/validators/cluster_validators.py
@@ -462,7 +462,9 @@ def _validate(
# --------------- Storage validators --------------- #


def _is_access_allowed(security_groups_ids, subnets, port, security_groups_by_nodes, protocol="tcp"):
def _is_access_allowed(
security_groups_ids, subnets, port, security_groups_by_nodes, protocol="tcp", check_outbound=True
):
"""
Verify given list of security groups to check if they allow in and out access on the given port.
@@ -508,7 +510,9 @@ def _is_access_allowed(security_groups_ids, subnets, port, security_groups_by_no
out_access = out_access or _are_ip_ranges_and_sg_accessible(
security_groups_by_nodes, dst_ip_ranges, dst_security_groups, subnets
)
return in_access and out_access
if check_outbound:
return in_access and out_access
return in_access


def _are_ip_ranges_and_sg_accessible(security_groups_by_nodes, allowed_ip_ranges, allowed_security_groups, subnets):
@@ -654,31 +658,45 @@ def _check_file_storage(self, security_groups_by_nodes, file_storages, subnet_id

for protocol, ports in FSX_PORTS[file_storage.file_storage_type].items():
missing_ports = self._get_missing_ports(
security_groups_by_nodes, subnet_ids, network_interfaces, ports, protocol
security_groups_by_nodes,
subnet_ids,
network_interfaces,
ports,
protocol,
file_storage.file_storage_type,
)

if missing_ports:
direction = "inbound and outbound"
if file_storage.file_storage_type == "OPENZFS":
direction = "inbound"
self._add_failure(
f"The current security group settings on file storage '{file_storage_id}' does not"
" satisfy mounting requirement. The file storage must be associated to a security group"
f" that allows inbound and outbound {protocol.upper()} traffic through ports {ports}. "
f" that allows {direction } {protocol.upper()} traffic through ports {ports}. "
f"Missing ports: {missing_ports}",
FailureLevel.ERROR,
)

def _get_missing_ports(self, security_groups_by_nodes, subnet_ids, network_interfaces, ports, protocol):
def _get_missing_ports(
self, security_groups_by_nodes, subnet_ids, network_interfaces, ports, protocol, storage_type
):
missing_ports = []
for port in ports:
fs_access = False
for network_interface in network_interfaces:
# Get list of security group IDs
sg_ids = [sg.get("GroupId") for sg in network_interface.get("Groups")]
check_outbound = True
if storage_type == "OPENZFS":
check_outbound = False
if _is_access_allowed(
sg_ids,
subnet_ids,
port=port,
security_groups_by_nodes=security_groups_by_nodes,
protocol=protocol,
check_outbound=check_outbound,
):
fs_access = True
break
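The net effect of the validator changes is that the outbound half of the security-group port check is skipped for OPENZFS storage, while every other FSx storage type still requires both directions. A condensed sketch (simplified names and data shapes, not the actual validator code):

```python
# Sketch of the control flow this diff introduces in cluster_validators.py.


def is_access_allowed(in_access, out_access, check_outbound=True):
    """Mirror of _is_access_allowed's new return: outbound only matters when requested."""
    if check_outbound:
        return in_access and out_access
    return in_access


def missing_ports(port_access, storage_type):
    """port_access maps port -> (inbound_ok, outbound_ok); OPENZFS skips the outbound check."""
    check_outbound = storage_type != "OPENZFS"
    return [
        port
        for port, (in_ok, out_ok) in port_access.items()
        if not is_access_allowed(in_ok, out_ok, check_outbound)
    ]


# Outbound traffic on 2049 is blocked: still a failure for Lustre, no longer for OpenZFS.
access = {111: (True, True), 2049: (True, False)}
print(missing_ports(access, "OPENZFS"))  # []
print(missing_ports(access, "LUSTRE"))   # [2049]
```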
