From 7e048d5ec95f6859129255678d0285436b77bb97 Mon Sep 17 00:00:00 2001
From: Hanwen
Date: Wed, 2 Aug 2023 11:30:00 -0700
Subject: [PATCH] [Integ-tests] Upload OSU benchmark results as CloudWatch
 metrics

This gives us a historical view of performance over time.

Signed-off-by: Hanwen
---
 tests/integration-tests/tests/efa/test_efa.py | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/tests/integration-tests/tests/efa/test_efa.py b/tests/integration-tests/tests/efa/test_efa.py
index 6117bfe383..2159da2da2 100644
--- a/tests/integration-tests/tests/efa/test_efa.py
+++ b/tests/integration-tests/tests/efa/test_efa.py
@@ -12,6 +12,7 @@
 import logging
 import re
 
+import boto3
 import pytest
 from assertpy import assert_that
 from remote_command_executor import RemoteCommandExecutor
@@ -20,7 +21,7 @@
 from tests.common.assertions import assert_no_errors_in_logs
 from tests.common.mpi_common import _test_mpi
 from tests.common.osu_common import run_individual_osu_benchmark
-from tests.common.utils import fetch_instance_slots, run_system_analyzer
+from tests.common.utils import fetch_instance_slots, get_installed_parallelcluster_version, run_system_analyzer
 
 
 @pytest.mark.usefixtures("serial_execution_by_instance")
@@ -244,6 +245,8 @@ def _check_osu_benchmarks_results(test_datadir, instance, mpi_version, benchmark
     logging.info(output)
     # Check avg latency for all packet sizes
     failures = 0
+    metric_data = []
+    metric_namespace = "ParallelCluster/test_efa"
     for packet_size, value in re.findall(r"(\d+)\s+(\d+)\.", output):
         with open(
             str(test_datadir / "osu_benchmarks" / "results" / instance / mpi_version / benchmark_name), encoding="utf-8"
@@ -271,11 +274,28 @@ def _check_osu_benchmarks_results(test_datadir, instance, mpi_version, benchmark
                 f"tolerated: {tolerated_value}, current: {value}"
             )
 
+            dimensions = {
+                "PclusterVersion": get_installed_parallelcluster_version(),
+                "MpiVariant": mpi_version,
+                "Instance": instance,
+                "OsuBenchmarkName": benchmark_name,
+                "PacketSize": packet_size,
+            }
+            metric_data.append(
+                {
+                    "MetricName": "Latency",
+                    "Dimensions": [{"Name": dim, "Value": str(val)} for dim, val in dimensions.items()],
+                    "Value": int(value),
+                    "Unit": "Microseconds",
+                }
+            )
+
             if is_failure:
                 failures = failures + 1
                 logging.error(message)
             else:
                 logging.info(message)
+    boto3.client("cloudwatch").put_metric_data(Namespace=metric_namespace, MetricData=metric_data)
     return failures
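
Note: with these data points published, the historical view mentioned above can
be read back out of CloudWatch for dashboards or offline analysis. Below is a
minimal sketch of such a query using GetMetricStatistics, assuming the
"ParallelCluster/test_efa" namespace and the dimension names added by this
patch; the dimension values, time range, and period are illustrative
placeholders, not values taken from this change:

    from datetime import datetime, timedelta, timezone

    import boto3

    cloudwatch = boto3.client("cloudwatch")

    # Pull the daily average OSU latency for one benchmark configuration over
    # the last 30 days. CloudWatch matches on the full dimension set, so every
    # dimension published by the test must be supplied here.
    response = cloudwatch.get_metric_statistics(
        Namespace="ParallelCluster/test_efa",
        MetricName="Latency",
        Dimensions=[
            {"Name": "PclusterVersion", "Value": "3.7.0"},         # illustrative value
            {"Name": "MpiVariant", "Value": "openmpi"},            # illustrative value
            {"Name": "Instance", "Value": "c5n.18xlarge"},         # illustrative value
            {"Name": "OsuBenchmarkName", "Value": "osu_latency"},  # illustrative value
            {"Name": "PacketSize", "Value": "1024"},               # illustrative value
        ],
        StartTime=datetime.now(timezone.utc) - timedelta(days=30),
        EndTime=datetime.now(timezone.utc),
        Period=86400,  # one datapoint per day
        Statistics=["Average"],
        Unit="Microseconds",
    )

    # Print datapoints oldest-first to show the latency trend.
    for point in sorted(response["Datapoints"], key=lambda p: p["Timestamp"]):
        print(point["Timestamp"].date(), point["Average"])

Because GetMetricStatistics only matches a metric when the dimension set is
identical, all five dimensions published by the test have to be supplied in
the query; omitting one (for example PacketSize) returns no datapoints.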