[Integ-tests] Upload OSU benchmark results to CloudWatch metric
Therefore, we will have a historical view of performance over time.

Signed-off-by: Hanwen <[email protected]>
hanwen-cluster committed Aug 3, 2023
1 parent 2a45dda commit 7e048d5
Showing 1 changed file with 21 additions and 1 deletion.
tests/integration-tests/tests/efa/test_efa.py (21 additions, 1 deletion)
@@ -12,6 +12,7 @@
 import logging
 import re
 
+import boto3
 import pytest
 from assertpy import assert_that
 from remote_command_executor import RemoteCommandExecutor
@@ -20,7 +21,7 @@
 from tests.common.assertions import assert_no_errors_in_logs
 from tests.common.mpi_common import _test_mpi
 from tests.common.osu_common import run_individual_osu_benchmark
-from tests.common.utils import fetch_instance_slots, run_system_analyzer
+from tests.common.utils import fetch_instance_slots, get_installed_parallelcluster_version, run_system_analyzer
 
 
 @pytest.mark.usefixtures("serial_execution_by_instance")
@@ -244,6 +245,8 @@ def _check_osu_benchmarks_results(test_datadir, instance, mpi_version, benchmark
     logging.info(output)
     # Check avg latency for all packet sizes
     failures = 0
+    metric_data = []
+    metric_namespace = "ParallelCluster/test_efa"
     for packet_size, value in re.findall(r"(\d+)\s+(\d+)\.", output):
         with open(
             str(test_datadir / "osu_benchmarks" / "results" / instance / mpi_version / benchmark_name), encoding="utf-8"
@@ -271,11 +274,28 @@ def _check_osu_benchmarks_results(test_datadir, instance, mpi_version, benchmark
                 f"tolerated: {tolerated_value}, current: {value}"
             )
 
+            dimensions = {
+                "PclusterVersion": get_installed_parallelcluster_version(),
+                "MpiVariant": mpi_version,
+                "Instance": instance,
+                "OsuBenchmarkName": benchmark_name,
+                "PacketSize": packet_size,
+            }
+            metric_data.append(
+                {
+                    "MetricName": "Latency",
+                    "Dimensions": [{"Name": name, "Value": str(value)} for name, value in dimensions.items()],
+                    "Value": int(value),
+                    "Unit": "Microseconds",
+                }
+            )
+
             if is_failure:
                 failures = failures + 1
                 logging.error(message)
             else:
                 logging.info(message)
+    boto3.client("cloudwatch").put_metric_data(Namespace=metric_namespace, MetricData=metric_data)
 
     return failures
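
Since the data points are published under the "ParallelCluster/test_efa" namespace with the ParallelCluster version, MPI variant, instance type, benchmark name, and packet size as dimensions, the Latency metric can later be read back to chart the performance trend. Below is a minimal sketch of such a query using boto3's get_metric_statistics; it is not part of this commit, and the dimension values (version, MPI variant, instance type, benchmark, packet size) are placeholders:

from datetime import datetime, timedelta, timezone

import boto3

cloudwatch = boto3.client("cloudwatch")
# Dimensions must exactly match the ones published by the test; the values here are examples only.
response = cloudwatch.get_metric_statistics(
    Namespace="ParallelCluster/test_efa",
    MetricName="Latency",
    Dimensions=[
        {"Name": "PclusterVersion", "Value": "3.7.0"},         # placeholder version
        {"Name": "MpiVariant", "Value": "openmpi"},            # placeholder MPI variant
        {"Name": "Instance", "Value": "c5n.18xlarge"},         # placeholder instance type
        {"Name": "OsuBenchmarkName", "Value": "osu_latency"},  # placeholder benchmark
        {"Name": "PacketSize", "Value": "8"},                  # placeholder packet size
    ],
    StartTime=datetime.now(timezone.utc) - timedelta(days=90),
    EndTime=datetime.now(timezone.utc),
    Period=86400,            # one aggregated datapoint per day
    Statistics=["Average"],
)
# Print the daily average latency in chronological order.
for point in sorted(response["Datapoints"], key=lambda p: p["Timestamp"]):
    print(point["Timestamp"], point["Average"], point["Unit"])

CloudWatch retains custom metric data for up to 15 months at decreasing resolution, which is what makes the historical view referenced in the commit message possible.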

