From 8059fe16d3ac66a04c73f9e6b8094f720326e30a Mon Sep 17 00:00:00 2001 From: sousinha1997 Date: Thu, 12 Dec 2024 12:41:02 +0530 Subject: [PATCH 01/10] autohpl extract test cases --- quisby/benchmarks/auto_hpl/extract.py | 80 ++++++++++++++++++++------- 1 file changed, 59 insertions(+), 21 deletions(-) diff --git a/quisby/benchmarks/auto_hpl/extract.py b/quisby/benchmarks/auto_hpl/extract.py index bd922a8..76f23df 100644 --- a/quisby/benchmarks/auto_hpl/extract.py +++ b/quisby/benchmarks/auto_hpl/extract.py @@ -1,31 +1,69 @@ -import csv - -from quisby.pricing import cloud_pricing +import logging +from typing import List, Dict, Optional +from pathlib import Path from quisby.benchmarks.linpack.extract import linpack_format_data +logger = logging.getLogger(__name__) + + +def extract_auto_hpl_data( + path: str, + system_name: str +) -> Optional[List[Dict[str, str]]]: + """ + Extract Auto HPL benchmark data from a CSV file. + + Args: + path (str): Path to the CSV file + system_name (str): Name of the system being analyzed + + Returns: + Optional[List[Dict[str, str]]]: Processed benchmark results or None + + Raises: + FileNotFoundError: If the specified file does not exist + PermissionError: If there are insufficient permissions to read the file + ValueError: If the file format is incorrect + """ + # Validate input path + file_path = Path(path) + + # Check file existence and extension + if not file_path.exists(): + raise FileNotFoundError(f"File not found: {path}") + if file_path.suffix.lower() != '.csv': + raise ValueError(f"Invalid file type. Expected .csv, got {file_path.suffix}") -def extract_auto_hpl_data(path, system_name): + # Read file with proper error handling + with open(file_path, 'r', encoding='utf-8') as file: + file_data = file.readlines() - if path.endswith(".csv"): - with open(path) as file: - results = [] - file_data = file.readlines() + # Check for minimum required data + if len(file_data) < 2: + logger.warning(f"Insufficient data in file: {path}") + return None - if len(file_data) > 1: - header_row = file_data[-2].strip().split(":") - data_row = file_data[-1].strip().split(":") + # Extract header and data rows + header_row = file_data[-2].strip().split(":") + data_row = file_data[-1].strip().split(":") - data_dict = {} - for key, value in zip(header_row, data_row): - data_dict[key] = value + # Validate data extraction + if len(header_row) != len(data_row): + raise ValueError("Mismatched header and data lengths") - results = linpack_format_data( - results=results, system_name=system_name, gflops=data_dict["Gflops"] - ) + # Create dictionary from rows + data_dict = dict(zip(header_row, data_row)) - if results: - return results + # Validate required field + if 'Gflops' not in data_dict: + raise KeyError("Missing 'Gflops' in data") - else: - return None + # Process and format data + results: List[Dict[str, str]] = [] + formatted_results = linpack_format_data( + results=results, + system_name=system_name, + gflops=data_dict["Gflops"] + ) + return formatted_results if formatted_results else None From d26b5cb4ce5140c95db271d9da38fb84b2362a8b Mon Sep 17 00:00:00 2001 From: sousinha1997 Date: Thu, 12 Dec 2024 12:46:37 +0530 Subject: [PATCH 02/10] autohpl extract test cases --- tests/__init__.py | 0 tests/test_benchmarks/__init__.py | 0 .../data/auto_hpl/empty_data.csv | 0 .../data/auto_hpl/insufficient_data.csv | 1 + .../data/auto_hpl/invalid_data.csv | 2 + .../data/auto_hpl/invalid_data.txt | 1 + .../data/auto_hpl/mismatched_header_data.csv | 2 + .../data/auto_hpl/missing_gflops.csv | 
2 + .../data/auto_hpl/permission_error_file.csv | 0 .../data/auto_hpl/valid_data.csv | 17 +++++ tests/test_benchmarks/test_auto_hpl.py | 67 +++++++++++++++++++ 11 files changed, 92 insertions(+) create mode 100644 tests/__init__.py create mode 100644 tests/test_benchmarks/__init__.py create mode 100644 tests/test_benchmarks/data/auto_hpl/empty_data.csv create mode 100644 tests/test_benchmarks/data/auto_hpl/insufficient_data.csv create mode 100644 tests/test_benchmarks/data/auto_hpl/invalid_data.csv create mode 100644 tests/test_benchmarks/data/auto_hpl/invalid_data.txt create mode 100644 tests/test_benchmarks/data/auto_hpl/mismatched_header_data.csv create mode 100644 tests/test_benchmarks/data/auto_hpl/missing_gflops.csv create mode 100644 tests/test_benchmarks/data/auto_hpl/permission_error_file.csv create mode 100644 tests/test_benchmarks/data/auto_hpl/valid_data.csv create mode 100644 tests/test_benchmarks/test_auto_hpl.py diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_benchmarks/__init__.py b/tests/test_benchmarks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_benchmarks/data/auto_hpl/empty_data.csv b/tests/test_benchmarks/data/auto_hpl/empty_data.csv new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_benchmarks/data/auto_hpl/insufficient_data.csv b/tests/test_benchmarks/data/auto_hpl/insufficient_data.csv new file mode 100644 index 0000000..322a34d --- /dev/null +++ b/tests/test_benchmarks/data/auto_hpl/insufficient_data.csv @@ -0,0 +1 @@ +T/V:N:NB:P:Q:Time:Gflops diff --git a/tests/test_benchmarks/data/auto_hpl/invalid_data.csv b/tests/test_benchmarks/data/auto_hpl/invalid_data.csv new file mode 100644 index 0000000..1bbb201 --- /dev/null +++ b/tests/test_benchmarks/data/auto_hpl/invalid_data.csv @@ -0,0 +1,2 @@ +T/V:N:NB:P:Q:Time:Gflops +WR12R2R4:9216:256:1:1:13.05:InvalidGflops diff --git a/tests/test_benchmarks/data/auto_hpl/invalid_data.txt b/tests/test_benchmarks/data/auto_hpl/invalid_data.txt new file mode 100644 index 0000000..0ed50ca --- /dev/null +++ b/tests/test_benchmarks/data/auto_hpl/invalid_data.txt @@ -0,0 +1 @@ +This is not a CSV file diff --git a/tests/test_benchmarks/data/auto_hpl/mismatched_header_data.csv b/tests/test_benchmarks/data/auto_hpl/mismatched_header_data.csv new file mode 100644 index 0000000..1929101 --- /dev/null +++ b/tests/test_benchmarks/data/auto_hpl/mismatched_header_data.csv @@ -0,0 +1,2 @@ +T/V:N:NB:P:Q:Time:Gflops +WR12R2R4:9216:256:1:1:13.05:40.0:extra_column diff --git a/tests/test_benchmarks/data/auto_hpl/missing_gflops.csv b/tests/test_benchmarks/data/auto_hpl/missing_gflops.csv new file mode 100644 index 0000000..6d35d6e --- /dev/null +++ b/tests/test_benchmarks/data/auto_hpl/missing_gflops.csv @@ -0,0 +1,2 @@ +T/V:N:NB:P:Q:Time +WR12R2R4:9216:256:1:1:13.05 diff --git a/tests/test_benchmarks/data/auto_hpl/permission_error_file.csv b/tests/test_benchmarks/data/auto_hpl/permission_error_file.csv new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_benchmarks/data/auto_hpl/valid_data.csv b/tests/test_benchmarks/data/auto_hpl/valid_data.csv new file mode 100644 index 0000000..e1582d1 --- /dev/null +++ b/tests/test_benchmarks/data/auto_hpl/valid_data.csv @@ -0,0 +1,17 @@ +# Test general meta start +# Test: auto_hpl +# Results version: 1.0 +# Host: c4a-standard-16 +# Sys environ: gcp +# Tuned: tuned_none +# OS: 5.14.0-503.11.1.el9_5.aarch64 +# Numa nodes: 1 +# CPU family: Neoverse-V2 +# Number cpus: 16 +# 
Memory: 65121408kB +# Test general meta end +# Test meta data start +# /usr/lib64/openmpi/bin/mpirun --allow-run-as-root -np 1 --mca btl self,vader --report-bindings --map-by l3cache -x OMP_NUM_THREADS=16 ./xhpl +# Test meta data end +T/V:N:NB:P:Q:Time:Gflops +WR12R2R4:78336:256:1:1:659.06:4.8627e+02 \ No newline at end of file diff --git a/tests/test_benchmarks/test_auto_hpl.py b/tests/test_benchmarks/test_auto_hpl.py new file mode 100644 index 0000000..3614a4e --- /dev/null +++ b/tests/test_benchmarks/test_auto_hpl.py @@ -0,0 +1,67 @@ +import unittest +import os +from unittest.mock import patch +from pathlib import Path +from quisby.benchmarks.auto_hpl.extract import extract_auto_hpl_data +from quisby.benchmarks.linpack.extract import linpack_format_data + +class TestAutoHPLExtract(unittest.TestCase): + + # Helper function to get the path for the sample data + def get_sample_data_path(self, filename): + return os.path.join(os.path.dirname(__file__), 'data', 'auto_hpl', filename) + + # Test when the file is correctly formatted + @patch("quisby.benchmarks.auto_hpl.extract.linpack_format_data") + def test_valid_file(self, mock_linpack_format_data): + valid_file_path = self.get_sample_data_path("valid_data.csv") + mock_linpack_format_data.return_value = [{"system": "TestSystem", "gflops": '4.8627e+02'}] + system_name = "TestSystem" + + result = extract_auto_hpl_data(valid_file_path, system_name) + mock_linpack_format_data.assert_called_with( + results=[], system_name=system_name, gflops="4.8627e+02" + ) + self.assertEqual(result, [{"system": "TestSystem", "gflops": "4.8627e+02"}]) + + # Test when the file does not exist (FileNotFoundError) + def test_file_not_found(self): + invalid_file_path = "/path/to/nonexistent/file.csv" + with self.assertRaises(FileNotFoundError): + extract_auto_hpl_data(invalid_file_path, "TestSystem") + + # Test when the file does not have the correct extension (ValueError) + def test_invalid_file_extension(self): + invalid_file_path = self.get_sample_data_path("invalid_data.txt") # A non-CSV file + with self.assertRaises(ValueError): + res = extract_auto_hpl_data(invalid_file_path, "TestSystem") + print(res) + + # Test when the file has insufficient data (less than two lines) + def test_insufficient_data(self): + insufficient_data_file_path = self.get_sample_data_path("insufficient_data.csv") + result = extract_auto_hpl_data(insufficient_data_file_path, "TestSystem") + self.assertIsNone(result) + + # Test when the Gflops field is missing (KeyError) + def test_missing_gflops(self): + missing_gflops_file_path = self.get_sample_data_path("missing_gflops.csv") + with self.assertRaises(KeyError): + extract_auto_hpl_data(missing_gflops_file_path, "TestSystem") + + # Test when there is a mismatch in header and data length (ValueError) + def test_mismatched_header_and_data(self): + mismatched_file_path = self.get_sample_data_path("mismatched_header_data.csv") + with self.assertRaises(ValueError): + extract_auto_hpl_data(mismatched_file_path, "TestSystem") + + # Test when there are permission issues with the file (PermissionError) + def test_permission_error(self): + permission_error_file_path = self.get_sample_data_path("permission_error_file.csv") + # Mocking os.path.exists and open to simulate a PermissionError + with patch("builtins.open", side_effect=PermissionError("Permission denied")): + with self.assertRaises(PermissionError): + extract_auto_hpl_data(permission_error_file_path, "TestSystem") + +if __name__ == '__main__': + unittest.main() From 
d17bf7d647fba20d6f108f97107cf6a5873bf6ec Mon Sep 17 00:00:00 2001 From: sousinha1997 Date: Mon, 6 Jan 2025 09:18:42 +0530 Subject: [PATCH 03/10] auto_hpl and linpack reformatting --- quisby/benchmarks/auto_hpl/comparison.py | 39 +++--- quisby/benchmarks/auto_hpl/summary.py | 22 ++- quisby/benchmarks/linpack/comparison.py | 89 +++++++----- quisby/benchmarks/linpack/extract.py | 166 +++++++++++++++-------- quisby/benchmarks/linpack/summary.py | 96 +++++++++---- 5 files changed, 278 insertions(+), 134 deletions(-) diff --git a/quisby/benchmarks/auto_hpl/comparison.py b/quisby/benchmarks/auto_hpl/comparison.py index eb4db6b..0c2d9f4 100644 --- a/quisby/benchmarks/auto_hpl/comparison.py +++ b/quisby/benchmarks/auto_hpl/comparison.py @@ -1,20 +1,25 @@ from quisby.benchmarks.linpack.comparison import compare_linpack_results -def compare_auto_hpl_results(spreadsheets, spreadsheetId, test_name): - compare_linpack_results(spreadsheets, spreadsheetId, test_name) - - - - - - - - - - - - - - - +def compare_auto_hpl_results(spreadsheets, spreadsheet_id, test_name): + """ + Compares AutoHPL results using Linpack benchmark data. + + This function calls the `compare_linpack_results` function to compare + Linpack results for AutoHPL tests. It uses provided spreadsheets and test + details to perform the comparison. + + Args: + spreadsheets (list): A list of spreadsheet data to compare. + spreadsheet_id (str): The ID of the spreadsheet containing the results. + test_name (str): The name of the test to compare. + + Returns: + None + """ + try: + # Call the Linpack comparison function with the provided arguments + compare_linpack_results(spreadsheets, spreadsheet_id, test_name) + except Exception as e: + # Handle errors that may occur during comparison + raise RuntimeError(f"Error comparing AutoHPL results: {str(e)}") diff --git a/quisby/benchmarks/auto_hpl/summary.py b/quisby/benchmarks/auto_hpl/summary.py index dbb2229..60616ae 100644 --- a/quisby/benchmarks/auto_hpl/summary.py +++ b/quisby/benchmarks/auto_hpl/summary.py @@ -1,5 +1,23 @@ from quisby.benchmarks.linpack.summary import create_summary_linpack_data -def create_summary_auto_hpl_data(results,OS_RELEASE): - return create_summary_linpack_data(results,OS_RELEASE) \ No newline at end of file +def create_summary_auto_hpl_data(results, os_release): + """ + Creates a summary of AutoHPL test results. + + This function calls `create_summary_linpack_data` to generate a summary of + the AutoHPL test results based on the provided data and OS release information. + + Args: + results (list): The test results to be summarized. + os_release (str): The operating system release version used for the test. + + Returns: + Any: Returns the summary data generated by `create_summary_linpack_data`. 
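+
+    Example (row layout follows linpack_format_data; the values are
+    illustrative, not real measurements, and the call assumes the
+    cloud/test config has been set up):
+
+        rows = [["m5.xlarge", 4, 486.27, 1, 0.192, 2532.66]]
+        summary = create_summary_auto_hpl_data(rows, "RHEL-9.5")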
+ """ + try: + # Call the function to create the summary for AutoHPL test results + return create_summary_linpack_data(results, os_release) + except Exception as e: + # Handle potential errors and raise with a descriptive message + raise RuntimeError(f"Error creating summary for AutoHPL data: {str(e)}") diff --git a/quisby/benchmarks/linpack/comparison.py b/quisby/benchmarks/linpack/comparison.py index e28cb71..c3e0453 100644 --- a/quisby/benchmarks/linpack/comparison.py +++ b/quisby/benchmarks/linpack/comparison.py @@ -1,5 +1,4 @@ from quisby import custom_logger - from quisby.sheet.sheet_util import ( read_sheet, append_to_sheet, @@ -10,18 +9,35 @@ ) from quisby.util import percentage_deviation +def compare_linpack_results(spreadsheets, spreadsheet_id, test_name): + """ + Compares Linpack test results from two spreadsheets and appends the comparison results + to the specified spreadsheet. + + This function compares the GFLOPS, scaling, and price-performance data between + two sets of test results, calculates the percentage differences, and updates the + results on a Google Sheet. -def compare_linpack_results(spreadsheets, spreadsheetId, test_name): + Args: + spreadsheets (list): A list of spreadsheets containing the test data to compare. + spreadsheet_id (str): The ID of the spreadsheet to append the results to. + test_name (str): The name of the test whose results are being compared. + + Returns: + str: The ID of the spreadsheet where the results were appended, or the same ID if the operation fails. + """ values = [] results = [] - spreadsheet_name = [] + spreadsheet_names = [] + # Read the test data from both spreadsheets for spreadsheet in spreadsheets: values.append(read_sheet(spreadsheet, test_name)) - spreadsheet_name.append( + spreadsheet_names.append( get_sheet(spreadsheet, test_name)["properties"]["title"] ) + # Initialize results with headers for value in values[0]: for ele in values[1]: if value[0] == "System" and ele[0] == "System": @@ -42,38 +58,43 @@ def compare_linpack_results(spreadsheets, spreadsheetId, test_name): ] ) break - else: - if value[0] == ele[0]: - price_perf = [] - price_perf.append(float(value[2]) / float(value[4])) - price_perf.append(float(ele[2]) / float(ele[4])) - price_perf_diff = percentage_deviation(price_perf[0], price_perf[1]) - percentage_diff = percentage_deviation(value[2], ele[2]) - gflop_diff = percentage_deviation(value[3], ele[3]) - results.append( - [ - value[0], - value[1], - value[2], - ele[2], - percentage_diff, - value[3], - ele[3], - gflop_diff, - value[4], - price_perf[0], - price_perf[1], - price_perf_diff, - ] - ) + elif value[0] == ele[0]: + # Calculate percentage differences for GFLOPS, scaling, and price-performance + price_perf = [ + float(value[2]) / float(value[4]), + float(ele[2]) / float(ele[4]), + ] + price_perf_diff = percentage_deviation(price_perf[0], price_perf[1]) + percentage_diff = percentage_deviation(value[2], ele[2]) + gflop_diff = percentage_deviation(value[3], ele[3]) + + results.append( + [ + value[0], + value[1], + value[2], + ele[2], + percentage_diff, + value[3], + ele[3], + gflop_diff, + value[4], + price_perf[0], + price_perf[1], + price_perf_diff, + ] + ) + + # Attempt to update the spreadsheet with the new comparison data try: - create_sheet(spreadsheetId, test_name) + create_sheet(spreadsheet_id, test_name) custom_logger.info("Deleting existing charts and data from the sheet...") - clear_sheet_charts(spreadsheetId, test_name) - clear_sheet_data(spreadsheetId, test_name) - custom_logger.info("Appending 
new " + test_name + " data to sheet...") - append_to_sheet(spreadsheetId, results, test_name) + clear_sheet_charts(spreadsheet_id, test_name) + clear_sheet_data(spreadsheet_id, test_name) + custom_logger.info(f"Appending new {test_name} data to sheet...") + append_to_sheet(spreadsheet_id, results, test_name) except Exception as exc: + # Log the error and return the spreadsheet ID if the operation fails custom_logger.debug(str(exc)) custom_logger.error("Failed to append data to sheet") - return spreadsheetId + return spreadsheet_id diff --git a/quisby/benchmarks/linpack/extract.py b/quisby/benchmarks/linpack/extract.py index 3b69d11..7b3d7aa 100644 --- a/quisby/benchmarks/linpack/extract.py +++ b/quisby/benchmarks/linpack/extract.py @@ -1,93 +1,145 @@ import csv import glob -import os.path +import logging +import os import re - from quisby.pricing.cloud_pricing import get_cloud_pricing, get_cloud_cpu_count from quisby.util import read_config +# Setting up logger for better error tracking and debugging +logger = logging.getLogger(__name__) + def linpack_format_data(**kwargs): """ - Add data into format to be shown in spreadsheets - Supports linpack like data. eg: autohpl + Adds data into a format suitable for spreadsheets. + + This function processes Linpack-like data (e.g., autohpl) to include system + information, GFLOPS, pricing, and CPU cores. + + Args: + kwargs: A dictionary containing the required input data, including: + - 'results': List to store formatted data. + - 'system_name': The name of the system being tested. + - 'gflops': The GFLOPS result from the test. + + Returns: + list: Updated 'results' list with the new data. + None: If GFLOPS data is not available or invalid. """ region = read_config("cloud", "region") - cloud_type = read_config("cloud", "cloud_type") + cloud_type = read_config("cloud", "cloud_type").lower() os_release = read_config("test", "OS_RELEASE") os_type = read_config("test", "os_type") - results = kwargs["results"] if kwargs["results"] else [] - system_name = kwargs["system_name"] if kwargs["system_name"] else None - if kwargs["gflops"]: - gflops = float(kwargs["gflops"]) - else: + + results = kwargs.get("results", []) + system_name = kwargs.get("system_name") + + # Ensure GFLOPS is provided and valid + gflops = kwargs.get("gflops") + if not gflops: + logger.warning(f"GFLOPS value is missing for system {system_name}. Skipping.") return None - price_per_hour = get_cloud_pricing( - system_name, region, cloud_type.lower(), os_type - ) - - no_of_cores = get_cloud_cpu_count( - system_name, region, cloud_type.lower() - ) - - results.append( - [ - system_name, - no_of_cores, - gflops, - 1, - price_per_hour, - float(gflops) / float(price_per_hour), - ] - ) + try: + gflops = float(gflops) + except ValueError: + logger.error(f"Invalid GFLOPS value: {gflops}. Could not convert to float.") + raise ValueError(f"Invalid GFLOPS value: {gflops}. 
Could not convert to float.") + + # Fetch pricing and CPU details from the cloud pricing API + try: + price_per_hour = get_cloud_pricing(system_name, region, cloud_type, os_type) + no_of_cores = get_cloud_cpu_count(system_name, region, cloud_type) + except Exception as e: + logger.error(f"Error fetching cloud pricing or CPU count for system {system_name}: {str(e)}") + raise RuntimeError(f"Error fetching cloud pricing or CPU count: {str(e)}") + + # If price_per_hour is invalid or 0, return an empty result to avoid divide by zero errors + if not price_per_hour or price_per_hour == 0.0: + logger.warning(f"Invalid price_per_hour for system {system_name}, skipping.") + return [] + + # Append formatted data to results + results.append([ + system_name, + no_of_cores, + gflops, + 1, # Assuming '1' refers to a single test instance + price_per_hour, + gflops / price_per_hour + ]) return results def extract_linpack_data(path, system_name): """ - Make shift function to handle linpack summary data - till a resolution is reached - """ + Extracts Linpack summary data from files and formats it for analysis. + + This function handles the extraction of data from Linpack summary files + and provides information about GFLOPS and the number of cores used. + Args: + path (str): Path to the directory containing Linpack summary files. + system_name (str): Name of the system being tested. + + Returns: + tuple: A tuple containing: + - list: Processed Linpack results. + - list: Summary data including file paths for reference. + """ results = [] + summary_data = [] no_of_cores = None gflops = None - summary_data = [] - server = read_config("server", "name") - result_dir = read_config("server", "result_dir") + # Check if the summary file exists summary_file = path - if not os.path.isfile(summary_file): - return None - - if os.path.basename(summary_file).endswith("csv"): - with open(summary_file) as csv_file: - csv_reader = csv.DictReader(csv_file, delimiter=":") - list_data = list(csv_reader) - last_row = list_data[-1] - gflops = last_row["MB/sec"] - threads = last_row["threads"] - summary_data.append([system_name, server + "/results/" + result_dir + "/" + path]) + logger.error(f"Summary file {summary_file} not found for system {system_name}.") + raise FileNotFoundError(f"Summary file {summary_file} not found.") + + # Process CSV summary file + if summary_file.endswith("csv"): + try: + with open(summary_file, 'r') as csv_file: + csv_reader = csv.DictReader(csv_file, delimiter=":") + list_data = list(csv_reader) + last_row = list_data[-1] + + gflops = last_row.get("MB/sec") + threads = last_row.get("threads") + except Exception as e: + logger.error(f"Error reading CSV summary file {summary_file}: {str(e)}") + raise RuntimeError(f"Error reading CSV summary file {summary_file}: {str(e)}") else: - return results, summary_data - - for file_path in glob.glob(path + f"/linpack*_threads_{threads}_*"): - with open(file_path) as txt_file: - data = txt_file.readlines() - for row in data: - if re.findall(r"Number of cores: (\d+)", row): - no_of_cores = re.findall(r"Number of cores: (\d+)", row)[0] - break - + # Return empty results if the file is not CSV + logger.warning(f"Summary file {summary_file} is not in CSV format. 
Skipping.") + return results + + # Process individual Linpack result files + if threads: + for file_path in glob.glob(f"{path}/linpack*_threads_{threads}_*"): + try: + with open(file_path, 'r') as txt_file: + data = txt_file.readlines() + for row in data: + match = re.search(r"Number of cores: (\d+)", row) + if match: + no_of_cores = match.group(1) + break + except Exception as e: + logger.error(f"Error reading Linpack result file {file_path}: {str(e)}") + raise RuntimeError(f"Error reading Linpack result file {file_path}: {str(e)}") + + # If GFLOPS data is found, format and append it if gflops: results = linpack_format_data( results=results, system_name=system_name, no_of_cores=no_of_cores, - gflops=gflops, + gflops=gflops ) - return results, summary_data + return results diff --git a/quisby/benchmarks/linpack/summary.py b/quisby/benchmarks/linpack/summary.py index 93a44d4..eeea478 100644 --- a/quisby/benchmarks/linpack/summary.py +++ b/quisby/benchmarks/linpack/summary.py @@ -1,10 +1,21 @@ import re from itertools import groupby - from quisby.util import mk_int, process_instance, read_config def extract_prefix_and_number(input_string): + """ + Extracts the prefix, number, and suffix from a given string. + + Args: + input_string (str): The string to extract the prefix, number, and suffix from. + + Returns: + tuple: A tuple containing: + - prefix (str): The prefix part of the string. + - number (int): The number extracted from the string. + - suffix (str): The suffix part of the string. + """ match = re.search(r'^(.*?)(\d+)(.*?)$', input_string) if match: prefix = match.group(1) @@ -15,38 +26,67 @@ def extract_prefix_and_number(input_string): def custom_key(item): - cloud_type = read_config("cloud","cloud_type") + """ + Generates a custom key for sorting/grouping based on the cloud type and item format. + + Args: + item (tuple): The item to generate the key for. The item is expected to be a tuple + where the first element is a string representing the instance type. + + Returns: + tuple: A tuple used as the sorting/grouping key. + """ + cloud_type = read_config("cloud", "cloud_type") + if item[0] == "local": return item[0] elif cloud_type == "aws": - instance_type =item[0].split(".")[0] - instance_number = item[0].split(".")[1] + instance_type, instance_number = item[0].split(".")[0], item[0].split(".")[1] return instance_type, instance_number elif cloud_type == "gcp": - instance_type = item[0].split("-")[0] - instance_number = int(item[0].split('-')[-1]) - return instance_type, instance_number + instance_type = item[0].split("-")[0] + instance_number = int(item[0].split('-')[-1]) + return instance_type, instance_number elif cloud_type == "azure": - instance_type, instance_number, version=extract_prefix_and_number(item[0]) + instance_type, version, instance_number = extract_prefix_and_number(item[0]) return instance_type, version, instance_number def group_data(results): + """ + Groups the data based on cloud type and instance attributes. + + Args: + results (list): A list of results that need to be grouped. + + Returns: + itertools.groupby: A grouped object based on the instance attributes. 
+ """ cloud_type = read_config("cloud", "cloud_type") + if cloud_type == "aws": return groupby(results, key=lambda x: process_instance(x[0], "family", "version", "feature", "machine_type")) elif cloud_type == "azure": results = sorted(results, key=lambda x: process_instance(x[0], "family", "feature")) return groupby(results, key=lambda x: process_instance(x[0], "family", "version", "feature")) elif cloud_type == "gcp": - return groupby(results, key=lambda x: process_instance(x[0], "family", "version","sub_family","feature")) + return groupby(results, key=lambda x: process_instance(x[0], "family", "version", "sub_family", "feature")) elif cloud_type == "local": return groupby(results, key=lambda x: process_instance(x[0], "family")) - def sort_data(results): + """ + Sorts the results based on cloud type and instance attributes. + + Args: + results (list): A list of results to be sorted. + + Returns: + list: The sorted results. + """ cloud_type = read_config("cloud", "cloud_type") + if cloud_type == "aws": results.sort(key=lambda x: str(process_instance(x[0], "family"))) elif cloud_type == "azure": @@ -57,28 +97,38 @@ def sort_data(results): return groupby(results, key=lambda x: process_instance(x[0], "family")) -def create_summary_linpack_data(results, OS_RELEASE): +def create_summary_linpack_data(results, os_release): + """ + Creates a summary of Linpack test data, including GFLOPS, scaling, and cost information. + + Args: + results (list): The results from the Linpack test that need to be summarized. + os_release (str): The OS release for which the summary is being created. + + Returns: + list: The summarized results, including headers and computed values. + """ sorted_results = [] - header = [] - header.append( + header = [ [ "System", "Cores", - f"GFLOPS-{OS_RELEASE}", - f"GFLOP Scaling-{OS_RELEASE}", + f"GFLOPS-{os_release}", + f"GFLOP Scaling-{os_release}", "Cost/hr", - f"Price-perf-{OS_RELEASE}", + f"Price-perf-{os_release}", ] - ) + ] - results = list(filter(None, results)) + results = list(filter(None, results)) # Remove any None entries sort_data(results) - #results.sort(key=lambda x: str((x[0], "family", "version","sub_family", "feature"))) - for _, items in group_data(results): + for _, items in group_data(results): items = list(items) sorted_data = sorted(items, key=lambda x: mk_int(process_instance(x[0], "size"))) + cpu_scale, base_gflops = None, None + for index, row in enumerate(sorted_data): if not cpu_scale and not base_gflops: cpu_scale = int(row[1]) @@ -90,10 +140,8 @@ def create_summary_linpack_data(results, OS_RELEASE): cpu_scaling = 0 gflops_scaling = float(row[2]) / (int(row[1]) - cpu_scale) / base_gflops if cpu_scaling != 0 else 1 sorted_data[index][3] = format(gflops_scaling, ".4f") - res = [] - for item in sorted_data: - res.append(item) + + res = [item for item in sorted_data] sorted_results += header + res - # sorted_results += header_row + sorted_data return sorted_results From edfb0102b2b205709b4fc5fb0c0c45dc33360fc8 Mon Sep 17 00:00:00 2001 From: sousinha1997 Date: Mon, 6 Jan 2025 09:19:02 +0530 Subject: [PATCH 04/10] auto_hpl and linpack reformatting --- quisby/benchmarks/auto_hpl/graph.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/quisby/benchmarks/auto_hpl/graph.py b/quisby/benchmarks/auto_hpl/graph.py index bf1333d..3c475dd 100644 --- a/quisby/benchmarks/auto_hpl/graph.py +++ b/quisby/benchmarks/auto_hpl/graph.py @@ -1,6 +1,25 @@ from quisby.benchmarks.linpack.graph import graph_linpack_data -def 
graph_auto_hpl_data(spreadsheetId, test_name, action): - return graph_linpack_data(spreadsheetId, test_name, action) +def graph_auto_hpl_data(spreadsheet_id, test_name, action): + """ + Graphs AutoHPL data based on the provided test results. + This function calls the `graph_linpack_data` function to generate graphs + for the AutoHPL test data based on the provided spreadsheet, test name, + and action to be performed. + + Args: + spreadsheet_id (str): The ID of the spreadsheet containing the data. + test_name (str): The name of the test for which data is to be graphed. + action (str): The action to perform (e.g., "generate", "update", etc.) when graphing. + + Returns: + Any: Returns the result of the `graph_linpack_data` function. + """ + try: + # Call the function to graph the Linpack data for AutoHPL + return graph_linpack_data(spreadsheet_id, test_name, action) + except Exception as e: + # Handle potential errors and raise with a descriptive message + raise RuntimeError(f"Error graphing AutoHPL data: {str(e)}") From abbfa5cf1324326df93835e1d407825e8efc9225 Mon Sep 17 00:00:00 2001 From: sousinha1997 Date: Mon, 6 Jan 2025 09:24:23 +0530 Subject: [PATCH 05/10] coremark reformatting --- quisby/benchmarks/coremark/compare.py | 183 +++++++------ quisby/benchmarks/coremark/coremark.py | 342 ++++++++++++++++--------- 2 files changed, 324 insertions(+), 201 deletions(-) diff --git a/quisby/benchmarks/coremark/compare.py b/quisby/benchmarks/coremark/compare.py index cf8732f..b98e1e6 100644 --- a/quisby/benchmarks/coremark/compare.py +++ b/quisby/benchmarks/coremark/compare.py @@ -1,12 +1,13 @@ from quisby import custom_logger from itertools import groupby - from quisby.sheet.sheet_util import ( create_spreadsheet, append_to_sheet, read_sheet, get_sheet, - create_sheet, clear_sheet_data, clear_sheet_charts, + create_sheet, + clear_sheet_data, + clear_sheet_charts, ) from quisby.util import combine_two_array_alternating, merge_lists_alternately, read_config from quisby.benchmarks.coremark.graph import graph_coremark_data @@ -14,92 +15,126 @@ def extract_prefix_and_number(input_string): - match = re.search(r'^(.*?)(\d+)(.*?)$', input_string) - if match: - prefix = match.group(1) - suffix = match.group(3) # Extracts the suffix after the number - return prefix, suffix + """ + Extracts the prefix and suffix from an instance name that contains a number. + + :param input_string: Instance name, e.g., 't2.micro-01' + :return: Tuple (prefix, suffix) or (None, None) if no match + """ + try: + match = re.search(r'^(.*?)(\d+)(.*?)$', input_string) + if match: + prefix = match.group(1) + suffix = match.group(3) # Extracts the suffix after the number + return prefix, suffix + except Exception as exc: + custom_logger.error(f"Error extracting prefix and number from '{input_string}': {str(exc)}") return None, None def compare_inst(item1, item2): - cloud_type = read_config("cloud", "cloud_type") - if cloud_type == "local": - return True - elif cloud_type == "aws": - return item1.split(".")[0] == item2.split(".")[0] - elif cloud_type == "gcp": + """ + Compares two instance names based on the cloud provider's convention. 
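+
+    Example (instance names are illustrative; assumes the configured
+    cloud_type is "aws"):
+
+        compare_inst("m5.xlarge", "m5.2xlarge")  # -> True, same "m5" prefix
+        compare_inst("m5.xlarge", "c5.xlarge")   # -> False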
- return item1.split("-")[0] == item2.split("-")[0] - elif cloud_type == "azure": - return extract_prefix_and_number(item1) == extract_prefix_and_number(item2) + :param item1: Instance name from the first spreadsheet + :param item2: Instance name from the second spreadsheet + :return: True if instance names are considered the same, False otherwise + """ + try: + cloud_type = read_config("cloud", "cloud_type") + if cloud_type == "local": + return True + elif cloud_type == "aws": + return item1.split(".")[0] == item2.split(".")[0] + elif cloud_type == "gcp": + return item1.split("-")[0] == item2.split("-")[0] + elif cloud_type == "azure": + return extract_prefix_and_number(item1) == extract_prefix_and_number(item2) + except Exception as exc: + custom_logger.error(f"Error comparing instances '{item1}' and '{item2}': {str(exc)}") + return False def compare_coremark_results(spreadsheets, spreadsheetId, test_name, table_name=["System name", "Price-perf"]): - values = [] - results = [] - spreadsheet_name = [] - - for spreadsheet in spreadsheets: - values.append(read_sheet(spreadsheet, range=test_name)) - spreadsheet_name.append(get_sheet(spreadsheet, test_name=test_name)["properties"]["title"]) - - for index, value in enumerate(values): - values[index] = (list(g) for k, g in groupby(value, key=lambda x: x != []) if k) - list_1 = list(values[0]) - list_2 = list(values[1]) - - for value in list_1: - for ele in list_2: - # Check max throughput - if value[0][0] in table_name and ele[0][0] in table_name and value[0][0] == ele[0][0]: - if compare_inst(value[1][0], ele[1][0]): - results.append([""]) - for item1 in value: - for item2 in ele: - if item1[0] == item2[0]: - results = merge_lists_alternately(results, item1, item2) - break - - elif value[0][0] == "Cost/Hr" and ele[0][0] == "Cost/Hr": - if compare_inst(value[1][0], ele[1][0]): - results.append([""]) - for item1 in value: - for item2 in ele: - if item1[0] == item2[0]: - results.append(item1) - break - - elif value[1][0] == ele[1][0]: - if value[0][0] == ele[0][0]: - results.append([""]) - results.append(value[0]) - for item1, item2 in zip(value[1:], ele[1:]): - results = merge_lists_alternately(results, item1, item2) - break - + """ + Compares CoreMark results from multiple spreadsheets and appends the merged data to the target sheet. 
+ + :param spreadsheets: List of spreadsheet names to compare + :param spreadsheetId: Target spreadsheet ID for appending data + :param test_name: The name of the test to compare (e.g., 'coremark') + :param table_name: List of columns to compare (default ["System name", "Price-perf"]) + """ try: - create_sheet(spreadsheetId, test_name) - custom_logger.info("Deleting existing charts and data from the sheet...") - clear_sheet_charts(spreadsheetId, test_name) - clear_sheet_data(spreadsheetId, test_name) - custom_logger.info("Appending new " + test_name + " data to sheet...") - append_to_sheet(spreadsheetId, results, test_name) - #graph_coremark_data(spreadsheetId, test_name, "compare") + values = [] + results = [] + spreadsheet_name = [] + + # Read data from each spreadsheet + for spreadsheet in spreadsheets: + values.append(read_sheet(spreadsheet, range=test_name)) + spreadsheet_name.append(get_sheet(spreadsheet, test_name=test_name)["properties"]["title"]) + + # Group the values into non-empty chunks + for index, value in enumerate(values): + values[index] = (list(g) for k, g in groupby(value, key=lambda x: x != []) if k) + + list_1 = list(values[0]) + list_2 = list(values[1]) + + # Compare the CoreMark results from both spreadsheets + for value in list_1: + for ele in list_2: + # Check max throughput or other table data + if value[0][0] in table_name and ele[0][0] in table_name and value[0][0] == ele[0][0]: + if compare_inst(value[1][0], ele[1][0]): + results.append([""]) + for item1 in value: + for item2 in ele: + if item1[0] == item2[0]: + results = merge_lists_alternately(results, item1, item2) + break + # Handle cost/hour comparison + elif value[0][0] == "Cost/Hr" and ele[0][0] == "Cost/Hr": + if compare_inst(value[1][0], ele[1][0]): + results.append([""]) + for item1 in value: + for item2 in ele: + if item1[0] == item2[0]: + results.append(item1) + break + # General comparison based on row keys + elif value[1][0] == ele[1][0]: + if value[0][0] == ele[0][0]: + results.append([""]) + results.append(value[0]) + for item1, item2 in zip(value[1:], ele[1:]): + results = merge_lists_alternately(results, item1, item2) + break + + # Try to append the merged data to the target sheet + try: + create_sheet(spreadsheetId, test_name) + custom_logger.info(f"Deleting existing charts and data from the sheet '{test_name}'...") + clear_sheet_charts(spreadsheetId, test_name) + clear_sheet_data(spreadsheetId, test_name) + custom_logger.info(f"Appending new {test_name} data to sheet...") + append_to_sheet(spreadsheetId, results, test_name) + # Optionally, generate a graph for CoreMark comparison + # graph_coremark_data(spreadsheetId, test_name, "compare") + except Exception as exc: + custom_logger.error(f"Failed to append data to sheet '{test_name}' in spreadsheet {spreadsheetId}: {str(exc)}") + return spreadsheetId except Exception as exc: - custom_logger.debug(str(exc)) - custom_logger.error("Failed to append data to sheet") - return spreadsheetId - - + custom_logger.error(f"Error comparing CoreMark results: {str(exc)}") if __name__ == "__main__": + # Example usage with empty spreadsheet list and target spreadsheetId spreadsheets = [ - "", - "", + "", # Add first spreadsheet ID + "", # Add second spreadsheet ID ] test_name = "coremark" - compare_coremark_results(spreadsheets, "", test_name, - table_name=["System Name"]) \ No newline at end of file + # Call the function to compare the results and update the sheet + compare_coremark_results(spreadsheets, "", test_name, table_name=["System Name"]) diff 
--git a/quisby/benchmarks/coremark/coremark.py b/quisby/benchmarks/coremark/coremark.py index 8fbeb4e..45329ad 100644 --- a/quisby/benchmarks/coremark/coremark.py +++ b/quisby/benchmarks/coremark/coremark.py @@ -1,170 +1,258 @@ -""" Custom key to sort the data base don instance name """ from itertools import groupby - -from quisby import custom_logger import re - -from quisby.util import read_config +from quisby import custom_logger +from quisby.util import read_config, process_instance, mk_int from quisby.pricing.cloud_pricing import get_cloud_pricing -from quisby.util import process_instance -from quisby.util import mk_int +# Utility function to extract prefix, number, and suffix from instance names +def extract_prefix_and_number(input_string): + """ + Extracts the prefix, number, and suffix from a structured instance name string. + Example: 't2.micro-01' -> ('t2.micro', 1, '') -def extract_prefix_and_number(input_string): - match = re.search(r'^(.*?)(\d+)(.*?)$', input_string) - if match: - prefix = match.group(1) - number = int(match.group(2)) - suffix = match.group(3) - return prefix, number, suffix + :param input_string: The instance name as a string (e.g., 't2.micro-01') + :return: Tuple (prefix, number, suffix) or (None, None, None) if no match + """ + try: + match = re.search(r'^(.*?)(\d+)(.*?)$', input_string) + if match: + prefix = match.group(1) + number = int(match.group(2)) + suffix = match.group(3) + return prefix, number, suffix + except Exception as exc: + custom_logger.error(f"Error extracting prefix and number from input string '{input_string}': {str(exc)}") return None, None, None +# Custom key for sorting instances based on the cloud type and instance name def custom_key(item): - cloud_type = read_config("cloud", "cloud_type") - if item[1][0] == "local": - return item[1][0] - elif cloud_type == "aws": - instance_name = item[1][0] - instance_type = instance_name.split(".")[0] - instance_number = instance_name.split(".")[1] - return instance_type, instance_number - elif cloud_type == "gcp": - instance_type = item[1][0].split("-")[0] - instance_number = int(item[1][0].split('-')[-1]) - return instance_type, instance_number - elif cloud_type == "azure": - instance_type, instance_number, version = extract_prefix_and_number(item[1][0]) - return instance_type, version, instance_number + """ + Generates a custom sorting key based on the instance's cloud platform (AWS, GCP, Azure, or Local). 
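+
+    Example (illustrative; with cloud_type configured as "gcp"):
+
+        custom_key((None, ["n2-standard-8"]))  # -> ("n2", 8)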
+ + :param item: The item containing instance name + :return: Tuple that can be used as a sorting key + """ + try: + cloud_type = read_config("cloud", "cloud_type") + + if item[1][0] == "local": + return item[1][0] # If local, use the first item directly as the key + + # For cloud instances, split names to extract type and number + if cloud_type == "aws": + instance_name = item[1][0] + instance_type, instance_number = instance_name.split(".") + return instance_type, instance_number + + elif cloud_type == "gcp": + instance_type = item[1][0].split("-")[0] + instance_number = int(item[1][0].split('-')[-1]) + return instance_type, instance_number + + elif cloud_type == "azure": + instance_type, version, instance_number = extract_prefix_and_number(item[1][0]) + return instance_type, version, instance_number + + except Exception as exc: + custom_logger.error(f"Error generating custom key for instance '{item[1][0]}': {str(exc)}") + return None +# Calculates price-performance ratio for an instance def calc_price_performance(inst, avg): - region = read_config("cloud", "region") - cloud_type = read_config("cloud", "cloud_type") - os_type = read_config("test", "os_type") - cost_per_hour = None - price_perf = 0.0 + """ + Calculates the price-performance ratio for a given instance. + + :param inst: Instance identifier (e.g., 't2.micro') + :param avg: Average performance for the instance + :return: Tuple (cost per hour, price-performance ratio) + """ try: - cost_per_hour = get_cloud_pricing( - inst, region, cloud_type.lower(), os_type) + region = read_config("cloud", "region") + cloud_type = read_config("cloud", "cloud_type") + os_type = read_config("test", "os_type") + + cost_per_hour = get_cloud_pricing(inst, region, cloud_type.lower(), os_type) price_perf = float(avg) / float(cost_per_hour) + + return cost_per_hour, price_perf + except Exception as exc: - custom_logger.debug(str(exc)) - custom_logger.error("Error calculating value !") - return cost_per_hour, price_perf + custom_logger.error(f"Error calculating price-performance for instance '{inst}': {str(exc)}") + return None, 0.0 +# Groups benchmarking results based on cloud platform def group_data(results): - cloud_type = read_config("cloud", "cloud_type") - if cloud_type == "aws": - return groupby(results, key=lambda x: process_instance(x[1][0], "family", "version", "feature", "machine_type")) - elif cloud_type == "azure": - results = sorted(results, key=lambda x: process_instance(x[1][0], "family", "feature")) - return groupby(results, key=lambda x: process_instance(x[1][0], "family", "feature")) - elif cloud_type == "gcp": - return groupby(results, key=lambda x: process_instance(x[1][0], "family", "version", "sub_family", "feature")) - elif cloud_type == "local": - return groupby(results, key=lambda x: process_instance(x[1][0], "family")) + """ + Groups benchmarking results based on instance type and cloud platform. 
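+
+    Note that itertools.groupby only merges adjacent rows with equal
+    keys, so the results are expected to be pre-sorted by that key
+    (sort_data handles this; the azure branch also sorts inline).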
+ + :param results: List of benchmarking results + :return: Grouped results + """ + try: + cloud_type = read_config("cloud", "cloud_type") + + if cloud_type == "aws": + return groupby(results, key=lambda x: process_instance(x[1][0], "family", "version", "feature", "machine_type")) + elif cloud_type == "azure": + results = sorted(results, key=lambda x: process_instance(x[1][0], "family", "feature")) + return groupby(results, key=lambda x: process_instance(x[1][0], "family", "feature")) + elif cloud_type == "gcp": + return groupby(results, key=lambda x: process_instance(x[1][0], "family", "version", "sub_family", "feature")) + + elif cloud_type == "local": + return groupby(results, key=lambda x: process_instance(x[1][0], "family")) + except Exception as exc: + custom_logger.error(f"Error grouping benchmarking results: {str(exc)}") + return [] + + +# Sorts the results based on cloud platform naming conventions def sort_data(results): - cloud_type = read_config("cloud", "cloud_type") - if cloud_type == "aws": - results.sort(key=lambda x: str(process_instance(x[1][0], "family"))) - elif cloud_type == "azure": - results.sort(key=lambda x: str(process_instance(x[1][0], "family", "version", "feature"))) - elif cloud_type == "gcp": - results.sort(key=lambda x: str(process_instance(x[1][0], "family", "version", "sub_family"))) + """ + Sorts the benchmarking results based on instance naming conventions. + :param results: List of benchmarking results + """ + try: + cloud_type = read_config("cloud", "cloud_type") + if cloud_type == "aws": + results.sort(key=lambda x: str(process_instance(x[1][0], "family"))) + + elif cloud_type == "azure": + results.sort(key=lambda x: str(process_instance(x[1][0], "family", "version", "feature"))) + + elif cloud_type == "gcp": + results.sort(key=lambda x: str(process_instance(x[1][0], "family", "version", "sub_family"))) + + except Exception as exc: + custom_logger.error(f"Error sorting benchmarking results: {str(exc)}") + + +# Generates a summary report for CoreMark benchmarking data def create_summary_coremark_data(results, OS_RELEASE, sorted_results=None): + """ + Generates a summary report for CoreMark data including average performance, cost per hour, and price-performance. 
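+
+    The returned rows mirror the sheet layout sketched below (the OS
+    release and all numbers are illustrative, not real measurements):
+
+        ["System name", "Test_passes-RHEL-9.5"]
+        ["m5.xlarge", 412345.6]
+        [""]
+        ["Cost/Hr"]
+        ["m5.xlarge", 0.192]
+        [""]
+        ["Price-perf", "Passes/$-RHEL-9.5"]
+        ["m5.xlarge", 2147633.3]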
+ + :param results: Benchmarking results + :param OS_RELEASE: OS release string (e.g., 'Ubuntu-20.04') + :param sorted_results: Pre-sorted benchmarking data (optional) + :return: Final report in structured format + """ final_results = [] - # Sort data based on instance name - results = list(filter(None, results)) - sort_data(results) - - for _, items in group_data(results): - cal_data = [["System name", "Test_passes-" + OS_RELEASE]] - items = list(items) - sorted_data = sorted(items, key=lambda x: mk_int(process_instance(x[1][0], "size"))) - # sorted_results.extend(sorted_data) - cost_per_hour, price_per_perf = [], [] - - # Add summary data - for item in sorted_data: - sum = 0 - avg = 0 - iterations = 0 - for index in range(3, len(item)): - sum = sum + float(item[index][1]) - iterations = iterations + 1 - avg = float(sum / iterations) - try: - cph, pp = calc_price_performance(item[1][0], avg) - except Exception as exc: - custom_logger.error(str(exc)) - break - cal_data.append([item[1][0], avg]) - price_per_perf.append([item[1][0], pp]) - cost_per_hour.append([item[1][0], cph]) - sorted_results = [[""]] - sorted_results += cal_data - sorted_results.append([""]) - sorted_results.append(["Cost/Hr"]) - sorted_results += cost_per_hour - sorted_results.append([""]) - sorted_results.append(["Price-perf", f"Passes/$-{OS_RELEASE}"]) - sorted_results += price_per_perf - final_results.extend(sorted_results) + try: + # Sort and filter results + results = list(filter(None, results)) + sort_data(results) + + for _, items in group_data(results): + cal_data = [["System name", "Test_passes-" + OS_RELEASE]] + items = list(items) + sorted_data = sorted(items, key=lambda x: mk_int(process_instance(x[1][0], "size"))) + + cost_per_hour, price_per_perf = [], [] + + # Add summary data for each instance + for item in sorted_data: + sum = 0 + avg = 0 + iterations = 0 + + # Calculate average performance + for index in range(3, len(item)): + sum += float(item[index][1]) + iterations += 1 + avg = float(sum / iterations) + + # Calculate cost per hour and price-perf + try: + cph, pp = calc_price_performance(item[1][0], avg) + except Exception as exc: + custom_logger.error(f"Error calculating price-performance for instance '{item[1][0]}': {str(exc)}") + break + + # Add data to final report + cal_data.append([item[1][0], avg]) + price_per_perf.append([item[1][0], pp]) + cost_per_hour.append([item[1][0], cph]) + + # Compile the summary report + sorted_results = [[""]] + sorted_results += cal_data + sorted_results.append([""]) + sorted_results.append(["Cost/Hr"]) + sorted_results += cost_per_hour + sorted_results.append([""]) + sorted_results.append(["Price-perf", f"Passes/$-{OS_RELEASE}"]) + sorted_results += price_per_perf + + final_results.extend(sorted_results) + + except Exception as exc: + custom_logger.error(f"Error creating CoreMark summary data: {str(exc)}") + return final_results +# Extracts and processes CoreMark data from a file def extract_coremark_data(path, system_name, OS_RELEASE): - """""" + """ + Extracts and processes CoreMark results from a file (CSV format). 
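+
+    The parser below expects colon-separated rows of at least three
+    fields, with a header row containing "iteration" and a "test passes"
+    row per block, e.g. (field names and values are illustrative; only
+    the positions the code reads, row[0] and row[2], matter):
+
+        iteration:metric:value
+        test passes:passes:412345.6
+        1:passes:412340.2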
+ + :param path: Path to the file containing benchmarking results + :param system_name: The name of the system being benchmarked + :param OS_RELEASE: OS release version (e.g., 'Ubuntu-20.04') + :return: Processed benchmarking results or None if there was an error + """ results = [] processed_data = [] - summary_data = [] - server = read_config("server", "name") - result_dir = read_config("server", "result_dir") - # Extract data from file try: + # Open the CSV file if path.endswith(".csv"): with open(path) as file: coremark_results = file.readlines() - summary_data.append([system_name, server + "/results/" + result_dir + "/" + path]) else: - return None + custom_logger.error(f"Invalid file format for path: {path}") + return None # Not a CSV file except Exception as exc: - custom_logger.debug(str(exc)) - custom_logger.error("Unable to extract data from csv file for coremark") + custom_logger.error(f"Error reading CSV file '{path}': {str(exc)}") + return None # Error reading file + + # Process the CoreMark data + try: + data_index = 0 + header = [] + for index, data in enumerate(coremark_results): + if "iteration" in data: + data_index = index + header = data.strip("\n").split(":") + else: + coremark_results[index] = data.strip("\n").split(":") + coremark_results = [header] + coremark_results[data_index + 1:] + + # Format the data for report generation + iteration = 1 + for row in coremark_results: + if "test passes" in row: + processed_data.append([""]) + processed_data.append([system_name]) + processed_data.append([row[0], row[2]]) # System name and test passes + else: + processed_data.append([iteration, row[2]]) # Iteration and performance + iteration += 1 + + results.append(processed_data) + except Exception as exc: + custom_logger.error(f"Error processing CoreMark data from file '{path}': {str(exc)}") return None - data_index = 0 - header = [] - for index, data in enumerate(coremark_results): - if "iteration" in data: - data_index = index - header = data.strip("\n").split(":") - else: - coremark_results[index] = data.strip("\n").split(":") - coremark_results = [header] + coremark_results[data_index + 1:] - - # for index, data in enumerate(coremark_results): - # coremark_results[index] = data.strip("\n").split(":") - - # Format the data - iteration = 1 - for row in coremark_results: - if "test passes" in row: - processed_data.append([""]) - processed_data.append([system_name]) - processed_data.append([row[0], row[2]]) - else: - processed_data.append([iteration, row[2]]) - iteration = iteration + 1 - results.append(processed_data) - return results, summary_data + return results From d5456ee7aad1b06fbc140d364eec71106c972a96 Mon Sep 17 00:00:00 2001 From: sousinha1997 Date: Mon, 6 Jan 2025 09:26:59 +0530 Subject: [PATCH 06/10] coremark_pro reformatting --- quisby/benchmarks/coremark_pro/compare.py | 186 +++++++++++------- .../benchmarks/coremark_pro/coremark_pro.py | 94 ++++++--- 2 files changed, 184 insertions(+), 96 deletions(-) diff --git a/quisby/benchmarks/coremark_pro/compare.py b/quisby/benchmarks/coremark_pro/compare.py index 15178cd..fd0122f 100644 --- a/quisby/benchmarks/coremark_pro/compare.py +++ b/quisby/benchmarks/coremark_pro/compare.py @@ -1,101 +1,141 @@ from itertools import groupby - from quisby import custom_logger from quisby.benchmarks.coremark_pro.graph import graph_coremark_pro_data from quisby.sheet.sheet_util import ( append_to_sheet, read_sheet, get_sheet, - create_sheet, clear_sheet_data, clear_sheet_charts, + create_sheet, + clear_sheet_data, + 
clear_sheet_charts, ) -from quisby.util import merge_lists_alternately,read_config +from quisby.util import merge_lists_alternately, read_config import re - def extract_prefix_and_number(input_string): - match = re.search(r'^(.*?)(\d+)(.*?)$', input_string) - if match: - prefix = match.group(1) - suffix = match.group(3) # Extracts the suffix after the number - return prefix, suffix + """ + Extracts the prefix and suffix from an instance name that contains a number. + + :param input_string: Instance name, e.g., 't2.micro-01' + :return: Tuple (prefix, suffix) or (None, None) if no match + """ + try: + match = re.search(r'^(.*?)(\d+)(.*?)$', input_string) + if match: + prefix = match.group(1) + suffix = match.group(3) # Extracts the suffix after the number + return prefix, suffix + except Exception as exc: + custom_logger.error(f"Error extracting prefix and number from '{input_string}': {str(exc)}") return None, None def compare_inst(item1, item2): - cloud_type = read_config("cloud", "cloud_type") - if cloud_type == "local": - return True - elif cloud_type == "aws": - return item1.split(".")[0] == item2.split(".")[0] - elif cloud_type == "gcp": - return item1.split("-")[0] == item2.split("-")[0] - elif cloud_type == "azure": - return extract_prefix_and_number(item1) == extract_prefix_and_number(item2) - - -def compare_coremark_pro_results(spreadsheets, spreadsheetId, test_name, table_name=["System name","Price-perf"]): + """ + Compares two instance names based on the cloud provider's naming convention. + + :param item1: Instance name from the first spreadsheet + :param item2: Instance name from the second spreadsheet + :return: True if instance names match, False otherwise + """ + try: + cloud_type = read_config("cloud", "cloud_type") + if cloud_type == "local": + return True + elif cloud_type == "aws": + return item1.split(".")[0] == item2.split(".")[0] + elif cloud_type == "gcp": + return item1.split("-")[0] == item2.split("-")[0] + elif cloud_type == "azure": + return extract_prefix_and_number(item1) == extract_prefix_and_number(item2) + except Exception as exc: + custom_logger.error(f"Error comparing instances '{item1}' and '{item2}': {str(exc)}") + return False + + +def compare_coremark_pro_results(spreadsheets, spreadsheetId, test_name, table_name=["System name", "Price-perf"]): + """ + Compares CoreMark Pro results from multiple spreadsheets and appends the merged data to the target sheet. 
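+
+    The sheets being compared are expected to contain the
+    "Single Iterations", "Multi Iterations", "Cost/Hr" and "Price-perf"
+    blocks produced by create_summary_coremark_pro_data; rows for
+    matching instances are merged side by side with
+    merge_lists_alternately.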
+ + :param spreadsheets: List of spreadsheet names to compare + :param spreadsheetId: Target spreadsheet ID for appending data + :param test_name: The name of the test to compare (e.g., 'coremark_pro') + :param table_name: List of columns to compare (default ["System name", "Price-perf"]) + """ values = [] results = [] spreadsheet_name = [] - for spreadsheet in spreadsheets: - values.append(read_sheet(spreadsheet, range=test_name)) - spreadsheet_name.append(get_sheet(spreadsheet, test_name=test_name)["properties"]["title"]) - - for index, value in enumerate(values): - values[index] = (list(g) for k, g in groupby(value, key=lambda x: x != []) if k) - list_1 = list(values[0]) - list_2 = list(values[1]) - - for value in list_1: - for ele in list_2: - # Check max throughput - if value[1][0] in table_name and ele[1][0] in table_name and value[1][0] == ele[1][0]: - if compare_inst(value[2][0], ele[2][0]) and value[0][0] == ele[0][0]: - results.append([""]) - for item1 in value: - for item2 in ele: - if item1[0] == item2[0]: - results = merge_lists_alternately(results, item1, item2) - break - - elif value[0][0] == "Cost/Hr" and ele[0][0] == "Cost/Hr": - if compare_inst(value[1][0], ele[1][0]): - results.append([""]) - for item1 in value: - for item2 in ele: - if item1[0] == item2[0]: - results.append(item1) - break - - elif value[1][0] == ele[1][0]: - if value[0][0] == ele[0][0]: - results.append([""]) - results.append(value[0]) - for item1, item2 in zip(value[1:], ele[1:]): - results = merge_lists_alternately(results, item1, item2) - break - try: - create_sheet(spreadsheetId, test_name) - custom_logger.info("Deleting existing charts and data from the sheet...") - clear_sheet_charts(spreadsheetId, test_name) - clear_sheet_data(spreadsheetId, test_name) - custom_logger.info("Appending new " + test_name + " data to sheet...") - append_to_sheet(spreadsheetId, results, test_name) - #graph_coremark_pro_data(spreadsheetId, test_name, "compare") + # Read data from each spreadsheet + for spreadsheet in spreadsheets: + values.append(read_sheet(spreadsheet, range=test_name)) + spreadsheet_name.append(get_sheet(spreadsheet, test_name=test_name)["properties"]["title"]) + + # Group the values into non-empty chunks + for index, value in enumerate(values): + values[index] = [list(g) for k, g in groupby(value, key=lambda x: x != []) if k] + + list_1 = list(values[0]) + list_2 = list(values[1]) + + # Compare the CoreMark Pro results from both spreadsheets + for value in list_1: + for ele in list_2: + # Check for max throughput or other table data + if value[1][0] in table_name and ele[1][0] in table_name and value[1][0] == ele[1][0]: + if compare_inst(value[2][0], ele[2][0]) and value[0][0] == ele[0][0]: + results.append([""]) + for item1 in value: + for item2 in ele: + if item1[0] == item2[0]: + results = merge_lists_alternately(results, item1, item2) + break + + # Handle cost/hour comparison + elif value[0][0] == "Cost/Hr" and ele[0][0] == "Cost/Hr": + if compare_inst(value[1][0], ele[1][0]): + results.append([""]) + for item1 in value: + for item2 in ele: + if item1[0] == item2[0]: + results.append(item1) + break + + # General comparison based on row keys + elif value[1][0] == ele[1][0]: + if value[0][0] == ele[0][0]: + results.append([""]) + results.append(value[0]) + for item1, item2 in zip(value[1:], ele[1:]): + results = merge_lists_alternately(results, item1, item2) + break + + # Attempt to create and update the sheet with the results + try: + create_sheet(spreadsheetId, test_name) + 
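+            # create_sheet is assumed to be idempotent here (a no-op when
+            # the tab already exists); the tab is then cleared below so
+            # stale charts and data never mix with the new comparison.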
custom_logger.info("Deleting existing charts and data from the sheet...") + clear_sheet_charts(spreadsheetId, test_name) + clear_sheet_data(spreadsheetId, test_name) + custom_logger.info(f"Appending new {test_name} data to sheet...") + append_to_sheet(spreadsheetId, results, test_name) + # Optionally, generate a graph for CoreMark Pro comparison + # graph_coremark_pro_data(spreadsheetId, test_name, "compare") + except Exception as exc: + custom_logger.error(f"Failed to append data to sheet '{test_name}' in spreadsheet {spreadsheetId}: {str(exc)}") + return spreadsheetId + except Exception as exc: - custom_logger.debug(str(exc)) - custom_logger.error("Failed to append data to sheet") - return spreadsheetId + custom_logger.error(f"Error comparing CoreMark Pro results: {str(exc)}") if __name__ == "__main__": + # Example usage with empty spreadsheet list and target spreadsheetId spreadsheets = [ - "", - "", + "", # Add first spreadsheet ID + "", # Add second spreadsheet ID ] test_name = "coremark_pro" - compare_coremark_pro_results(spreadsheets, "", test_name, - table_name=["System Name"]) \ No newline at end of file + # Call the function to compare the results and update the sheet + compare_coremark_pro_results(spreadsheets, "", test_name, table_name=["System Name"]) diff --git a/quisby/benchmarks/coremark_pro/coremark_pro.py b/quisby/benchmarks/coremark_pro/coremark_pro.py index 13e3058..c70a72e 100644 --- a/quisby/benchmarks/coremark_pro/coremark_pro.py +++ b/quisby/benchmarks/coremark_pro/coremark_pro.py @@ -9,6 +9,12 @@ def extract_prefix_and_number(input_string): + """ + Extract the prefix, number, and suffix from an instance name. + + :param input_string: Instance name like 't2.micro-01' + :return: Tuple (prefix, number, suffix) or (None, None, None) if no match. + """ match = re.search(r'^(.*?)(\d+)(.*?)$', input_string) if match: prefix = match.group(1) @@ -19,43 +25,59 @@ def extract_prefix_and_number(input_string): def custom_key(item): + """ + Generate a custom key for sorting or grouping instances based on cloud provider and instance name format. + + :param item: A tuple containing instance data. + :return: A tuple key for grouping. + """ cloud_type = read_config("cloud", "cloud_type") try: if item[1][0] == "local": return item[1][0] elif cloud_type == "aws": - instance_type = item[1][0].split(".")[0] - instance_number = item[1][0].split(".")[1] + instance_type, instance_number = item[1][0].split(".") return instance_type, instance_number elif cloud_type == "gcp": - instance_type = item[1][0].split("-")[0] - instance_number = int(item[1][0].split('-')[-1]) - return instance_type, instance_number + instance_type, instance_number = item[1][0].split("-") + return instance_type, int(instance_number) elif cloud_type == "azure": - instance_type, instance_number, version = extract_prefix_and_number(item[1][0]) + instance_type, version, instance_number = extract_prefix_and_number(item[1][0]) return instance_type, version, instance_number except Exception as exc: - custom_logger.error(str(exc)) + custom_logger.error(f"Error in custom_key for {item[1][0]}: {str(exc)}") return "", "" def calc_price_performance(inst, avg): + """ + Calculate price-perf ratio for an instance based on its cost per hour and performance. + + :param inst: Instance type or ID. + :param avg: Average score for the instance. 
+ :return: Tuple (cost_per_hour, price_perf) + """ region = read_config("cloud", "region") cloud_type = read_config("cloud", "cloud_type") os_type = read_config("test", "os_type") cost_per_hour = None price_perf = 0.0 try: - cost_per_hour = get_cloud_pricing( - inst, region, cloud_type.lower(), os_type) - price_perf = float(avg) / float(cost_per_hour) + cost_per_hour = get_cloud_pricing(inst, region, cloud_type.lower(), os_type) + price_perf = float(avg) / float(cost_per_hour) if cost_per_hour else 0 except Exception as exc: custom_logger.debug(str(exc)) - custom_logger.error("Error calculating value !") + custom_logger.error("Error calculating price-performance!") return cost_per_hour, price_perf def group_data(results): + """ + Group data based on cloud type and instance attributes. + + :param results: List of results to group. + :return: Grouped results. + """ cloud_type = read_config("cloud", "cloud_type") if cloud_type == "aws": return groupby(results, key=lambda x: process_instance(x[1][0], "family", "version", "feature", "machine_type")) @@ -69,6 +91,11 @@ def group_data(results): def sort_data(results): + """ + Sort data based on cloud type and instance attributes. + + :param results: List of results to sort. + """ cloud_type = read_config("cloud", "cloud_type") if cloud_type == "aws": results.sort(key=lambda x: str(process_instance(x[1][0], "family"))) @@ -79,34 +106,48 @@ def sort_data(results): def create_summary_coremark_pro_data(results, OS_RELEASE): + """ + Create a summary of the CoreMark Pro data, including price-performance and iteration details. + + :param results: List of benchmark results. + :param OS_RELEASE: OS release version (e.g., "Ubuntu 20.04"). + :return: List of summarized results. + """ ret_results = [] - # Sort data based on instance name + # Sort and group data results = list(filter(None, results)) sort_data(results) results = group_data(results) + for _, items in results: multi_iter = [["Multi Iterations"], ["System name", "Score-" + OS_RELEASE]] single_iter = [["Single Iterations"], ["System name", "Score-" + OS_RELEASE]] cal_data = [["System name", "Test_passes-" + OS_RELEASE]] items = list(items) + + # Sort data by instance size sorted_data = sorted(items, key=lambda x: mk_int(process_instance(x[1][0], "size"))) - # Add summary data + + # Collect cost per hour and price performance data cost_per_hour, price_perf_single, price_perf_multi = [], [], [] for item in sorted_data: for index in range(3, len(item)): multi_iter.append([item[1][0], item[index][1]]) single_iter.append([item[1][0], item[index][2]]) + try: cph, ppm = calc_price_performance(item[1][0], item[index][1]) cph, pps = calc_price_performance(item[1][0], item[index][2]) except Exception as exc: custom_logger.error(str(exc)) break + price_perf_multi.append([item[1][0], ppm]) price_perf_single.append([item[1][0], pps]) cost_per_hour.append([item[1][0], cph]) - # final_results += item + + # Prepare the final result for this item final_results = [[""]] final_results += single_iter final_results.append([""]) @@ -121,41 +162,48 @@ def create_summary_coremark_pro_data(results, OS_RELEASE): final_results.append(["Price-perf", f"Score/$-{OS_RELEASE}"]) final_results += price_perf_multi ret_results.extend(final_results) + return ret_results def extract_coremark_pro_data(path, system_name, OS_RELEASE): - """""" + """ + Extract CoreMark Pro data from a CSV file, process it, and return the formatted results. + + :param path: Path to the CSV file containing the benchmark results. 
+    :param system_name: Name of the system being tested.
+    :param OS_RELEASE: OS release version (e.g., "Ubuntu 20.04").
+    :return: Processed results.
+    """
     results = []
     processed_data = []
-    summary_data = []
-    server = read_config("server", "name")
-    result_dir = read_config("server", "result_dir")
 
     # Extract data from file
     try:
         if path.endswith(".csv"):
             with open(path) as file:
                 coremark_pro_results = file.readlines()
-                summary_data.append([system_name, server + "/results/" + result_dir + "/" + path])
         else:
             return None, None
     except Exception as exc:
         custom_logger.debug(str(exc))
-        custom_logger.error("Unable to extract data from csv file for coremark_pro")
+        custom_logger.error("Unable to extract data from csv file for CoreMark Pro")
         return None, None
 
+
     data_index = 0
     header = []
+
+    # Parse the CSV data
     for index, data in enumerate(coremark_pro_results):
         if "Test:Multi iterations:Single Iterations:Scaling" in data:
             data_index = index
             header = data.strip("\n").split(":")
         else:
             coremark_pro_results[index] = data.strip("\n").split(":")
+
     coremark_pro_results = [header] + coremark_pro_results[data_index + 1:]
 
-    # Format the data
-    iteration = 1
+    # Format the data into the structure we need
     for row in coremark_pro_results:
         if "Test" in row:
             processed_data.append([""])
@@ -165,4 +213,4 @@
             processed_data.append(["Score", row[1], row[2]])
 
     results.append(processed_data)
-    return results, summary_data
+    return results

From b0660059d04325bc0570946d412645fa7144c3f6 Mon Sep 17 00:00:00 2001
From: sousinha1997
Date: Mon, 6 Jan 2025 09:30:26 +0530
Subject: [PATCH 07/10] passmark reformatting

---
 quisby/benchmarks/passmark/compare.py  | 51 ++++++++---
 quisby/benchmarks/passmark/passmark.py | 118 +++++++++++++++++--------
 2 files changed, 122 insertions(+), 47 deletions(-)

diff --git a/quisby/benchmarks/passmark/compare.py b/quisby/benchmarks/passmark/compare.py
index 494109a..fe9cf25 100644
--- a/quisby/benchmarks/passmark/compare.py
+++ b/quisby/benchmarks/passmark/compare.py
@@ -1,4 +1,5 @@
 from itertools import groupby
+import re
 
 from quisby import custom_logger
 from quisby.benchmarks.passmark.graph import graph_passmark_data
@@ -6,13 +7,19 @@
     append_to_sheet,
     read_sheet,
     get_sheet,
-    create_sheet, clear_sheet_data, clear_sheet_charts,
+    create_sheet,
+    clear_sheet_data,
+    clear_sheet_charts,
 )
-from quisby.util import merge_lists_alternately,read_config
-import re
+from quisby.util import merge_lists_alternately, read_config
 
 
+# Helper function to extract prefix and suffix from instance names
 def extract_prefix_and_number(input_string):
+    """
+    Extract the prefix and suffix from an instance name that includes a number.
+    The split happens at the first digit run in the name.
+    Example: "t2.micro-01" => ("t", ".micro-01")
+    """
     match = re.search(r'^(.*?)(\d+)(.*?)$', input_string)
     if match:
         prefix = match.group(1)
@@ -21,8 +28,13 @@ def extract_prefix_and_number(input_string):
     return None, None
 
 
+# Helper function to compare instance names based on cloud type
 def compare_inst(item1, item2):
+    """
+    Compare two instance names based on the cloud type.
+ """ cloud_type = read_config("cloud", "cloud_type") + if cloud_type == "local": return True elif cloud_type == "aws": @@ -33,23 +45,32 @@ def compare_inst(item1, item2): return extract_prefix_and_number(item1) == extract_prefix_and_number(item2) -def compare_passmark_results(spreadsheets, spreadsheetId, test_name, table_name=["System name","Price-perf"]): +# Function to compare PassMark results between two spreadsheets +def compare_passmark_results(spreadsheets, spreadsheetId, test_name, table_name=["System name", "Price-perf"]): + """ + Compare PassMark benchmark data between two Google Sheets. + The data is merged and appended to the target sheet. + """ values = [] results = [] spreadsheet_name = [] + # Read data from each spreadsheet for spreadsheet in spreadsheets: values.append(read_sheet(spreadsheet, range=test_name)) spreadsheet_name.append(get_sheet(spreadsheet, test_name=test_name)["properties"]["title"]) + # Group values into segments (non-empty groups) for index, value in enumerate(values): values[index] = (list(g) for k, g in groupby(value, key=lambda x: x != []) if k) + list_1 = list(values[0]) list_2 = list(values[1]) + # Merge the results by comparing each value and adding to the final results for value in list_1: for ele in list_2: - # Check max throughput + # Compare system name and price-perf if value[0][0] in table_name and ele[0][0] in table_name and value[0][0] == ele[0][0]: if compare_inst(value[1][0], ele[1][0]): results.append([""]) @@ -58,6 +79,8 @@ def compare_passmark_results(spreadsheets, spreadsheetId, test_name, table_name= if item1[0] == item2[0]: results = merge_lists_alternately(results, item1, item2) break + + # Compare cost per hour elif value[0][0] == "Cost/Hr" and ele[0][0] == "Cost/Hr": if compare_inst(value[1][0], ele[1][0]): results.append([""]) @@ -67,6 +90,7 @@ def compare_passmark_results(spreadsheets, spreadsheetId, test_name, table_name= results.append(item1) break + # Compare other matching rows elif value[1][0] == ele[1][0]: if value[0][0] == ele[0][0]: results.append([""]) @@ -75,26 +99,31 @@ def compare_passmark_results(spreadsheets, spreadsheetId, test_name, table_name= results = merge_lists_alternately(results, item1, item2) break + # Create the sheet and append the merged results try: create_sheet(spreadsheetId, test_name) custom_logger.info("Deleting existing charts and data from the sheet...") clear_sheet_charts(spreadsheetId, test_name) clear_sheet_data(spreadsheetId, test_name) - custom_logger.info("Appending new " + test_name + " data to sheet...") + + custom_logger.info(f"Appending new {test_name} data to sheet...") append_to_sheet(spreadsheetId, results, test_name) - #graph_passmark_data(spreadsheetId, test_name, "compare") + # Optionally, create a graph (commented out for now) + # graph_passmark_data(spreadsheetId, test_name, "compare") + except Exception as exc: custom_logger.debug(str(exc)) custom_logger.error("Failed to append data to sheet") return spreadsheetId +# Main execution block if __name__ == "__main__": spreadsheets = [ - "", - "", + "spreadsheet_id_1", # Replace with actual spreadsheet ID + "spreadsheet_id_2", # Replace with actual spreadsheet ID ] test_name = "passmark" - compare_passmark_results(spreadsheets, "", test_name, - table_name=["SYSTEM_NAME"]) \ No newline at end of file + # Compare the PassMark results from two spreadsheets + compare_passmark_results(spreadsheets, "spreadsheet_id_1", test_name, table_name=["SYSTEM_NAME"]) diff --git a/quisby/benchmarks/passmark/passmark.py 
b/quisby/benchmarks/passmark/passmark.py
index 8b49935..b55b87c 100644
--- a/quisby/benchmarks/passmark/passmark.py
+++ b/quisby/benchmarks/passmark/passmark.py
@@ -1,16 +1,20 @@
+import re
 from itertools import groupby
-
 from scipy.stats import gmean
-
 from quisby import custom_logger
 from quisby.util import read_config
 from quisby.pricing.cloud_pricing import get_cloud_pricing
-import re
-
 from quisby.util import process_instance, mk_int
 
 
 def extract_prefix_and_number(input_string):
+    """
+    Extract the prefix, number, and suffix from an instance name.
+    The split happens at the first digit run in the name.
+    Example: 't2.micro-01' -> ('t', '2', '.micro-01')
+
+    :param input_string: Instance name string (e.g., 't2.micro-01').
+    :return: Tuple (prefix, number, suffix) or (None, None, None) if no match.
+    """
     match = re.search(r'^(.*?)(\d+)(.*?)$', input_string)
     if match:
         prefix = match.group(1)
@@ -21,38 +25,58 @@ def extract_prefix_and_number(input_string):
 
 
 def custom_key(item):
+    """
+    Generate a custom key for sorting/grouping based on the cloud provider type.
+
+    :param item: A tuple containing instance data.
+    :return: A tuple key for grouping/sorting.
+    """
     cloud_type = read_config("cloud", "cloud_type")
-    if item[0] == "local":
-        return item[0]
-    elif cloud_type == "aws":
-        instance_type = item[0].split(".")[0]
-        instance_number = item[0].split(".")[1]
-        return instance_type, instance_number
-    elif cloud_type == "gcp":
-        instance_type = item[0].split("-")[0]
-        instance_number = int(item[0].split('-')[-1])
-        return instance_type, instance_number
-    elif cloud_type == "azure":
-        instance_type, instance_number, version= extract_prefix_and_number(item[0])
-        return instance_type, version, instance_number
+    try:
+        if item[0] == "local":
+            return item[0]
+        elif cloud_type == "aws":
+            instance_type, instance_number = item[0].split(".")
+            return instance_type, instance_number
+        elif cloud_type == "gcp":
+            instance_type = item[0].split("-")[0]
+            instance_number = int(item[0].split("-")[-1])
+            return instance_type, instance_number
+        elif cloud_type == "azure":
+            instance_type, instance_number, version = extract_prefix_and_number(item[0])
+            return instance_type, version, instance_number
+    except Exception as exc:
+        custom_logger.error(f"Error in custom_key for {item[0]}: {str(exc)}")
+        return "", ""
 
 
 def calc_price_performance(inst, avg):
+    """
+    Calculate the price-performance ratio for a given instance.
+
+    :param inst: Instance type or ID.
+    :param avg: Average performance score (e.g., geometric mean).
+    :return: Tuple (cost_per_hour, price_performance).
+    """
     region = read_config("cloud", "region")
     cloud_type = read_config("cloud", "cloud_type")
     os_type = read_config("test", "os_type")
     cost_per_hour = None
+    price_perf = 0.0
     try:
-        cost_per_hour = get_cloud_pricing(
-            inst, region, cloud_type.lower(), os_type)
-        price_perf = float(avg)/float(cost_per_hour)
+        cost_per_hour = get_cloud_pricing(inst, region, cloud_type.lower(), os_type)
+        price_perf = float(avg) / float(cost_per_hour) if cost_per_hour else 0.0
     except Exception as exc:
         custom_logger.debug(str(exc))
-        custom_logger.error("Error calculating value !")
+        custom_logger.error("Error calculating price-performance!")
     return cost_per_hour, price_perf
 
 
 def group_data(results):
+    """
+    Group benchmark data based on cloud type and instance characteristics.
+
+    :param results: List of benchmark results.
+    :return: Grouped results.
+    """
     cloud_type = read_config("cloud", "cloud_type")
     if cloud_type == "aws":
         return groupby(results, key=lambda x: process_instance(x[1][0], "family", "version", "feature", "machine_type"))
@@ -60,12 +84,17 @@ def group_data(results):
         results = sorted(results, key=lambda x: process_instance(x[1][0], "family", "feature"))
         return groupby(results, key=lambda x: process_instance(x[1][0], "family", "version", "feature"))
     elif cloud_type == "gcp":
-        return groupby(results, key=lambda x: process_instance(x[1][0], "family", "version","sub_family","feature"))
+        return groupby(results, key=lambda x: process_instance(x[1][0], "family", "version", "sub_family", "feature"))
     elif cloud_type == "local":
         return groupby(results, key=lambda x: process_instance(x[1][0], "family"))
 
 
 def sort_data(results):
+    """
+    Sort benchmark data based on instance attributes and cloud type.
+
+    :param results: List of benchmark results.
+    """
     cloud_type = read_config("cloud", "cloud_type")
     if cloud_type == "aws":
         results.sort(key=lambda x: str(process_instance(x[1][0], "family")))
@@ -76,17 +105,26 @@ def sort_data(results):
 
 
 def create_summary_passmark_data(data, OS_RELEASE):
+    """
+    Create a summary of PassMark data, including geometric mean and price-performance metrics.
+
+    :param data: List of benchmark data.
+    :param OS_RELEASE: OS release version (e.g., "Ubuntu 20.04").
+    :return: List of summarized results.
+    """
     ret_results = []
     results = list(filter(None, data))
     sort_data(results)
     results = group_data(results)
+
    for _, items in results:
-        mac_data = [["System name", "Geomean-" + OS_RELEASE]]
+        mac_data = [["System name", f"Geomean-{OS_RELEASE}"]]
         cost_data = [["Cost/Hr"]]
         price_perf_data = [["Price-perf", f"Geomean/$-{OS_RELEASE}"]]
         items = list(items)
         sorted_data = sorted(items, key=lambda x: mk_int(process_instance(x[1][0], "size")))
-        cost_per_hour, price_per_perf = [], []
 
         # Add summary data
         for index, row in enumerate(sorted_data):
             inst = row[1][0]
@@ -95,44 +133,50 @@
             try:
                 gmean_data.append(float(row[i][1].strip()))
             except Exception as exc:
-                gmean_data.append(0.0)
+                gmean_data.append(0.0)  # Default to 0.0 for non-numeric values
             gdata = gmean(gmean_data)
         try:
             cph, pp = calc_price_performance(inst, gdata)
         except Exception as exc:
-            custom_logger.error(str(exc))
+            custom_logger.error(f"Error calculating price performance for {inst}: {str(exc)}")
             continue
         mac_data.append([inst, gdata])
         cost_data.append([inst, cph])
         price_perf_data.append([inst, pp])
+
+        # Append all data for the current group
         ret_results.append([""])
         ret_results.extend(mac_data)
         ret_results.append([""])
         ret_results.extend(cost_data)
         ret_results.append([""])
         ret_results.extend(price_perf_data)
+
     return ret_results
 
 
 def extract_passmark_data(path, system_name, OS_RELEASE):
-    """"""
+    """
+    Extract and process PassMark benchmark data from a CSV file.
+
+    :param path: Path to the CSV file containing the benchmark results.
+    :param system_name: Name of the system being tested.
+    :param OS_RELEASE: OS release version (e.g., "Ubuntu 20.04").
+    :return: Processed results as a list.
+    """
     results = []
-    summary_data = []
-    server = read_config("server", "name")
-    result_dir = read_config("server", "result_dir")
 
+    # Extract data from file
     try:
         if path.endswith("results.csv"):
             with open(path) as file:
                 passmark_results = file.readlines()
-            summary_data.append([system_name, server + "/results/" + result_dir + "/" + path])
-
         else:
             return None
     except Exception as exc:
-        custom_logger.error(str(exc))
-        return None, None
+        custom_logger.error(f"Error reading file {path}: {str(exc)}")
+        return None
 
     data_index = 0
     header = []
@@ -142,9 +186,11 @@
             data_index = index
         else:
             passmark_results[index] = data.strip("\n").split(":")
-    passmark_results = [header] + passmark_results[data_index +1 :]
+
+    passmark_results = [header] + passmark_results[data_index + 1:]
 
     results.append([""])
     results.append([system_name])
     results.extend(passmark_results)
-    return [results], summary_data
+
+    return [results]

From 3f49df0ed1a4feaaa5bea7bb188d70855604d5c4 Mon Sep 17 00:00:00 2001
From: sousinha1997
Date: Mon, 6 Jan 2025 09:36:17 +0530
Subject: [PATCH 08/10] pyperf reformatting

---
 quisby/benchmarks/pyperf/compare.py | 101 +++++++++++++++++++++-------
 quisby/benchmarks/pyperf/pyperf.py  |   7 +-
 2 files changed, 79 insertions(+), 29 deletions(-)

diff --git a/quisby/benchmarks/pyperf/compare.py b/quisby/benchmarks/pyperf/compare.py
index f9ccbde..de937ed 100644
--- a/quisby/benchmarks/pyperf/compare.py
+++ b/quisby/benchmarks/pyperf/compare.py
@@ -1,3 +1,4 @@
+import re
 from itertools import groupby
 
 from quisby import custom_logger
@@ -5,50 +6,97 @@
     append_to_sheet,
     read_sheet,
     get_sheet,
-    create_sheet, clear_sheet_data, clear_sheet_charts,
+    create_sheet,
+    clear_sheet_data,
+    clear_sheet_charts,
 )
-from quisby.util import merge_lists_alternately,read_config
-import re
+from quisby.util import merge_lists_alternately, read_config
 
 
+# Helper function to extract prefix and suffix from instance names
 def extract_prefix_and_number(input_string):
+    """
+    Extract the prefix and suffix from an instance name that contains a number.
+    The split happens at the first digit run in the name.
+    Example: "t2.micro-01" => ("t", ".micro-01")
+
+    Args:
+        input_string (str): Instance name, e.g., "t2.micro-01"
+
+    Returns:
+        tuple: (prefix, suffix) or (None, None) if no match
+    """
     match = re.search(r'^(.*?)(\d+)(.*?)$', input_string)
     if match:
         prefix = match.group(1)
-        suffix = match.group(3)  # Extracts the suffix after the number
+        suffix = match.group(3)
         return prefix, suffix
     return None, None
 
 
+# Compare two instance types based on cloud configuration
 def compare_inst(item1, item2):
+    """
+    Compares two instances based on their cloud type.
+ + Args: + item1 (str): Instance type from the first sheet + item2 (str): Instance type from the second sheet + + Returns: + bool: True if instances match based on cloud type, False otherwise + """ cloud_type = read_config("cloud", "cloud_type") - if cloud_type == "local": - return True - elif cloud_type == "aws": - return item1.split(".")[0] == item2.split(".")[0] - elif cloud_type == "gcp": - return item1.split("-")[0] == item2.split("-")[0] - elif cloud_type == "azure": - return extract_prefix_and_number(item1) == extract_prefix_and_number(item2) + try: + if cloud_type == "local": + return True + elif cloud_type == "aws": + return item1.split(".")[0] == item2.split(".")[0] + elif cloud_type == "gcp": + return item1.split("-")[0] == item2.split("-")[0] + elif cloud_type == "azure": + return extract_prefix_and_number(item1) == extract_prefix_and_number(item2) + except Exception as exc: + custom_logger.error(f"Error comparing instances {item1} and {item2}: {exc}") + return False -def compare_pyperf_results(spreadsheets, spreadsheetId, test_name, table_name=["System name","Price-perf"]): +# Compare the pyperf results from multiple spreadsheets +def compare_pyperf_results(spreadsheets, spreadsheetId, test_name, table_name=["System name", "Price-perf"]): + """ + Compare and merge benchmark results from multiple spreadsheets and append the results to the given sheet. + + Args: + spreadsheets (list): List of spreadsheet IDs to compare + spreadsheetId (str): Spreadsheet ID where the result should be saved + test_name (str): Name of the test (e.g., "pyperf") + table_name (list): List of table names to compare (default: ["System name", "Price-perf"]) + + Returns: + str: The spreadsheet ID if the operation was successful + """ values = [] results = [] - spreadsheet_name = [] + spreadsheet_names = [] - for spreadsheet in spreadsheets: - values.append(read_sheet(spreadsheet, range=test_name)) - spreadsheet_name.append(get_sheet(spreadsheet, test_name=test_name)["properties"]["title"]) + # Read data from all spreadsheets + try: + for spreadsheet in spreadsheets: + values.append(read_sheet(spreadsheet, range=test_name)) + spreadsheet_names.append(get_sheet(spreadsheet, test_name=test_name)["properties"]["title"]) + except Exception as exc: + custom_logger.error(f"Error reading sheets: {exc}") + return spreadsheetId + # Group values by non-empty rows for index, value in enumerate(values): values[index] = (list(g) for k, g in groupby(value, key=lambda x: x != []) if k) list_1 = list(values[0]) list_2 = list(values[1]) + # Compare and merge data from both sheets for value in list_1: for ele in list_2: - # Check max throughput + # Check max throughput or cost/hr and compare if value[0][0] in table_name and ele[0][0] in table_name and value[0][0] == ele[0][0]: if compare_inst(value[1][0], ele[1][0]): results.append([""]) @@ -75,26 +123,29 @@ def compare_pyperf_results(spreadsheets, spreadsheetId, test_name, table_name=[" results = merge_lists_alternately(results, item1, item2) break - + # Write the results back to the sheet try: create_sheet(spreadsheetId, test_name) custom_logger.info("Deleting existing charts and data from the sheet...") clear_sheet_charts(spreadsheetId, test_name) clear_sheet_data(spreadsheetId, test_name) - custom_logger.info("Appending new " + test_name + " data to sheet...") + custom_logger.info(f"Appending new {test_name} data to sheet...") append_to_sheet(spreadsheetId, results, test_name) except Exception as exc: - custom_logger.debug(str(exc)) + custom_logger.debug(f"Error during 
sheet operations: {exc}")
         custom_logger.error("Failed to append data to sheet")
     return spreadsheetId
 
 
 if __name__ == "__main__":
+    # List of spreadsheets to compare
     spreadsheets = [
-        "",
-        "",
+        "",  # Replace with actual spreadsheet IDs
+        "",  # Replace with actual spreadsheet IDs
     ]
     test_name = "pyperf"
-    compare_pyperf_results(spreadsheets, "", test_name,
-                           table_name=["System name"]) \ No newline at end of file
+    # Compare results and update the sheet
+    compare_pyperf_results(spreadsheets, "", test_name, table_name=["System name"])
diff --git a/quisby/benchmarks/pyperf/pyperf.py b/quisby/benchmarks/pyperf/pyperf.py
index 2f038e6..435a735 100644
--- a/quisby/benchmarks/pyperf/pyperf.py
+++ b/quisby/benchmarks/pyperf/pyperf.py
@@ -122,15 +122,14 @@ def create_summary_pyperf_data(data, OS_RELEASE):
 def extract_pyperf_data(path, system_name, OS_RELEASE):
-    """"""
+    """Extract pyperf benchmark data from a results file."""
     results = []
-    server = read_config("server", "name")
-    result_dir = read_config("server", "result_dir")
-    summary_data = []
+
+    # Extract data from file
     try:
         if path:
             with open(path) as file:
                 pyperf_results = file.readlines()
-            summary_data.append([system_name, +server+"/results/"+result_dir+"/"+path])
         else:
             return None
     except Exception as exc:
@@ -141,4 +140,4 @@ def extract_pyperf_data(path, system_name, OS_RELEASE):
     results.append([""])
     results.append([system_name])
     results.extend(pyperf_results[1:])
-    return [results], summary_data \ No newline at end of file
+    return [results] \ No newline at end of file

From ab64474891d8564d3c78f0e32fd453802552e58c Mon Sep 17 00:00:00 2001
From: sousinha1997
Date: Mon, 6 Jan 2025 09:43:26 +0530
Subject: [PATCH 09/10] remove summary creation

---
 quisby/benchmarks/hammerdb/extract.py  | 12 +-----------
 quisby/benchmarks/phoronix/phoronix.py |  7 ++-----
 quisby/benchmarks/pig/extract.py       | 11 +++--------
 quisby/benchmarks/reboot/reboot.py     | 11 ++---------
 quisby/benchmarks/speccpu/extract.py   | 18 ++++++------------
 quisby/benchmarks/specjbb/specjbb.py   |  7 +------
 quisby/benchmarks/streams/streams.py   |  6 ++----
 quisby/benchmarks/uperf/uperf.py       |  8 +-------
 quisby/example.ini                     |  4 ----
 quisby/pricing/cloud_pricing.py        |  9 +++++----
 10 files changed, 23 insertions(+), 70 deletions(-)

diff --git a/quisby/benchmarks/hammerdb/extract.py b/quisby/benchmarks/hammerdb/extract.py
index d64a4a3..2b57e57 100644
--- a/quisby/benchmarks/hammerdb/extract.py
+++ b/quisby/benchmarks/hammerdb/extract.py
@@ -1,30 +1,20 @@
-from quisby.util import read_config
-
 def extract_hammerdb_data(path, system_name, test_name, OS_RELEASE):
     results = []
     result_data = []
-    summary_data = []
-    summary_file = path
-    server = read_config("server", "name")
-    result_dir = read_config("server", "result_dir")
-
     data_index = 0
-    header_row = []
 
     with open(path) as file:
         hammerdb_results = file.readlines()
         for index, line in enumerate(hammerdb_results):
             if "# connection:TPM" in line:
                 data_index = index
-                header_row = line.strip("\n").split(":")
             else:
                 result_data.append(line.strip("\n").split(":"))
 
     result_data = result_data[data_index:]
-    summary_data.append([system_name, server + "/results/" + result_dir + "/" + path])
 
     results.append([""])
     results.append([f"{test_name}-User Count", f"{system_name}-{OS_RELEASE}"])
     results += result_data
-    return results, summary_data
+    return results
diff --git a/quisby/benchmarks/phoronix/phoronix.py b/quisby/benchmarks/phoronix/phoronix.py
index 1bdcfda..c477ef5 100644
--- a/quisby/benchmarks/phoronix/phoronix.py
+++ b/quisby/benchmarks/phoronix/phoronix.py
@@ -119,15 +119,12 @@ def 
create_summary_phoronix_data(data, OS_RELEASE): def extract_phoronix_data(path, system_name, OS_RELEASE): """""" results = [] - summary_data = [] - server = read_config("server", "name") - result_dir = read_config("server", "result_dir") + # Extract data from file try: if path.endswith("results.csv"): with open(path) as file: phoronix_results = file.readlines() - summary_data.append([system_name, server + "/results/" + result_dir + "/" + path]) else: return None except Exception as exc: @@ -146,4 +143,4 @@ def extract_phoronix_data(path, system_name, OS_RELEASE): results.append([""]) results.append([system_name]) results.extend(phoronix_results[1:]) - return [results], summary_data + return [results] diff --git a/quisby/benchmarks/pig/extract.py b/quisby/benchmarks/pig/extract.py index 15e1c6d..8e2576d 100644 --- a/quisby/benchmarks/pig/extract.py +++ b/quisby/benchmarks/pig/extract.py @@ -10,13 +10,9 @@ def extract_pig_data(path, system_name, OS_RELEASE): cpu_count = 0 region = read_config("cloud", "region") cloud_type = read_config("cloud", "cloud_type") - # path = path + f"/iteration_1.{system_name}" - summary_data = [] - summary_file = path - server = read_config("server", "name") - result_dir = read_config("server", "result_dir") + data_index = 0 - header = [] + try: with open(path) as file: pig_results = file.readlines() @@ -30,7 +26,6 @@ def extract_pig_data(path, system_name, OS_RELEASE): except Exception as exc: custom_logger.error(str(exc)) return None - summary_data.append([system_name, server + "/results/" + result_dir + "/" + path]) cpu_count = get_cloud_cpu_count( system_name, region, cloud_type.lower() @@ -41,5 +36,5 @@ def extract_pig_data(path, system_name, OS_RELEASE): results.append(["Threads", "rhel-" + f"{OS_RELEASE}"]) results += result_data - return results, summary_data + return results diff --git a/quisby/benchmarks/reboot/reboot.py b/quisby/benchmarks/reboot/reboot.py index 82be17c..9937717 100644 --- a/quisby/benchmarks/reboot/reboot.py +++ b/quisby/benchmarks/reboot/reboot.py @@ -1,14 +1,9 @@ import re import tarfile -from quisby.util import read_config - def extract_boot_data(path, system_name): results = [] - summary_data = [] - summary_file = path - server = read_config("server", "name") - result_dir = read_config("server", "result_dir") + # system_name = path.split("_")[2] try: with open(path + "/cloud_timings") as file: @@ -19,8 +14,6 @@ def extract_boot_data(path, system_name): except FileNotFoundError: return [] - summary_data.append([system_name, server + "/results/" + result_dir + "/" + path]) - tar = tarfile.open(path + "/boot_info/initial_boot_info.tar") for member in tar.getmembers(): if "initial_boot_info/boot_info" in str(member): @@ -40,4 +33,4 @@ def extract_boot_data(path, system_name): results.append(["System name", "Start Time", "Terminate Time", "Reboot Time"]) results.append([system_name, instance_start_time, terminate_time, reboot_time]) - return results, summary_data + return results diff --git a/quisby/benchmarks/speccpu/extract.py b/quisby/benchmarks/speccpu/extract.py index 04d7143..ac23ba6 100644 --- a/quisby/benchmarks/speccpu/extract.py +++ b/quisby/benchmarks/speccpu/extract.py @@ -7,13 +7,9 @@ def process_speccpu(path, system_name, suite, OS_RELEASE): results = [] - summary_data = [] - server = read_config("server", "name") - result_dir = read_config("server", "result_dir") with open(path) as csv_file: speccpu_results = list(csv.DictReader(csv_file, delimiter=":")) - summary_data.append([system_name, server + "/results/" + 
result_dir + "/" + path]) results.append([""]) results.append([system_name, suite]) @@ -24,19 +20,17 @@ def process_speccpu(path, system_name, suite, OS_RELEASE): except Exception as exc: custom_logger.debug(str(exc)) pass - return results,summary_data + return results def extract_speccpu_data(path, system_name, OS_RELEASE): results = [] summary_data = [] if "fprate" in path: - fp_results, fp_summary_data= process_speccpu(path, system_name, "fprate", OS_RELEASE) - results +=fp_results - summary_data += fp_summary_data + fp_results = process_speccpu(path, system_name, "fprate", OS_RELEASE) + results += fp_results elif "intrate" in path: - int_results, int_summary_data= process_speccpu(path, system_name, "intrate", OS_RELEASE) - results +=int_results - summary_data +=int_summary_data + int_results = process_speccpu(path, system_name, "intrate", OS_RELEASE) + results += int_results - return results, summary_data + return results diff --git a/quisby/benchmarks/specjbb/specjbb.py b/quisby/benchmarks/specjbb/specjbb.py index da6d329..ab82d4c 100644 --- a/quisby/benchmarks/specjbb/specjbb.py +++ b/quisby/benchmarks/specjbb/specjbb.py @@ -128,16 +128,11 @@ def create_summary_specjbb_data(specjbb_data, OS_RELEASE): def extract_specjbb_data(path, system_name, OS_RELEASE): """""" results = [[""], [system_name]] - summary_data = [] - server = read_config("server", "name") - result_dir = read_config("server", "result_dir") - # File read try: if path.endswith(".csv"): with open(path) as csv_file: specjbb_results = list(csv.DictReader(csv_file, delimiter=":")) - summary_data.append([system_name, server + "/results/" + result_dir + "/" + path]) else: return None except Exception as exc: @@ -151,4 +146,4 @@ def extract_specjbb_data(path, system_name, OS_RELEASE): else: results.append([data_dict["Warehouses"], data_dict["Bops"]]) - return results, summary_data + return results diff --git a/quisby/benchmarks/streams/streams.py b/quisby/benchmarks/streams/streams.py index a03c310..5577720 100644 --- a/quisby/benchmarks/streams/streams.py +++ b/quisby/benchmarks/streams/streams.py @@ -133,8 +133,7 @@ def extract_streams_data(path, system_name, OS_RELEASE): summary_data = [] summary_file = path - server = read_config("server", "name") - result_dir = read_config("server", "result_dir") + if not os.path.isfile(summary_file): return None @@ -142,7 +141,6 @@ def extract_streams_data(path, system_name, OS_RELEASE): with open(path) as file: streams_results = file.readlines() - summary_data.append([system_name, server + "/results/" + result_dir + "/" + path]) data_index = 0 for index, data in enumerate(streams_results): @@ -189,7 +187,7 @@ def extract_streams_data(path, system_name, OS_RELEASE): data_pos = pos - 1 proccessed_data[pos - 5].append(memory + "-" + OS_RELEASE) proccessed_data[data_pos].extend(row[1:]) - return proccessed_data, summary_data + return proccessed_data if __name__ == "__main__": diff --git a/quisby/benchmarks/uperf/uperf.py b/quisby/benchmarks/uperf/uperf.py index 1b207c0..98d8285 100644 --- a/quisby/benchmarks/uperf/uperf.py +++ b/quisby/benchmarks/uperf/uperf.py @@ -137,10 +137,6 @@ def extract_uperf_data(path, system_name): """""" results = [] data_position = {} - summary_data = [] - summary_file = path - server = read_config("server", "name") - result_dir = read_config("server", "result_dir") tests_supported = ["tcp_stream", "tcp_rr"] @@ -155,8 +151,6 @@ def extract_uperf_data(path, system_name): else: return None - summary_data.append([system_name, server + "/results/" + result_dir + "/" + 
path]) - # find all ports result index in csv row for index, row in enumerate(csv_reader[0]): if "all" in row: @@ -213,6 +207,6 @@ def extract_uperf_data(path, system_name): else: results.append(*items) - return results, summary_data + return results diff --git a/quisby/example.ini b/quisby/example.ini index c877cc5..b8e0fb4 100644 --- a/quisby/example.ini +++ b/quisby/example.ini @@ -28,7 +28,3 @@ filename = quisby.log max_bytes_log_file = 5 backup_count = 3 -[server] -name = https://pbench.app.intlab.redhat.com -result_dir = - diff --git a/quisby/pricing/cloud_pricing.py b/quisby/pricing/cloud_pricing.py index 2db7bbf..d5a6c0d 100644 --- a/quisby/pricing/cloud_pricing.py +++ b/quisby/pricing/cloud_pricing.py @@ -71,18 +71,19 @@ def get_gcp_prices(instance_name, region): prefix = "" gcp_price_list = google_ext_prices["gcp_price_list"] family, model, cpu = instance_name.split("-") - if family.upper() in ("N2", "N2D", "T2D", "T2A", "C2", "C2D", "M1", "M2", "N1", "E2"): + if family.upper() in ("N2", "N2D", "T2D", "T2A", "C2", "C2D", "M1", "M2", "N1", "E2", "C4A", "C3D"): prefix = "CP-COMPUTEENGINE-" + family.upper() + "-PREDEFINED-VM-CORE".strip() else: - custom_logger.error("This machine price is not available") - return + custom_logger.error("Machine price is not available for :" + instance_name) + return None for name, prices in gcp_price_list.items(): if prefix == name: for key, price in prices.items(): if region == key: return gcp_price_list[name][region] * float(cpu) - return 0.0 + custom_logger.error("Machine price is not available for region:" + region) + return None def get_aws_pricing(instance_type, region, os_type): From b67f764651c367baf5859914a235d77687551c38 Mon Sep 17 00:00:00 2001 From: sousinha1997 Date: Mon, 6 Jan 2025 09:44:30 +0530 Subject: [PATCH 10/10] remove summary creation --- quisby.py | 72 +++++++++--------------------------- quisby/benchmarks/fio/fio.py | 7 ++-- 2 files changed, 20 insertions(+), 59 deletions(-) diff --git a/quisby.py b/quisby.py index b4b34bb..5d3457c 100644 --- a/quisby.py +++ b/quisby.py @@ -203,7 +203,6 @@ def data_handler(proc_list, noti_flag, exclude_list): print(line, end="") with open(results_path) as file: - summary_result = [] custom_logger.info("Reading data files path provided in file : " + results_path) test_result_path = file.readlines() flag = False @@ -212,14 +211,9 @@ def data_handler(proc_list, noti_flag, exclude_list): if "test " in data: flag = False if results: - summary_result = [[""],[test_name]]+summary_result - #TODO Check better way to add this information - append_to_sheet(spreadsheetid, summary_result, "summary") - spreadsheetid = process_results(results, test_name, cloud_type, os_type, os_release, - spreadsheet_name, spreadsheetid) + spreadsheetid = process_results(results, test_name, cloud_type, os_type, os_release, spreadsheet_name, spreadsheetid) results = [] test_name = data.replace("test ", "").strip() - summary_result = [] source = "results" if test_name in proc_list or proc_list == [] and test_name not in exclude_list: flag = True @@ -241,107 +235,77 @@ def data_handler(proc_list, noti_flag, exclude_list): path = test_path + "/" + path.strip() custom_logger.debug(path) if test_name == "streams" and flag == True: - ret_val, summary_data = extract_streams_data(path, system_name, os_release) + ret_val = extract_streams_data(path, system_name, os_release) if ret_val: results += ret_val - if summary_data: - summary_result +=summary_data elif test_name == "uperf" and flag == True: - ret_val, summary_data = 
extract_uperf_data(path, system_name)
+                    ret_val = extract_uperf_data(path, system_name)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result +=summary_data
                 elif test_name == "linpack" and flag == True:
-                    ret_val, summary_data = extract_linpack_data(path, system_name)
+                    ret_val = extract_linpack_data(path, system_name)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result +=summary_data
                 elif test_name == "specjbb" and flag == True:
-                    ret_value, summary_data = extract_specjbb_data(path, system_name, os_release)
+                    ret_value = extract_specjbb_data(path, system_name, os_release)
                     if ret_value is not None:
                         results.append(ret_value)
-                    if summary_data:
-                        summary_result +=summary_data
                 elif test_name == "pig" and flag == True:
-                    ret_val, summary_data = extract_pig_data(path, system_name, os_release)
+                    ret_val = extract_pig_data(path, system_name, os_release)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result +=summary_data
                 elif check_test_is_hammerdb(test_name) and flag == True:
-                    ret_val, summary_data = extract_hammerdb_data(path, system_name, test_name, os_release)
+                    ret_val = extract_hammerdb_data(path, system_name, test_name, os_release)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result +=summary_data
                 elif test_name == "fio_run" and flag == True:
                     ret_val = None
                     if source == "results":
-                        ret_val, summary_data = extract_fio_run_data(path, system_name, os_release)
+                        ret_val = extract_fio_run_data(path, system_name, os_release)
                     elif source == "pbench":
-                        ret_val, summary_data = process_fio_run_result(path, system_name)
+                        ret_val = process_fio_run_result(path, system_name)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result +=summary_data
                 elif test_name == "boot" and flag == True:
-                    ret_val, summary_data = extract_boot_data(path, system_name)
+                    ret_val = extract_boot_data(path, system_name)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result +=summary_data
                 elif test_name == "aim" and flag == True:
                     ret_val = extract_aim_data(path, system_name)
                     if ret_val:
                         results += ret_val
                 elif test_name == "auto_hpl" and flag == True:
-                    ret_val, summary_data = extract_auto_hpl_data(path, system_name)
+                    ret_val = extract_auto_hpl_data(path, system_name)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result +=summary_data
                 elif test_name == "speccpu" and flag == True:
-                    ret_val, summary_data = extract_speccpu_data(path, system_name, os_release)
+                    ret_val = extract_speccpu_data(path, system_name, os_release)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result +=summary_data
                 elif test_name == "etcd" and flag == True:
                     ret_val = extract_etcd_data(path, system_name)
                     if ret_val:
                         results += ret_val
                 elif test_name == "coremark" and flag == True:
-                    ret_val, summary_data = extract_coremark_data(path, system_name, os_release)
+                    ret_val = extract_coremark_data(path, system_name, os_release)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result +=summary_data
                 elif test_name == "coremark_pro" and flag == True:
-                    ret_val, summary_data = extract_coremark_pro_data(path, system_name, os_release)
+                    ret_val = extract_coremark_pro_data(path, system_name, os_release)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result +=summary_data
                 elif test_name == "passmark" and flag == True:
-                    ret_val, summary_data = extract_passmark_data(path, system_name, os_release)
+                    ret_val = extract_passmark_data(path, system_name, os_release)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result += 
summary_data
                 elif test_name == "pyperf" and flag == True:
-                    ret_val, summary_data = extract_pyperf_data(path, system_name, os_release)
+                    ret_val = extract_pyperf_data(path, system_name, os_release)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result +=summary_data
                 elif test_name == "phoronix" and flag == True:
-                    ret_val, summary_data= extract_phoronix_data(path, system_name, os_release)
+                    ret_val = extract_phoronix_data(path, system_name, os_release)
                     if ret_val:
                         results += ret_val
-                    if summary_data:
-                        summary_result +=summary_data
                 else:
                     if flag == False:
                         pass
@@ -355,10 +319,8 @@ def data_handler(proc_list, noti_flag, exclude_list):
             register_details_json(spreadsheet_name, spreadsheetid)
         else:
             try:
-                append_to_sheet(spreadsheetid, summary_result, "summary")
                 spreadsheetid = process_results(results, test_name, cloud_type, os_type, os_release,
                                                 spreadsheet_name, spreadsheetid)
-
             except Exception as exc:
                 custom_logger.error(str(exc))
                 pass
diff --git a/quisby/benchmarks/fio/fio.py b/quisby/benchmarks/fio/fio.py
index 2167220..a010e1b 100644
--- a/quisby/benchmarks/fio/fio.py
+++ b/quisby/benchmarks/fio/fio.py
@@ -116,16 +116,15 @@ def extract_fio_run_data(path, system_name, OS_RELEASE):
     results = []
-    summary_data = []
     summary_file = path
-    server = read_config("server", "name")
-    result_dir = read_config("server", "result_dir")
+
     try:
         with open(path + "/result.csv") as csv_file:
             csv_data = csv_file.readlines()
             csv_data[-1] = csv_data[-1].strip()
             results += extract_csv_data(csv_data, os.path.basename(path))
-        summary_data.append([system_name, server + "/results/" + result_dir + "/" + path])
-        return group_data(results, system_name, OS_RELEASE), summary_data
+        return group_data(results, system_name, OS_RELEASE)
    except Exception as exc:
         custom_logger.error("Unable to find fio path")
         custom_logger.error(str(exc))
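
---

A note on the instance-name parsing shared by the compare helpers in this series: every variant relies on re.search(r'^(.*?)(\d+)(.*?)$', name), which splits the name at the first run of digits, not the last. The sketch below is illustrative only; it is not part of any patch, and the helper name and sample instance names are hypothetical. It shows how the pattern behaves and why indexed access is the safer way to split multi-hyphen GCP names:

    import re

    def split_instance_name(name):
        # Same pattern as the helpers above: lazy prefix, first digit run, rest.
        match = re.search(r'^(.*?)(\d+)(.*?)$', name)
        if match:
            return match.group(1), match.group(2), match.group(3)
        return None, None, None

    # The first digit run splits the name, so "t2.micro-01" breaks at the "2" in "t2".
    print(split_instance_name("t2.micro-01"))      # ('t', '2', '.micro-01')
    print(split_instance_name("Standard_D2s_v3"))  # ('Standard_D', '2', 's_v3')

    # GCP names usually carry two hyphens, so two-value unpacking of
    # name.split("-") raises ValueError on three parts; indexing stays robust.
    name = "n2-standard-4"
    print(name.split("-")[0], int(name.split("-")[-1]))  # n2 4

Because the Azure comparisons key off this regex, a name whose first digit run falls inside the family token (as in "t2.micro-01") groups on a very short prefix; that behavior is worth keeping in mind when reading the compare_inst functions above.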